diff --git a/src/bin/proxmox_apt_mirror_cmds/config.rs b/src/bin/proxmox_apt_mirror_cmds/config.rs index 085997b..3901725 100644 --- a/src/bin/proxmox_apt_mirror_cmds/config.rs +++ b/src/bin/proxmox_apt_mirror_cmds/config.rs @@ -322,7 +322,7 @@ async fn list_media(config: Option, param: Value) -> Result, id: String, param: Value) -> Result { let config = config.unwrap_or_else(|| DEFAULT_CONFIG_PATH.to_string()); diff --git a/src/config.rs b/src/config.rs index 64e65b9..3d8387c 100644 --- a/src/config.rs +++ b/src/config.rs @@ -67,19 +67,15 @@ pub struct MirrorConfig { }, mountpoint: { type: String, - description: "Path where sync target is mounted." }, verify: { type: bool, - description: "Whether to verify existing files stored in pool (IO-intensive).", }, sync: { type: bool, - description: "Whether to write pool updates with fsync flag.", }, mirrors: { type: Array, - description: "List of mirror IDs this sync target should contain.", items: { schema: MIRROR_ID_SCHEMA, }, @@ -88,13 +84,18 @@ pub struct MirrorConfig { )] #[derive(Debug, Serialize, Deserialize, Updater)] #[serde(rename_all = "kebab-case")] -/// Configuration file for mirrored repositories. +/// Configuration entry for an external medium. pub struct MediaConfig { #[updater(skip)] + /// Identifier for this entry. pub id: String, + /// Mountpoint where medium is available on mirroring system. pub mountpoint: String, + /// List of [MirrorConfig] IDs which should be synced to medium. pub mirrors: Vec, + /// Whether to verify existing files or assume they are valid (IO-intensive). pub verify: bool, + /// Whether to write new files using FSYNC. pub sync: bool, } @@ -127,9 +128,12 @@ fn init() -> SectionConfig { config } +/// Lock guard for guarding modifications of config file. +/// +/// Obtained via [lock_config], should only be dropped once config file should no longer be locked. 
pub struct ConfigLockGuard(std::fs::File); -/// Get exclusive lock +/// Get exclusive lock for config file (in order to make or protect against modifications). pub fn lock_config(path: &str) -> Result { let path = Path::new(path); @@ -148,6 +152,7 @@ pub fn lock_config(path: &str) -> Result { Ok(ConfigLockGuard(file)) } +/// Read config pub fn config(path: &str) -> Result<(SectionConfigData, [u8; 32]), Error> { let content = proxmox_sys::fs::file_read_optional_string(path)?.unwrap_or_else(|| "".to_string()); @@ -157,6 +162,7 @@ pub fn config(path: &str) -> Result<(SectionConfigData, [u8; 32]), Error> { Ok((data, digest)) } +/// Write config (and verify data matches schema!) pub fn save_config(path: &str, data: &SectionConfigData) -> Result<(), Error> { let raw = CONFIG.write(path, data)?; replace_file(path, raw.as_bytes(), CreateOptions::default(), true) diff --git a/src/helpers/tty.rs b/src/helpers/tty.rs index 1e10495..7ef5469 100644 --- a/src/helpers/tty.rs +++ b/src/helpers/tty.rs @@ -3,6 +3,9 @@ use std::io::Write; use anyhow::{bail, format_err, Error}; use proxmox_schema::parse_boolean; +/// Prints `query`, reads string from terminal, defaulting to `default`. +/// +/// Will retry if no default is given and user doesn't input any data. pub fn read_string_from_tty(query: &str, default: Option<&str>) -> Result { use std::io::{BufRead, BufReader}; @@ -29,6 +32,9 @@ pub fn read_string_from_tty(query: &str, default: Option<&str>) -> Result) -> Result { let default = default.map(|v| if v { "yes" } else { "no" }); @@ -46,6 +52,9 @@ pub fn read_bool_from_tty(query: &str, default: Option) -> Result( query: &str, choices: &'a [(V, &str)], diff --git a/src/helpers/verifier.rs b/src/helpers/verifier.rs index 4e556c3..57bfd1b 100644 --- a/src/helpers/verifier.rs +++ b/src/helpers/verifier.rs @@ -60,6 +60,8 @@ impl<'a> VerificationHelper for Helper<'a> { } } } + +/// Verifies GPG-signed `msg` was signed by `key`, returning the verified data without signature. 
pub(crate) fn verify_signature<'msg>( msg: &'msg [u8], key: &[u8], diff --git a/src/lib.rs b/src/lib.rs index 294ce00..e25a8a7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,13 @@ +//! Proxmox mirroring tool for APT repositories. +//! +//! This library provides the underlying functionality of the `proxmox-apt-mirror` and +//! `proxmox-apt-repo` binaries. +//! +//! It implements the following features: +//! - local storage in a hardlink-based pool +//! - intelligently fetching only those files of a repository that have changed since the last mirroring operation +//! - syncing to external media + use std::{ fmt::Display, ops::{Add, AddAssign}, @@ -9,15 +19,24 @@ use medium::MirrorInfo; use proxmox_apt::repositories::{APTRepository, APTRepositoryFile, APTRepositoryFileType}; use types::Snapshot; +/// Main configuration file containing definitions of mirrors and external media. pub mod config; +/// Helpers pub mod helpers; +/// Operations concerning a medium. pub mod medium; +/// Operations concerning a mirror. pub mod mirror; -pub mod pool; +/// Hardlink pool. +pub(crate) mod pool; +/// Various common types pub mod types; +/// Combination of data and whether it needed to be fetched or was re-used. struct FetchResult { + /// Fetched/read data data: Vec, + /// Number of bytes fetched (0 if re-using pool data) fetched: usize, } @@ -32,6 +51,7 @@ impl FetchResult { } #[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)] +/// To keep track of progress and how much data was newly fetched vs. re-used and just linked struct Progress { new: usize, new_bytes: usize, @@ -92,12 +112,14 @@ impl Display for Progress { } } +/// Try to parse a line in sources.list format into an `APTRepository`. 
pub(crate) fn convert_repo_line(line: String) -> Result { let mut repository = APTRepositoryFile::with_content(line, APTRepositoryFileType::List); repository.parse()?; Ok(repository.repositories[0].clone()) } +/// Generate a file-based repository line in sources.list format pub fn generate_repo_file_line( medium_base: &Path, mirror_id: &str, diff --git a/src/medium.rs b/src/medium.rs index 5fec1ea..955452f 100644 --- a/src/medium.rs +++ b/src/medium.rs @@ -18,8 +18,13 @@ use crate::{ }; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] +/// Information about a mirror on the medium. +/// +/// Used to generate repository lines for accessing the synced mirror. pub struct MirrorInfo { + /// Original repository line pub repository: String, + /// Mirrored architectures pub architectures: Vec, } @@ -43,17 +48,29 @@ impl From for MirrorInfo { #[derive(Debug, Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] +/// State of mirrors on the medium pub struct MediumState { + /// Map of mirror ID to `MirrorInfo`. pub mirrors: HashMap, + /// Timestamp of last sync operation. pub last_sync: i64, } +/// Information about the mirrors on a medium. 
+/// +/// Derived from `MediaConfig` (supposed state) and `MediumState` (actual state) pub struct MediumMirrorState { + /// Mirrors which are configured and synced pub synced: HashSet, + /// Mirrors which are configured + pub config: HashSet, + /// Mirrors which are configured but not synced yet pub source_only: HashSet, + /// Mirrors which are not configured but exist on medium pub target_only: HashSet, } +// helper to derive `MediumMirrorState` fn get_mirror_state(config: &MediaConfig, state: &MediumState) -> MediumMirrorState { let synced_mirrors: HashSet = state .mirrors @@ -72,11 +89,13 @@ fn get_mirror_state(config: &MediaConfig, state: &MediumState) -> MediumMirrorSt MediumMirrorState { synced: synced_mirrors, + config: config_mirrors, source_only: new_mirrors, target_only: dropped_mirrors, } } +// Helper to lock medium fn lock(base: &Path) -> Result { let mut lockfile = base.to_path_buf(); lockfile.push("mirror-state"); @@ -86,12 +105,14 @@ fn lock(base: &Path) -> Result { config::lock_config(lockfile) } +// Helper to get statefile path fn statefile(base: &Path) -> PathBuf { let mut statefile = base.to_path_buf(); statefile.push(".mirror-state"); statefile } +// Helper to load statefile fn load_state(base: &Path) -> Result, Error> { let statefile = statefile(base); @@ -104,6 +125,7 @@ fn load_state(base: &Path) -> Result, Error> { } } +// Helper to write statefile fn write_state(_lock: &ConfigLockGuard, base: &Path, state: &MediumState) -> Result<(), Error> { replace_file( &statefile(base), @@ -115,6 +137,7 @@ fn write_state(_lock: &ConfigLockGuard, base: &Path, state: &MediumState) -> Res Ok(()) } +/// List snapshots of a given mirror on a given medium. 
pub fn list_snapshots(medium_base: &Path, mirror: &str) -> Result, Error> { if !medium_base.exists() { bail!("Medium mountpoint doesn't exist."); @@ -144,6 +167,7 @@ pub fn list_snapshots(medium_base: &Path, mirror: &str) -> Result, Ok(list) } +/// Generate a repository snippet for a selection of mirrors on a medium. pub fn generate_repo_snippet( medium_base: &Path, repositories: &HashMap, @@ -160,6 +184,7 @@ pub fn generate_repo_snippet( Ok(res) } +/// Run garbage collection on all mirrors on a medium. pub fn gc(medium: &crate::config::MediaConfig) -> Result<(), Error> { let medium_base = Path::new(&medium.mountpoint); if !medium_base.exists() { @@ -205,6 +230,7 @@ pub fn gc(medium: &crate::config::MediaConfig) -> Result<(), Error> { Ok(()) } +/// Get `MediumState` and `MediumMirrorState` for a given medium. pub fn status( medium: &crate::config::MediaConfig, ) -> Result<(MediumState, MediumMirrorState), Error> { @@ -220,6 +246,7 @@ pub fn status( Ok((state, mirror_state)) } +/// Sync medium's content according to config. 
pub fn sync(medium: &crate::config::MediaConfig, mirrors: Vec) -> Result<(), Error> { println!( "Syncing {} mirrors {:?} to medium '{}' ({:?})", @@ -229,6 +256,10 @@ pub fn sync(medium: &crate::config::MediaConfig, mirrors: Vec) -> &medium.mountpoint ); + if mirrors.len() != medium.mirrors.len() { + bail!("Number of mirrors in config and sync request don't match."); + } + let medium_base = Path::new(&medium.mountpoint); if !medium_base.exists() { bail!("Medium mountpoint doesn't exist."); @@ -260,6 +291,15 @@ pub fn sync(medium: &crate::config::MediaConfig, mirrors: Vec) -> let mirror_state = get_mirror_state(medium, &state); println!("Previously synced mirrors: {:?}", &mirror_state.synced); + let requested: HashSet = mirrors.iter().map(|mirror| mirror.id.clone()).collect(); + if requested != mirror_state.config { + bail!( + "Config and sync request don't use the same mirror list: {:?} / {:?}", + mirror_state.config, + requested + ); + } + if !mirror_state.source_only.is_empty() { println!( "Adding {} new mirror(s) to target medium: {:?}", diff --git a/src/mirror.rs b/src/mirror.rs index 4a697da..43e7308 100644 --- a/src/mirror.rs +++ b/src/mirror.rs @@ -31,6 +31,7 @@ pub(crate) fn pool(config: &MirrorConfig) -> Result { Pool::open(Path::new(&config.dir), Path::new(&pool_dir)) } +/// `MirrorConfig`, but some fields converted/parsed into usable types. struct ParsedMirrorConfig { pub repository: APTRepository, pub architectures: Vec, @@ -61,12 +62,14 @@ impl TryInto for MirrorConfig { } } +// Helper to get absolute URL for dist-specific relative `path`. fn get_dist_url(repo: &APTRepository, path: &str) -> String { let dist_root = format!("{}/dists/{}", repo.uris[0], repo.suites[0]); format!("{}/{}", dist_root, path) } +// Helper to get dist-specific path given a `prefix` (snapshot dir) and relative `path`. 
fn get_dist_path(repo: &APTRepository, prefix: &Path, path: &str) -> PathBuf { let mut base = PathBuf::from(prefix); base.push("dists"); @@ -75,10 +78,14 @@ fn get_dist_path(repo: &APTRepository, prefix: &Path, path: &str) -> PathBuf { base } +// Helper to get generic URL given a `repo` and `path`. fn get_repo_url(repo: &APTRepository, path: &str) -> String { format!("{}/{}", repo.uris[0], path) } +/// Helper to fetch file from URI and optionally verify the responses checksum. +/// +/// Only fetches and returns data, doesn't store anything anywhere. fn fetch_repo_file( uri: &str, max_size: Option, @@ -103,6 +110,9 @@ fn fetch_repo_file( }) } +/// Helper to fetch InRelease (`detached` == false) or Release/Release.gpg (`detached` == true) files from repository. +/// +/// Verifies the contained/detached signature, stores all fetched files under `prefix`, and returns the verified raw release file data. fn fetch_release( config: &ParsedMirrorConfig, prefix: &Path, @@ -176,6 +186,12 @@ fn fetch_release( }) } +/// Helper to fetch an index file referenced by a `ReleaseFile`. +/// +/// Since these usually come in compressed and uncompressed form, with the latter often not actually existing in the source repository as file, this fetches and if necessary decompresses to obtain a copy of the uncompressed data. +/// Will skip fetching if both references are already available with the expected checksum in the pool, in which case they will just be re-linked under the new path. +/// +/// Returns the uncompressed data. fn fetch_index_file( config: &ParsedMirrorConfig, prefix: &Path, @@ -238,6 +254,11 @@ fn fetch_index_file( }) } +/// Helper to fetch arbitrary files like binary packages. +/// +/// Will skip fetching if matching file already exists locally, in which case it will just be re-linked under the new path. +/// +/// If need_data is false and the mirror config is set to skip verification, reading the file's content will be skipped as well if fetching was skipped. 
fn fetch_plain_file( config: &ParsedMirrorConfig, url: &str, @@ -271,12 +292,14 @@ fn fetch_plain_file( Ok(res) } +/// Initialize a new mirror (by creating the corresponding pool). pub fn init(config: &MirrorConfig) -> Result<(), Error> { let pool_dir = format!("{}/.pool", config.dir); Pool::create(Path::new(&config.dir), Path::new(&pool_dir))?; Ok(()) } +/// Destroy a mirror (by destroying the corresponding pool). pub fn destroy(config: &MirrorConfig) -> Result<(), Error> { let pool: Pool = pool(config)?; pool.lock()?.destroy()?; @@ -284,6 +307,7 @@ pub fn destroy(config: &MirrorConfig) -> Result<(), Error> { Ok(()) } +/// List snapshots pub fn list_snapshots(config: &MirrorConfig) -> Result, Error> { let _pool: Pool = pool(config)?; @@ -309,6 +333,14 @@ pub fn list_snapshots(config: &MirrorConfig) -> Result, Error> { Ok(list) } +/// Create a new snapshot of the remote repository, fetching and storing files as needed. +/// +/// Operates in three phases: +/// - Fetch and verify release files +/// - Fetch referenced indices according to config +/// - Fetch binary packages referenced by package indices +/// +/// Files will be linked in a temporary directory and only renamed to the final, valid snapshot directory at the end. In case of error, leftover `XXX.tmp` directories at the top level of `base_dir` can be safely removed once the next snapshot was successfully created, as they only contain hardlinks. pub fn create_snapshot(config: MirrorConfig, snapshot: &Snapshot) -> Result<(), Error> { let config: ParsedMirrorConfig = config.try_into()?; @@ -499,6 +531,7 @@ pub fn create_snapshot(config: MirrorConfig, snapshot: &Snapshot) -> Result<(), Ok(()) } +/// Remove a snapshot by removing the corresponding snapshot directory. To actually free up space, a garbage collection needs to be run afterwards. 
pub fn remove_snapshot(config: &MirrorConfig, snapshot: &Snapshot) -> Result<(), Error> { let pool: Pool = pool(config)?; let path = pool.get_path(Path::new(&snapshot.to_string()))?; @@ -506,6 +539,7 @@ pub fn remove_snapshot(config: &MirrorConfig, snapshot: &Snapshot) -> Result<(), pool.lock()?.remove_dir(&path) } +/// Run a garbage collection on the underlying pool. pub fn gc(config: &MirrorConfig) -> Result<(usize, u64), Error> { let pool: Pool = pool(config)?; diff --git a/src/pool.rs b/src/pool.rs index 5ce76a4..cb61cbc 100644 --- a/src/pool.rs +++ b/src/pool.rs @@ -15,17 +15,24 @@ use proxmox_sys::fs::{create_path, file_get_contents, replace_file, CreateOption use walkdir::WalkDir; #[derive(Debug)] +/// Pool consisting of two (possibly overlapping) directory trees: +/// - pool_dir contains checksum files added by `add_file` +/// - base_dir contains directories and hardlinks to checksum files created by `link_file` +/// +/// Files are considered orphaned and eligible for GC if they either only exist in pool_dir or only exist in base_dir pub(crate) struct Pool { pool_dir: PathBuf, base_dir: PathBuf, } +/// Lock guard used to guard against concurrent modification pub(crate) struct PoolLockGuard<'lock> { pool: &'lock Pool, _lock: Option, } impl Pool { + /// Create a new pool by creating `pool_dir` and `base_dir`. They must not exist before calling this function. pub(crate) fn create(base: &Path, pool: &Path) -> Result { if base.exists() { bail!("Pool base dir already exists."); @@ -43,6 +50,8 @@ impl Pool { base_dir: base.to_path_buf(), }) } + + /// Open an existing pool. `pool_dir` and `base_dir` must exist. pub(crate) fn open(base: &Path, pool: &Path) -> Result { if !base.exists() { bail!("Pool base dir doesn't exist.") @@ -58,6 +67,7 @@ impl Pool { }) } + /// Lock a pool to add/remove files or links, or protect against concurrent modifications. 
pub(crate) fn lock(&self) -> Result { let timeout = std::time::Duration::new(10, 0); let lock = Some(proxmox_sys::fs::open_file_locked( @@ -72,6 +82,8 @@ impl Pool { _lock: lock, }) } + + /// Returns whether the pool contains a file for the given checksum. pub(crate) fn contains(&self, checksums: &CheckSums) -> bool { match self.get_checksum_paths(checksums) { Ok(paths) => paths.iter().any(|path| path.exists()), @@ -79,6 +91,7 @@ impl Pool { } } + /// Returns the file contents for a given checksum, optionally `verify`ing whether the on-disk data matches the checksum. pub(crate) fn get_contents( &self, checksums: &CheckSums, @@ -97,6 +110,7 @@ impl Pool { Ok(data) } + // Helper to return all possible checksum file paths for a given checksum. Checksums considered insecure will be ignored. fn get_checksum_paths(&self, checksums: &CheckSums) -> Result, Error> { if !checksums.is_secure() { bail!("pool cannot operate on files lacking secure checksum!"); @@ -152,6 +166,7 @@ impl Pool { } } impl PoolLockGuard<'_> { + // Helper to scan the pool for all checksum files and the total link count. The resulting HashMap can be used to check whether files in `base_dir` are properly registered in the pool or orphaned. fn get_inode_csum_map(&self) -> Result<(HashMap, u64), Error> { let mut inode_map: HashMap = HashMap::new(); let mut link_count = 0; @@ -214,6 +229,13 @@ impl PoolLockGuard<'_> { Ok((inode_map, link_count)) } + /// Syncs the pool into a target pool, optionally verifying file contents along the way. 
+ /// + /// This proceeds in four phases: + /// - iterate over source pool checksum files, add missing ones to target pool + /// - iterate over source pool links, add missing ones to target pool + /// - iterate over target pool links, remove those which are not present in source pool + /// - if links were removed in phase 3, run GC on target pool pub(crate) fn sync_pool(&self, target: &Pool, verify: bool) -> Result<(), Error> { let target = target.lock()?; @@ -325,6 +347,9 @@ impl PoolLockGuard<'_> { Ok(()) } + /// Adds a new checksum file. + /// + /// If `checksums` contains multiple trusted checksums, they will be linked to the first checksum file. pub(crate) fn add_file( &self, data: &[u8], @@ -349,6 +374,7 @@ impl PoolLockGuard<'_> { Ok(()) } + /// Links previously added file into `path` (relative to `base_dir`). Missing parent directories will be created automatically. pub(crate) fn link_file(&self, checksums: &CheckSums, path: &Path) -> Result { let path = self.pool.get_path(path)?; if !self.pool.path_in_base(&path) { @@ -373,6 +399,7 @@ impl PoolLockGuard<'_> { link_file_do(source, &path) } + /// Unlink a previously linked file at `path` (absolute, must be below `base_dir`). Optionally remove any parent directories that became empty. 
pub(crate) fn unlink_file( &self, mut path: &Path, @@ -401,6 +428,7 @@ impl PoolLockGuard<'_> { Ok(()) } + /// Remove a directory tree at `path` (absolute, must be below `base_dir`) pub(crate) fn remove_dir(&self, path: &Path) -> Result<(), Error> { if !self.pool.path_in_base(path) { bail!("Cannot unlink file outside of pool."); @@ -410,6 +438,9 @@ impl PoolLockGuard<'_> { .map_err(|err| format_err!("Failed to remove {path:?} - {err}")) } + /// Run a garbage collection, removing + /// - any checksum files that have no links outside of `pool_dir` + /// - any files in `base_dir` that have no corresponding checksum files pub(crate) fn gc(&self) -> Result<(usize, u64), Error> { let (inode_map, _link_count) = self.get_inode_csum_map()?; @@ -475,6 +506,7 @@ impl PoolLockGuard<'_> { Ok((count, size)) } + /// Destroy pool by removing `base_dir` and `pool_dir`. pub(crate) fn destroy(self) -> Result<(), Error> { // TODO - this removes the lock file.. std::fs::remove_dir_all(self.pool_dir.clone())?; @@ -482,6 +514,7 @@ impl PoolLockGuard<'_> { Ok(()) } + /// Rename a link or directory from `from` to `to` (both relative to `base_dir`). pub(crate) fn rename(&self, from: &Path, to: &Path) -> Result<(), Error> { let mut abs_from = self.base_dir.clone(); abs_from.push(from); diff --git a/src/types.rs b/src/types.rs index 30da582..5e569a7 100644 --- a/src/types.rs +++ b/src/types.rs @@ -17,6 +17,8 @@ const_regex! { } pub const PROXMOX_SAFE_ID_FORMAT: ApiStringFormat = ApiStringFormat::Pattern(&PROXMOX_SAFE_ID_REGEX); + +/// Schema for config IDs pub const MIRROR_ID_SCHEMA: Schema = StringSchema::new("Mirror name.") .format(&PROXMOX_SAFE_ID_FORMAT) .min_length(3)