diff --git a/src/bin/proxmox-backup-client.rs b/src/bin/proxmox-backup-client.rs index 984eb30c..356813c9 100644 --- a/src/bin/proxmox-backup-client.rs +++ b/src/bin/proxmox-backup-client.rs @@ -231,9 +231,17 @@ async fn backup_directory>( skip_lost_and_found: bool, crypt_config: Option>, catalog: Arc>>, + entries_max: usize, ) -> Result { - let pxar_stream = PxarBackupStream::open(dir_path.as_ref(), device_set, verbose, skip_lost_and_found, catalog)?; + let pxar_stream = PxarBackupStream::open( + dir_path.as_ref(), + device_set, + verbose, + skip_lost_and_found, + catalog, + entries_max, + )?; let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size); let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks @@ -776,6 +784,12 @@ fn spawn_catalog_upload( schema: CHUNK_SIZE_SCHEMA, optional: true, }, + "entries-max": { + type: Integer, + description: "Max number of entries to hold in memory.", + optional: true, + default: pxar::ENCODER_MAX_ENTRIES as isize, + }, } } )] @@ -812,6 +826,8 @@ async fn create_backup( let include_dev = param["include-dev"].as_array(); + let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); + let mut devices = if all_file_systems { None } else { Some(HashSet::new()) }; if let Some(include_dev) = include_dev { @@ -960,6 +976,7 @@ async fn create_backup( skip_lost_and_found, crypt_config.clone(), catalog.clone(), + entries_max as usize, ).await?; manifest.add_file(target, stats.size, stats.csum)?; catalog.lock().unwrap().end_directory()?; diff --git a/src/bin/pxar.rs b/src/bin/pxar.rs index 87c42ccd..a05f94d4 100644 --- a/src/bin/pxar.rs +++ b/src/bin/pxar.rs @@ -178,6 +178,7 @@ fn create_archive( let no_sockets = param["no-sockets"].as_bool().unwrap_or(false); let empty = Vec::new(); let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty); + let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); let devices = if all_file_systems { None } else { Some(HashSet::new()) }; @@ -232,6 +233,7 @@ fn create_archive( false, feature_flags, pattern_list, + entries_max as usize, )?; writer.flush()?; @@ -342,6 +344,15 @@ const API_METHOD_CREATE_ARCHIVE: ApiMethod = ApiMethod::new( &StringSchema::new("Path or pattern matching files to restore.").schema() ).schema() ), + ( + "entries-max", + true, + &IntegerSchema::new("Max number of entries loaded at once into memory") + .default(pxar::ENCODER_MAX_ENTRIES as isize) + .minimum(0) + .maximum(std::isize::MAX) + .schema() + ), ]), ) ); diff --git a/src/client/pxar_backup_stream.rs b/src/client/pxar_backup_stream.rs index 8a1abd03..4ec3bc3d 100644 --- a/src/client/pxar_backup_stream.rs +++ b/src/client/pxar_backup_stream.rs @@ -48,6 +48,7 @@ impl PxarBackupStream { verbose: bool, skip_lost_and_found: bool, catalog: Arc>>, + entries_max: usize, ) -> Result { let (rx, tx) = nix::unistd::pipe()?; @@ -73,6 +74,7 @@ impl PxarBackupStream { skip_lost_and_found, pxar::flags::DEFAULT, exclude_pattern, + entries_max, ) { let mut error = error2.lock().unwrap(); *error = Some(err.to_string()); @@ -95,12 +97,13 @@ impl PxarBackupStream { verbose: bool, skip_lost_and_found: bool, catalog: Arc>>, + entries_max: usize, ) -> Result { let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?; let path = std::path::PathBuf::from(dirname); - Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog) + Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog, entries_max) } } diff --git a/src/pxar/encoder.rs b/src/pxar/encoder.rs index 05747e6d..f65805bd 100644 --- a/src/pxar/encoder.rs +++ b/src/pxar/encoder.rs @@ -29,11 +29,6 @@ use crate::tools::acl; use crate::tools::fs; use crate::tools::xattr; -/// The format requires to build sorted directory lookup tables in -/// memory, so we restrict the number of allowed entries to limit -/// maximum memory usage. -pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024; - #[derive(Eq, PartialEq, Hash)] struct HardLinkInfo { st_dev: u64, @@ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> { // Flags signaling features supported by the filesystem fs_feature_flags: u64, hardlinks: HashMap, + entry_counter: usize, + entry_max: usize, } impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { @@ -82,6 +79,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { skip_lost_and_found: bool, // fixme: should be a feature flag ?? feature_flags: u64, mut excludes: Vec, + entry_max: usize, ) -> Result<(), Error> { const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024; @@ -126,6 +124,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { feature_flags, fs_feature_flags, hardlinks: HashMap::new(), + entry_counter: 0, + entry_max, }; if verbose { @@ -762,14 +762,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { self.full_path().join(filename_osstr) ); } - (_, child_pattern) => name_list.push((filename, stat, child_pattern)), + (_, child_pattern) => { + self.entry_counter += 1; + name_list.push((filename, stat, child_pattern)); + } } - if name_list.len() > MAX_DIRECTORY_ENTRIES { + if self.entry_counter > self.entry_max { bail!( - "too many directory items in {:?} (> {})", - self.full_path(), - MAX_DIRECTORY_ENTRIES + "exceeded max number of entries (> {})", + self.entry_max ); } } @@ -778,8 +780,9 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { } name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let num_entries = name_list.len(); - let mut goodbye_items = Vec::with_capacity(name_list.len()); + let mut goodbye_items = Vec::with_capacity(num_entries); for (filename, stat, exclude_list) in name_list { let start_pos = self.writer_pos; @@ -1049,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { let goodbye_offset = self.writer_pos - dir_start_pos; self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?; + self.entry_counter -= num_entries; //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos); Ok(()) diff --git a/src/pxar/format_definition.rs b/src/pxar/format_definition.rs index b80fc0c0..75c2edcf 100644 --- a/src/pxar/format_definition.rs +++ b/src/pxar/format_definition.rs @@ -256,3 +256,8 @@ pub fn check_ca_header(head: &PxarHeader, htype: u64) -> Result<(), Error> { Ok(()) } + +/// The format requires to build sorted directory lookup tables in +/// memory, so we restrict the number of allowed entries to limit +/// maximum memory usage. +pub const ENCODER_MAX_ENTRIES: usize = 1024 * 1024; diff --git a/tests/catar.rs b/tests/catar.rs index d45b63b2..ed3e6702 100644 --- a/tests/catar.rs +++ b/tests/catar.rs @@ -37,6 +37,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> { false, flags::DEFAULT, Vec::new(), + ENCODER_MAX_ENTRIES, )?; Command::new("cmp")