From 6fc053ed85018fee7d5ae87ab4d5e0d67d858ef9 Mon Sep 17 00:00:00 2001 From: Christian Ebner Date: Fri, 10 Jan 2020 12:50:06 +0100 Subject: [PATCH] pxar: encoder: limit number of max entries held at once in memory during archive creation. Limit the total number of entries and therefore the approximate memory consumption instead of doing this on a per directory basis as it was previously. This makes more sense as it limits not only the width but also the depth of the directory tree. Further, instead of hardcoding this value, allow passing this information as an additional optional parameter 'entries-max'. By this, creation of the archive with directories containing a large number of entries is possible. Signed-off-by: Christian Ebner --- src/bin/proxmox-backup-client.rs | 19 ++++++++++++++++++- src/bin/pxar.rs | 11 +++++++++++ src/client/pxar_backup_stream.rs | 5 ++++- src/pxar/encoder.rs | 26 +++++++++++++++----------- src/pxar/format_definition.rs | 5 +++++ tests/catar.rs | 1 + 6 files changed, 54 insertions(+), 13 deletions(-) diff --git a/src/bin/proxmox-backup-client.rs b/src/bin/proxmox-backup-client.rs index 984eb30c..356813c9 100644 --- a/src/bin/proxmox-backup-client.rs +++ b/src/bin/proxmox-backup-client.rs @@ -231,9 +231,17 @@ async fn backup_directory>( skip_lost_and_found: bool, crypt_config: Option>, catalog: Arc>>, + entries_max: usize, ) -> Result { - let pxar_stream = PxarBackupStream::open(dir_path.as_ref(), device_set, verbose, skip_lost_and_found, catalog)?; + let pxar_stream = PxarBackupStream::open( + dir_path.as_ref(), + device_set, + verbose, + skip_lost_and_found, + catalog, + entries_max, + )?; let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size); let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks @@ -776,6 +784,12 @@ fn spawn_catalog_upload( schema: CHUNK_SIZE_SCHEMA, optional: true, }, + "entries-max": { + type: Integer, + description: "Max number of entries to hold in memory.", + optional: true, + default: 
pxar::ENCODER_MAX_ENTRIES as isize, + }, } } )] @@ -812,6 +826,8 @@ async fn create_backup( let include_dev = param["include-dev"].as_array(); + let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); + let mut devices = if all_file_systems { None } else { Some(HashSet::new()) }; if let Some(include_dev) = include_dev { @@ -960,6 +976,7 @@ async fn create_backup( skip_lost_and_found, crypt_config.clone(), catalog.clone(), + entries_max as usize, ).await?; manifest.add_file(target, stats.size, stats.csum)?; catalog.lock().unwrap().end_directory()?; diff --git a/src/bin/pxar.rs b/src/bin/pxar.rs index 87c42ccd..a05f94d4 100644 --- a/src/bin/pxar.rs +++ b/src/bin/pxar.rs @@ -178,6 +178,7 @@ fn create_archive( let no_sockets = param["no-sockets"].as_bool().unwrap_or(false); let empty = Vec::new(); let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty); + let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); let devices = if all_file_systems { None } else { Some(HashSet::new()) }; @@ -232,6 +233,7 @@ fn create_archive( false, feature_flags, pattern_list, + entries_max as usize, )?; writer.flush()?; @@ -342,6 +344,15 @@ const API_METHOD_CREATE_ARCHIVE: ApiMethod = ApiMethod::new( &StringSchema::new("Path or pattern matching files to restore.").schema() ).schema() ), + ( + "entries-max", + true, + &IntegerSchema::new("Max number of entries loaded at once into memory") + .default(pxar::ENCODER_MAX_ENTRIES as isize) + .minimum(0) + .maximum(std::isize::MAX) + .schema() + ), ]), ) ); diff --git a/src/client/pxar_backup_stream.rs b/src/client/pxar_backup_stream.rs index 8a1abd03..4ec3bc3d 100644 --- a/src/client/pxar_backup_stream.rs +++ b/src/client/pxar_backup_stream.rs @@ -48,6 +48,7 @@ impl PxarBackupStream { verbose: bool, skip_lost_and_found: bool, catalog: Arc>>, + entries_max: usize, ) -> Result { let (rx, tx) = nix::unistd::pipe()?; @@ -73,6 +74,7 @@ impl PxarBackupStream { 
skip_lost_and_found, pxar::flags::DEFAULT, exclude_pattern, + entries_max, ) { let mut error = error2.lock().unwrap(); *error = Some(err.to_string()); @@ -95,12 +97,13 @@ impl PxarBackupStream { verbose: bool, skip_lost_and_found: bool, catalog: Arc>>, + entries_max: usize, ) -> Result { let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?; let path = std::path::PathBuf::from(dirname); - Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog) + Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog, entries_max) } } diff --git a/src/pxar/encoder.rs b/src/pxar/encoder.rs index 05747e6d..f65805bd 100644 --- a/src/pxar/encoder.rs +++ b/src/pxar/encoder.rs @@ -29,11 +29,6 @@ use crate::tools::acl; use crate::tools::fs; use crate::tools::xattr; -/// The format requires to build sorted directory lookup tables in -/// memory, so we restrict the number of allowed entries to limit -/// maximum memory usage. -pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024; - #[derive(Eq, PartialEq, Hash)] struct HardLinkInfo { st_dev: u64, @@ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> { // Flags signaling features supported by the filesystem fs_feature_flags: u64, hardlinks: HashMap, + entry_counter: usize, + entry_max: usize, } impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { @@ -82,6 +79,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { skip_lost_and_found: bool, // fixme: should be a feature flag ?? 
feature_flags: u64, mut excludes: Vec, + entry_max: usize, ) -> Result<(), Error> { const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024; @@ -126,6 +124,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { feature_flags, fs_feature_flags, hardlinks: HashMap::new(), + entry_counter: 0, + entry_max, }; if verbose { @@ -762,14 +762,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { self.full_path().join(filename_osstr) ); } - (_, child_pattern) => name_list.push((filename, stat, child_pattern)), + (_, child_pattern) => { + self.entry_counter += 1; + name_list.push((filename, stat, child_pattern)); + } } - if name_list.len() > MAX_DIRECTORY_ENTRIES { + if self.entry_counter > self.entry_max { bail!( - "too many directory items in {:?} (> {})", - self.full_path(), - MAX_DIRECTORY_ENTRIES + "exceeded max number of entries (> {})", + self.entry_max ); } } @@ -778,8 +780,9 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { } name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + let num_entries = name_list.len(); - let mut goodbye_items = Vec::with_capacity(name_list.len()); + let mut goodbye_items = Vec::with_capacity(num_entries); for (filename, stat, exclude_list) in name_list { let start_pos = self.writer_pos; @@ -1049,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { let goodbye_offset = self.writer_pos - dir_start_pos; self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?; + self.entry_counter -= num_entries; //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos); Ok(()) diff --git a/src/pxar/format_definition.rs b/src/pxar/format_definition.rs index b80fc0c0..75c2edcf 100644 --- a/src/pxar/format_definition.rs +++ b/src/pxar/format_definition.rs @@ -256,3 +256,8 @@ pub fn check_ca_header(head: &PxarHeader, htype: u64) -> Result<(), Error> { Ok(()) } + +/// The format requires to build sorted directory lookup tables in +/// memory, so we restrict the number of allowed 
entries to limit +/// maximum memory usage. +pub const ENCODER_MAX_ENTRIES: usize = 1024 * 1024; diff --git a/tests/catar.rs b/tests/catar.rs index d45b63b2..ed3e6702 100644 --- a/tests/catar.rs +++ b/tests/catar.rs @@ -37,6 +37,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> { false, flags::DEFAULT, Vec::new(), + ENCODER_MAX_ENTRIES, )?; Command::new("cmp")