mirror of
				https://git.proxmox.com/git/proxmox-backup
				synced 2025-10-25 07:28:33 +00:00 
			
		
		
		
	pxar: encoder: limit number of max entries held at once in memory during archive creation.
Limit the total number of entries and therefore the approximate memory consumption instead of doing this on a per directory basis as it was previously. This makes more sense as it limits not only the width but also the depth of the directory tree. Further, instead of hardcoding this value, allow to pass this information as additional optional parameter 'entires-max'. By this, creation of the archive with directories containing a large number of entries is possible. Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
		
							parent
							
								
									141304d64e
								
							
						
					
					
						commit
						6fc053ed85
					
				| @ -231,9 +231,17 @@ async fn backup_directory<P: AsRef<Path>>( | |||||||
|     skip_lost_and_found: bool, |     skip_lost_and_found: bool, | ||||||
|     crypt_config: Option<Arc<CryptConfig>>, |     crypt_config: Option<Arc<CryptConfig>>, | ||||||
|     catalog: Arc<Mutex<CatalogWriter<SenderWriter>>>, |     catalog: Arc<Mutex<CatalogWriter<SenderWriter>>>, | ||||||
|  |     entries_max: usize, | ||||||
| ) -> Result<BackupStats, Error> { | ) -> Result<BackupStats, Error> { | ||||||
| 
 | 
 | ||||||
|     let pxar_stream = PxarBackupStream::open(dir_path.as_ref(), device_set, verbose, skip_lost_and_found, catalog)?; |     let pxar_stream = PxarBackupStream::open( | ||||||
|  |         dir_path.as_ref(), | ||||||
|  |         device_set, | ||||||
|  |         verbose, | ||||||
|  |         skip_lost_and_found, | ||||||
|  |         catalog, | ||||||
|  |         entries_max, | ||||||
|  |     )?; | ||||||
|     let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size); |     let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size); | ||||||
| 
 | 
 | ||||||
|     let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks
 |     let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks
 | ||||||
| @ -776,6 +784,12 @@ fn spawn_catalog_upload( | |||||||
|                schema: CHUNK_SIZE_SCHEMA, |                schema: CHUNK_SIZE_SCHEMA, | ||||||
|                optional: true, |                optional: true, | ||||||
|            }, |            }, | ||||||
|  |            "entries-max": { | ||||||
|  |                type: Integer, | ||||||
|  |                description: "Max number of entries to hold in memory.", | ||||||
|  |                optional: true, | ||||||
|  |                default: pxar::ENCODER_MAX_ENTRIES as isize, | ||||||
|  |            }, | ||||||
|        } |        } | ||||||
|    } |    } | ||||||
| )] | )] | ||||||
| @ -812,6 +826,8 @@ async fn create_backup( | |||||||
| 
 | 
 | ||||||
|     let include_dev = param["include-dev"].as_array(); |     let include_dev = param["include-dev"].as_array(); | ||||||
| 
 | 
 | ||||||
|  |     let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); | ||||||
|  | 
 | ||||||
|     let mut devices = if all_file_systems { None } else { Some(HashSet::new()) }; |     let mut devices = if all_file_systems { None } else { Some(HashSet::new()) }; | ||||||
| 
 | 
 | ||||||
|     if let Some(include_dev) = include_dev { |     if let Some(include_dev) = include_dev { | ||||||
| @ -960,6 +976,7 @@ async fn create_backup( | |||||||
|                     skip_lost_and_found, |                     skip_lost_and_found, | ||||||
|                     crypt_config.clone(), |                     crypt_config.clone(), | ||||||
|                     catalog.clone(), |                     catalog.clone(), | ||||||
|  |                     entries_max as usize, | ||||||
|                 ).await?; |                 ).await?; | ||||||
|                 manifest.add_file(target, stats.size, stats.csum)?; |                 manifest.add_file(target, stats.size, stats.csum)?; | ||||||
|                 catalog.lock().unwrap().end_directory()?; |                 catalog.lock().unwrap().end_directory()?; | ||||||
|  | |||||||
| @ -178,6 +178,7 @@ fn create_archive( | |||||||
|     let no_sockets = param["no-sockets"].as_bool().unwrap_or(false); |     let no_sockets = param["no-sockets"].as_bool().unwrap_or(false); | ||||||
|     let empty = Vec::new(); |     let empty = Vec::new(); | ||||||
|     let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty); |     let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty); | ||||||
|  |     let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64); | ||||||
| 
 | 
 | ||||||
|     let devices = if all_file_systems { None } else { Some(HashSet::new()) }; |     let devices = if all_file_systems { None } else { Some(HashSet::new()) }; | ||||||
| 
 | 
 | ||||||
| @ -232,6 +233,7 @@ fn create_archive( | |||||||
|         false, |         false, | ||||||
|         feature_flags, |         feature_flags, | ||||||
|         pattern_list, |         pattern_list, | ||||||
|  |         entries_max as usize, | ||||||
|     )?; |     )?; | ||||||
| 
 | 
 | ||||||
|     writer.flush()?; |     writer.flush()?; | ||||||
| @ -342,6 +344,15 @@ const API_METHOD_CREATE_ARCHIVE: ApiMethod = ApiMethod::new( | |||||||
|                     &StringSchema::new("Path or pattern matching files to restore.").schema() |                     &StringSchema::new("Path or pattern matching files to restore.").schema() | ||||||
|                 ).schema() |                 ).schema() | ||||||
|             ), |             ), | ||||||
|  |             ( | ||||||
|  |                 "entries-max", | ||||||
|  |                 true, | ||||||
|  |                 &IntegerSchema::new("Max number of entries loaded at once into memory") | ||||||
|  |                     .default(pxar::ENCODER_MAX_ENTRIES as isize) | ||||||
|  |                     .minimum(0) | ||||||
|  |                     .maximum(std::isize::MAX) | ||||||
|  |                     .schema() | ||||||
|  |             ), | ||||||
|         ]), |         ]), | ||||||
|     ) |     ) | ||||||
| ); | ); | ||||||
|  | |||||||
| @ -48,6 +48,7 @@ impl PxarBackupStream { | |||||||
|         verbose: bool, |         verbose: bool, | ||||||
|         skip_lost_and_found: bool, |         skip_lost_and_found: bool, | ||||||
|         catalog: Arc<Mutex<CatalogWriter<W>>>, |         catalog: Arc<Mutex<CatalogWriter<W>>>, | ||||||
|  |         entries_max: usize, | ||||||
|     ) -> Result<Self, Error> { |     ) -> Result<Self, Error> { | ||||||
| 
 | 
 | ||||||
|         let (rx, tx) = nix::unistd::pipe()?; |         let (rx, tx) = nix::unistd::pipe()?; | ||||||
| @ -73,6 +74,7 @@ impl PxarBackupStream { | |||||||
|                 skip_lost_and_found, |                 skip_lost_and_found, | ||||||
|                 pxar::flags::DEFAULT, |                 pxar::flags::DEFAULT, | ||||||
|                 exclude_pattern, |                 exclude_pattern, | ||||||
|  |                 entries_max, | ||||||
|             ) { |             ) { | ||||||
|                 let mut error = error2.lock().unwrap(); |                 let mut error = error2.lock().unwrap(); | ||||||
|                 *error = Some(err.to_string()); |                 *error = Some(err.to_string()); | ||||||
| @ -95,12 +97,13 @@ impl PxarBackupStream { | |||||||
|         verbose: bool, |         verbose: bool, | ||||||
|         skip_lost_and_found: bool, |         skip_lost_and_found: bool, | ||||||
|         catalog: Arc<Mutex<CatalogWriter<W>>>, |         catalog: Arc<Mutex<CatalogWriter<W>>>, | ||||||
|  |         entries_max: usize, | ||||||
|     ) -> Result<Self, Error> { |     ) -> Result<Self, Error> { | ||||||
| 
 | 
 | ||||||
|         let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?; |         let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?; | ||||||
|         let path = std::path::PathBuf::from(dirname); |         let path = std::path::PathBuf::from(dirname); | ||||||
| 
 | 
 | ||||||
|         Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog) |         Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog, entries_max) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -29,11 +29,6 @@ use crate::tools::acl; | |||||||
| use crate::tools::fs; | use crate::tools::fs; | ||||||
| use crate::tools::xattr; | use crate::tools::xattr; | ||||||
| 
 | 
 | ||||||
| /// The format requires to build sorted directory lookup tables in
 |  | ||||||
| /// memory, so we restrict the number of allowed entries to limit
 |  | ||||||
| /// maximum memory usage.
 |  | ||||||
| pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024; |  | ||||||
| 
 |  | ||||||
| #[derive(Eq, PartialEq, Hash)] | #[derive(Eq, PartialEq, Hash)] | ||||||
| struct HardLinkInfo { | struct HardLinkInfo { | ||||||
|     st_dev: u64, |     st_dev: u64, | ||||||
| @ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> { | |||||||
|     // Flags signaling features supported by the filesystem
 |     // Flags signaling features supported by the filesystem
 | ||||||
|     fs_feature_flags: u64, |     fs_feature_flags: u64, | ||||||
|     hardlinks: HashMap<HardLinkInfo, (PathBuf, u64)>, |     hardlinks: HashMap<HardLinkInfo, (PathBuf, u64)>, | ||||||
|  |     entry_counter: usize, | ||||||
|  |     entry_max: usize, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | ||||||
| @ -82,6 +79,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | |||||||
|         skip_lost_and_found: bool, // fixme: should be a feature flag ??
 |         skip_lost_and_found: bool, // fixme: should be a feature flag ??
 | ||||||
|         feature_flags: u64, |         feature_flags: u64, | ||||||
|         mut excludes: Vec<MatchPattern>, |         mut excludes: Vec<MatchPattern>, | ||||||
|  |         entry_max: usize, | ||||||
|     ) -> Result<(), Error> { |     ) -> Result<(), Error> { | ||||||
|         const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024; |         const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024; | ||||||
| 
 | 
 | ||||||
| @ -126,6 +124,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | |||||||
|             feature_flags, |             feature_flags, | ||||||
|             fs_feature_flags, |             fs_feature_flags, | ||||||
|             hardlinks: HashMap::new(), |             hardlinks: HashMap::new(), | ||||||
|  |             entry_counter: 0, | ||||||
|  |             entry_max, | ||||||
|         }; |         }; | ||||||
| 
 | 
 | ||||||
|         if verbose { |         if verbose { | ||||||
| @ -762,14 +762,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | |||||||
|                             self.full_path().join(filename_osstr) |                             self.full_path().join(filename_osstr) | ||||||
|                         ); |                         ); | ||||||
|                     } |                     } | ||||||
|                     (_, child_pattern) => name_list.push((filename, stat, child_pattern)), |                     (_, child_pattern) => { | ||||||
|  |                         self.entry_counter += 1; | ||||||
|  |                         name_list.push((filename, stat, child_pattern)); | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if name_list.len() > MAX_DIRECTORY_ENTRIES { |                 if self.entry_counter > self.entry_max { | ||||||
|                     bail!( |                     bail!( | ||||||
|                         "too many directory items in {:?} (> {})", |                         "exceeded max number of entries (> {})", | ||||||
|                         self.full_path(), |                         self.entry_max | ||||||
|                         MAX_DIRECTORY_ENTRIES |  | ||||||
|                     ); |                     ); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
| @ -778,8 +780,9 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | |||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); |         name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); | ||||||
|  |         let num_entries = name_list.len(); | ||||||
| 
 | 
 | ||||||
|         let mut goodbye_items = Vec::with_capacity(name_list.len()); |         let mut goodbye_items = Vec::with_capacity(num_entries); | ||||||
| 
 | 
 | ||||||
|         for (filename, stat, exclude_list) in name_list { |         for (filename, stat, exclude_list) in name_list { | ||||||
|             let start_pos = self.writer_pos; |             let start_pos = self.writer_pos; | ||||||
| @ -1049,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> { | |||||||
|         let goodbye_offset = self.writer_pos - dir_start_pos; |         let goodbye_offset = self.writer_pos - dir_start_pos; | ||||||
| 
 | 
 | ||||||
|         self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?; |         self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?; | ||||||
|  |         self.entry_counter -= num_entries; | ||||||
| 
 | 
 | ||||||
|         //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos);
 |         //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos);
 | ||||||
|         Ok(()) |         Ok(()) | ||||||
|  | |||||||
| @ -256,3 +256,8 @@ pub fn check_ca_header<T>(head: &PxarHeader, htype: u64) -> Result<(), Error> { | |||||||
| 
 | 
 | ||||||
|     Ok(()) |     Ok(()) | ||||||
| } | } | ||||||
|  | 
 | ||||||
|  | /// The format requires to build sorted directory lookup tables in
 | ||||||
|  | /// memory, so we restrict the number of allowed entries to limit
 | ||||||
|  | /// maximum memory usage.
 | ||||||
|  | pub const ENCODER_MAX_ENTRIES: usize = 1024 * 1024; | ||||||
|  | |||||||
| @ -37,6 +37,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> { | |||||||
|         false, |         false, | ||||||
|         flags::DEFAULT, |         flags::DEFAULT, | ||||||
|         Vec::new(), |         Vec::new(), | ||||||
|  |         ENCODER_MAX_ENTRIES, | ||||||
|     )?; |     )?; | ||||||
| 
 | 
 | ||||||
|     Command::new("cmp") |     Command::new("cmp") | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Christian Ebner
						Christian Ebner