From 798f7fa065f9e3751dd04ea446e3bee73a1620dd Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Mon, 25 Feb 2019 11:36:05 +0100 Subject: [PATCH] src/backup/chunk_store.rs: return info about compressed chunk size So that we can generate better statistics .. --- src/backup/chunk_store.rs | 16 ++++++++++------ src/backup/dynamic_index.rs | 24 ++++++++++++++++++++---- src/backup/fixed_index.rs | 26 ++++++++++++++++++++------ 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs index 0224b4f1..4d3a9b3d 100644 --- a/src/backup/chunk_store.rs +++ b/src/backup/chunk_store.rs @@ -292,7 +292,7 @@ impl ChunkStore { Ok(()) } - pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32]), Error> { + pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32], u64), Error> { // fixme: use Sha512/256 when available let mut hasher = sha::Sha256::new(); @@ -312,7 +312,7 @@ impl ChunkStore { if let Ok(metadata) = std::fs::metadata(&chunk_path) { if metadata.is_file() { - return Ok((true, digest)); + return Ok((true, digest, metadata.len())); } else { bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str); } @@ -327,7 +327,7 @@ impl ChunkStore { let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?; encoder.write_all(chunk)?; - let (_, encode_result) = encoder.finish(); + let (f, encode_result) = encoder.finish(); encode_result?; if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) { @@ -340,11 +340,15 @@ impl ChunkStore { ); } + // fixme: is there a better way to get the compressed size? + let stat = nix::sys::stat::fstat(f.as_raw_fd())?; + let compressed_size = stat.st_size as u64; + //println!("PATH {:?}", chunk_path); drop(lock); - Ok((false, digest)) + Ok((false, digest, compressed_size)) } pub fn relative_path(&self, path: &Path) -> PathBuf { @@ -372,10 +376,10 @@ fn test_chunk_store1() { assert!(chunk_store.is_err()); let chunk_store = ChunkStore::create("test", &path).unwrap(); - let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); + let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); assert!(!exists); - let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); + let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap(); assert!(exists); diff --git a/src/backup/dynamic_index.rs b/src/backup/dynamic_index.rs index cb203e60..7d4a747d 100644 --- a/src/backup/dynamic_index.rs +++ b/src/backup/dynamic_index.rs @@ -329,6 +329,8 @@ pub struct DynamicIndexWriter { pub uuid: [u8; 16], pub ctime: u64, + compressed_size: u64, + disk_size: u64, chunk_count: usize, chunk_offset: usize, last_chunk: usize, @@ -388,6 +390,8 @@ impl DynamicIndexWriter { ctime, uuid: *uuid.as_bytes(), + compressed_size: 0, + disk_size: 0, chunk_count: 0, chunk_offset: 0, last_chunk: 0, @@ -407,8 +411,12 @@ impl DynamicIndexWriter { self.writer.flush()?; - let avg = ((self.chunk_offset as f64)/(self.chunk_count as f64)) as usize; - println!("Average chunk size {}", avg); + let size = self.chunk_offset; + let avg = ((size as f64)/(self.chunk_count as f64)) as usize; + let compression = (self.compressed_size*100)/(size as u64); + let rate = (self.disk_size*100)/(size as u64); + println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)", + size, avg, compression, self.disk_size, rate); // fixme: if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { @@ -438,8 +446,16 @@ impl DynamicIndexWriter { self.last_chunk = self.chunk_offset; match self.store.insert_chunk(&self.chunk_buffer) { - Ok((is_duplicate, digest)) => { - println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, tools::digest_to_hex(&digest)); + Ok((is_duplicate, digest, compressed_size)) => { + + self.compressed_size += compressed_size; + if is_duplicate { + } else { + self.disk_size += compressed_size; + } + + println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size, + (compressed_size*100)/(chunk_size as u64), is_duplicate, tools::digest_to_hex(&digest)); self.writer.write(unsafe { &std::mem::transmute::(self.chunk_offset as u64) })?; self.writer.write(&digest)?; self.chunk_buffer.truncate(0); diff --git a/src/backup/fixed_index.rs b/src/backup/fixed_index.rs index 29617757..b89257d6 100644 --- a/src/backup/fixed_index.rs +++ b/src/backup/fixed_index.rs @@ -160,7 +160,9 @@ pub struct FixedIndexWriter { tmp_filename: PathBuf, chunk_size: usize, duplicate_chunks: usize, + disk_size: u64, size: usize, + compressed_size: u64, index: *mut u8, pub uuid: [u8; 16], pub ctime: u64, @@ -231,6 +233,8 @@ impl FixedIndexWriter { chunk_size, duplicate_chunks: 0, size, + compressed_size: 0, + disk_size: 0, index: data, ctime, uuid: *uuid.as_bytes(), @@ -249,8 +253,11 @@ impl FixedIndexWriter { self.index = std::ptr::null_mut(); - println!("Original size: {} Compressed size: {} Deduplicated size: {}", - self.size, self.size, self.size - (self.duplicate_chunks*self.chunk_size)); + let compression = (self.compressed_size*100)/(self.size as u64); + let rate = (self.disk_size*100)/(self.size as u64); + + println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)", + self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate); Ok(()) } @@ -290,12 +297,19 @@ impl FixedIndexWriter { if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); } - let (is_duplicate, digest) = self.store.insert_chunk(chunk)?; + let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?; - println!("ADD CHUNK {} {} {} {}", pos, chunk.len(), is_duplicate, tools::digest_to_hex(&digest)); + self.compressed_size += compressed_size; + + println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(), + (compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest)); + + if is_duplicate { + self.duplicate_chunks += 1; + } else { + self.disk_size += compressed_size; + } - if is_duplicate { self.duplicate_chunks += 1; } - let index_pos = (pos/self.chunk_size)*32; unsafe { let dst = self.index.add(index_pos);