diff --git a/src/backup/catalog.rs b/src/backup/catalog.rs
index ac6898e3..23b670a3 100644
--- a/src/backup/catalog.rs
+++ b/src/backup/catalog.rs
@@ -1,7 +1,6 @@
 use failure::*;
 use std::ffi::{CStr, CString};
 use std::os::unix::ffi::OsStringExt;
-use std::convert::TryInto;
 use std::io::{Read, Write, Seek, SeekFrom};
 use std::convert::TryFrom;
 
@@ -37,68 +36,71 @@ impl DirInfo {
         DirInfo::new(CString::new(b"/".to_vec()).unwrap())
     }
 
-    fn encode_entry(data: &mut Vec<u8>, entry: &DirEntry, pos: u64) {
+    fn encode_entry<W: Write>(
+        writer: &mut W,
+        entry: &DirEntry,
+        pos: u64,
+    ) -> Result<(), Error> {
         match entry {
             DirEntry::Directory { name, start } => {
-                data.push(CatalogEntryType::Directory as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
-                data.extend_from_slice(&(pos-start).to_le_bytes());
+                writer.write_all(&[CatalogEntryType::Directory as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
+                catalog_encode_u64(writer, pos - start)?;
             }
             DirEntry::File { name, size, mtime } => {
-                data.push(CatalogEntryType::File as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
-                data.extend_from_slice(&size.to_le_bytes());
-                data.extend_from_slice(&mtime.to_le_bytes());
+                writer.write_all(&[CatalogEntryType::File as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
+                catalog_encode_u64(writer, *size)?;
+                catalog_encode_u64(writer, *mtime)?;
             }
             DirEntry::Symlink { name } => {
-                data.push(CatalogEntryType::Symlink as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+                writer.write_all(&[CatalogEntryType::Symlink as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
             DirEntry::Hardlink { name } => {
-                data.push(CatalogEntryType::Hardlink as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+                writer.write_all(&[CatalogEntryType::Hardlink as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
             DirEntry::BlockDevice { name } => {
-                data.push(CatalogEntryType::BlockDevice as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+                writer.write_all(&[CatalogEntryType::BlockDevice as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
-            DirEntry::CharDevice { name } => {
-                data.push(CatalogEntryType::CharDevice as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+            DirEntry::CharDevice { name } => {
+                writer.write_all(&[CatalogEntryType::CharDevice as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
             DirEntry::Fifo { name } => {
-                data.push(CatalogEntryType::Fifo as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+                writer.write_all(&[CatalogEntryType::Fifo as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
             DirEntry::Socket { name } => {
-                data.push(CatalogEntryType::Socket as u8);
-                data.extend_from_slice(&(name.len() as u32).to_le_bytes());
-                data.extend_from_slice(name);
+                writer.write_all(&[CatalogEntryType::Socket as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
             }
         }
+        Ok(())
     }
 
     fn encode(self, start: u64) -> Result<(CString, Vec<u8>), Error> {
         let mut table = Vec::new();
-        let count: u32 = self.entries.len().try_into()?;
+        catalog_encode_u64(&mut table, self.entries.len() as u64)?;
         for entry in self.entries {
-            Self::encode_entry(&mut table, &entry, start);
+            Self::encode_entry(&mut table, &entry, start)?;
         }
 
-        let data = Vec::new();
-        let mut writer = std::io::Cursor::new(data);
-        let size: u32 = (4 + 4 + table.len()).try_into()?;
-        writer.write_all(&size.to_le_bytes())?;
-        writer.write_all(&count.to_le_bytes())?;
-        writer.write_all(&table)?;
-        Ok((self.name, writer.into_inner()))
+        let mut data = Vec::new();
+        catalog_encode_u64(&mut data, table.len() as u64)?;
+        data.extend_from_slice(&table);
+
+        Ok((self.name, data))
     }
 }
 
@@ -253,7 +255,7 @@ impl CatalogReader {
         Self { reader }
     }
 
-    fn next_byte<C: Read>(mut reader: C) -> Result<u8, std::io::Error> {
+    fn next_byte<C: Read>(reader: &mut C) -> Result<u8, std::io::Error> {
         let mut buf = [0u8; 1];
         reader.read_exact(&mut buf)?;
         Ok(buf[0])
@@ -272,21 +274,21 @@ impl CatalogReader {
 
         self.reader.seek(SeekFrom::Start(start))?;
 
-        let size = unsafe { self.reader.read_le_value::<u32>()? } as usize;
+        let size = catalog_decode_u64(&mut self.reader)?;
 
-        if size < 8 { bail!("got small directory size {}", size) };
+        if size < 1 { bail!("got small directory size {}", size) };
 
-        let data = self.reader.read_exact_allocated(size - 4)?;
+        let data = self.reader.read_exact_allocated(size as usize)?;
 
         let mut cursor = &data[..];
 
-        let entries = unsafe { cursor.read_le_value::<u32>()? };
+        let entries = catalog_decode_u64(&mut cursor)?;
 
         //println!("TEST {} {} size {}", start, entries, size);
 
         for _ in 0..entries {
             let etype = CatalogEntryType::try_from(Self::next_byte(&mut cursor)?)?;
-            let name_len = unsafe { cursor.read_le_value::<u32>()? };
+            let name_len = catalog_decode_u64(&mut cursor)?;
             let name = cursor.read_exact_allocated(name_len as usize)?;
 
             let mut path = std::path::PathBuf::from(prefix);
@@ -295,7 +297,7 @@ impl CatalogReader {
             match etype {
                 CatalogEntryType::Directory => {
                     println!("{} {:?}", char::from(etype as u8), path);
-                    let offset = unsafe { cursor.read_le_value::<u64>()? };
+                    let offset = catalog_decode_u64(&mut cursor)?;
                     if offset > start {
                         bail!("got wrong directory offset ({} > {})", offset, start);
                     }
@@ -303,8 +305,8 @@ impl CatalogReader {
                     self.dump_dir(&path, pos)?;
                 }
                 CatalogEntryType::File => {
-                    let size = unsafe { cursor.read_le_value::<u64>()? };
-                    let mtime = unsafe { cursor.read_le_value::<u64>()? };
+                    let size = catalog_decode_u64(&mut cursor)?;
+                    let mtime = catalog_decode_u64(&mut cursor)?;
 
                     let dt = Local.timestamp(mtime as i64, 0);
 
@@ -325,3 +327,89 @@ impl CatalogReader {
     }
 
 }
+
+/// Serialize u64 as short, variable length byte sequence
+///
+/// Stores 7 bits per byte, least significant bits first. Bit 8 is set on every
+/// byte except the last one, so a cleared bit 8 marks the end of the sequence.
+/// Values must be smaller than 2^63, so that they fit into 9 bytes.
+pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
+    if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
+
+    // 63 bits at 7 bits per byte need at most 9 bytes, so a stack buffer is enough
+    let mut enc = [0u8; 9];
+    let mut len = 0;
+    let mut d = v;
+    while d >= 128 {
+        enc[len] = (128 | (d & 127)) as u8;
+        len += 1;
+        d >>= 7;
+    }
+    enc[len] = d as u8;
+    len += 1;
+
+    writer.write_all(&enc[..len])?;
+
+    Ok(())
+}
+
+/// Deserialize u64 from variable length byte sequence
+///
+/// Reads at most 9 bytes, which gives a maximum of 63 bits. Fails if the input
+/// ends early or if the 9th byte still has bit 8 set (missing end marker).
+pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
+
+    let mut v: u64 = 0;
+    let mut buf = [0u8];
+
+    for i in 0..9 { // only allow 9 bytes (63 bits)
+        // read_exact() already fails on premature end of input
+        reader.read_exact(&mut buf)?;
+        let t = buf[0];
+        v |= ((t & 127) as u64) << (i*7);
+        if t < 128 {
+            return Ok(v);
+        }
+    }
+
+    bail!("decode_u64 failed - missing end marker");
+}
+
+#[test]
+fn test_catalog_u64_encoder() {
+
+    fn test_encode_decode(value: u64) {
+
+        let mut data = Vec::new();
+        catalog_encode_u64(&mut data, value).unwrap();
+
+        //println!("ENCODE {} {:?}", value, data);
+
+        let slice = &mut &data[..];
+        let decoded = catalog_decode_u64(slice).unwrap();
+
+        //println!("DECODE {}", decoded);
+
+        assert_eq!(decoded, value);
+        // the decoder must consume exactly the encoded bytes
+        assert!(slice.is_empty());
+    }
+
+    test_encode_decode(0);
+    test_encode_decode(126);
+    test_encode_decode(127);
+    test_encode_decode(128);
+    test_encode_decode((1<<12)-1);
+    test_encode_decode((1<<20)-1);
+    test_encode_decode((1<<50)-1);
+    test_encode_decode((1<<63)-1);
+
+    // values >= 2^63 do not fit into 9 bytes and must be rejected
+    assert!(catalog_encode_u64(&mut Vec::<u8>::new(), 1<<63).is_err());
+
+    // truncated input: continuation bit set, but no further byte
+    assert!(catalog_decode_u64(&mut &[0x80u8][..]).is_err());
+
+    // 9 bytes without end marker
+    assert!(catalog_decode_u64(&mut &[0xffu8; 9][..]).is_err());
+}