commit 6cd4f635b6006a0c22d524f4d7a7b906efd949d6 Author: Wolfgang Bumiller Date: Thu Jan 23 11:18:04 2020 +0100 import Signed-off-by: Wolfgang Bumiller diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..14568f4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +/target +**/*.rs.bk +Cargo.lock +test.pxar diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..eb1c2eb --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "pxar" +version = "0.1.0" +authors = ["Wolfgang Bumiller "] +edition = "2018" + +[[example]] +name = "apxar" +path = "examples/apxar.rs" +required-features = [ "async-example" ] + +[dependencies] +bitflags = "1.2.1" +endian_trait = { version = "0.6", features = ["arrays"] } +failure = "0.1" +siphasher = "0.3" + +futures = { version = "0.3.1", optional = true } +tokio = { version = "0.2.10", optional = true, default-features = false } + +[features] +default = [ "futures-io", "tokio-io" ] +futures-io = [ "futures" ] +tokio-io = [ "tokio" ] +async-example = [ + "futures-io", + "tokio-io", + "tokio/fs", + "tokio/rt-threaded", + "tokio/io-driver", + "tokio/macros", +] diff --git a/examples/apxar.rs b/examples/apxar.rs new file mode 100644 index 0000000..03c7590 --- /dev/null +++ b/examples/apxar.rs @@ -0,0 +1,32 @@ +use pxar::decoder::aio::Decoder; + +#[tokio::main] +async fn main() { + let mut args = std::env::args_os().skip(1); + + let file = args.next().expect("expected a file name"); + let file = tokio::fs::File::open(file) + .await + .expect("failed to open file"); + + let mut reader = Decoder::from_tokio(file) + .await + .expect("failed to open pxar archive contents"); + let mut i = 0; + while let Some(entry) = reader.next().await { + println!("{:#?}", entry.expect("failed to parse entry").path()); + i += 1; + if i == 2 { + break; + } + } + + // Use a Stream for the remaining entries: + use futures::stream::StreamExt; + + let mut stream = reader.into_stream(); + + while let Some(entry) = stream.next().await { + println!("{:#?}", entry.expect("failed to parse entry").path()); + } +} diff --git a/examples/randaccess.rs b/examples/randaccess.rs new file mode 100644 index 0000000..f228155 --- /dev/null +++ b/examples/randaccess.rs @@ -0,0 +1,62 @@ +use pxar::accessor::Accessor; + +fn main() { + let mut args = std::env::args_os().skip(1); + + let file = args.next().expect("expected a file name"); + + let mut accessor = Accessor::open(file).expect("failed to open file"); + let mut dir = accessor + .open_root() + .expect("failed to open archive root directory"); + for i in dir.decode_full().expect("failed to access root directory") { + println!("{:#?}", i.expect("failed to parse entry").path()); + } + + let da = dir + .lookup("da") + .expect("error looking up da/") + .expect("failed to lookup da/"); + dir.lookup("db").expect("failed to lookup db"); + dir.lookup("root1.txt").expect("failed to lookup root1.txt"); + dir.lookup("root2.txt").expect("failed to lookup root2.txt"); + + println!("{:?}", da.entry()); + let da = da.enter_directory().expect("failed to enter /da directory"); + for i in da.decode_full().expect("failed to access /da directory") { + println!( + " ==> {:#?}", + i.expect("failed to parse /da file entry").path() + ); + } + + for i in dir.read_dir() { + let i = i.expect("failed to read directory entry"); + println!("read_dir => {:?}", i.file_name()); + } + + // let file = tokio::fs::File::open(file) + // .await + // .expect("failed to open file"); + // + // let mut reader = Accessor::from_tokio(file) + // .await + // 
.expect("failed to open pxar archive contents"); + // let mut i = 0; + // while let Some(entry) = reader.next().await { + // println!("{:#?}", entry.expect("failed to parse entry").path()); + // i += 1; + // if i == 2 { + // break; + // } + // } + // + // // Use a Stream for the remaining entries: + // use futures::stream::StreamExt; + // + // let mut stream = reader.into_stream(); + // + // while let Some(entry) = stream.next().await { + // println!("{:#?}", entry.expect("failed to parse entry").path()); + // } +} diff --git a/rust-toolchain b/rust-toolchain new file mode 100644 index 0000000..bf867e0 --- /dev/null +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..32a9786 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +edition = "2018" diff --git a/src/accessor.rs b/src/accessor.rs new file mode 100644 index 0000000..6655fa3 --- /dev/null +++ b/src/accessor.rs @@ -0,0 +1,443 @@ +//! Random access for PXAR files. + +use std::ffi::OsString; +use std::io; +use std::mem::{size_of, size_of_val, MaybeUninit}; +use std::ops::Range; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use endian_trait::Endian; + +use crate::decoder::{self, DecoderImpl}; +use crate::format::{self, GoodbyeItem}; +use crate::poll_fn::poll_fn; +use crate::util; +use crate::Entry; + +pub mod aio; +pub mod sync; + +#[doc(inline)] +pub use sync::Accessor; + +/// Random access read implementation. +pub trait ReadAt { + fn poll_read_at( + self: Pin<&Self>, + cx: &mut Context, + buf: &mut [u8], + offset: u64, + ) -> Poll>; +} + +/// We do not want to bother with actual polling, so we implement `async fn` variants of the above +/// on `dyn ReadAt`. +/// +/// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that +/// we'd then need to define all the `Future` types they return manually and explicitly. Since we +/// have no use for them, all we want is the ability to use `async fn`... +/// +/// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the +/// decoder's code, but that's fine. +impl<'a> dyn ReadAt + 'a { + /// awaitable version of `poll_read_at`. + async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result { + poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await + } + + /// `read_exact_at` - since that's what we _actually_ want most of the time. + async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> { + while !buf.is_empty() { + match self.read_at(buf, offset).await? { + 0 => io_bail!("unexpected EOF"), + got => { + buf = &mut buf[got..]; + offset += got as u64; + } + } + } + Ok(()) + } + + /// Helper to read into an `Endian`-implementing `struct`. + async fn read_entry_at(&self, offset: u64) -> io::Result { + let mut data = MaybeUninit::::uninit(); + let buf = + unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::()) }; + self.read_exact_at(buf, offset).await?; + Ok(unsafe { data.assume_init().from_le() }) + } + + /// Helper to read into an allocated byte vector. + async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result> { + let mut data = util::vec_new(size); + self.read_exact_at(&mut data[..], offset).await?; + Ok(data) + } +} + +/// The random access state machine implementation. 
+pub struct AccessorImpl { + input: T, + size: u64, +} + +impl AccessorImpl { + pub async fn new(input: T, size: u64) -> io::Result { + if size < (size_of::() as u64) { + io_bail!("too small to contain a pxar archive"); + } + Ok(Self { input, size }) + } + + pub async fn open_root<'a>(&'a self) -> io::Result> { + DirectoryImpl::open_at_end(&self.input, self.size, "/".into()).await + } +} + +/// The directory random-access state machine implementation. +pub struct DirectoryImpl<'a> { + input: &'a dyn ReadAt, + entry_ofs: u64, + goodbye_ofs: u64, + size: u64, + table: Box<[GoodbyeItem]>, + path: PathBuf, +} + +impl<'a> DirectoryImpl<'a> { + /// Open a directory ending at the specified position. + pub(crate) async fn open_at_end( + input: &'a dyn ReadAt, + end_offset: u64, + path: PathBuf, + ) -> io::Result> { + let tail = Self::read_tail_entry(input, end_offset).await?; + + if end_offset < tail.size { + io_bail!("goodbye tail size out of range"); + } + + let goodbye_ofs = end_offset - tail.size; + + if goodbye_ofs < tail.offset { + io_bail!("goodbye offset out of range"); + } + + let entry_ofs = goodbye_ofs - tail.offset; + let size = end_offset - entry_ofs; + + let mut this = Self { + input, + entry_ofs, + goodbye_ofs, + size, + table: Box::new([]), + path, + }; + + // sanity check: + if this.table_size() % (size_of::() as u64) != 0 { + io_bail!("invalid goodbye table size: {}", this.table_size()); + } + + this.table = this.load_table().await?; + + Ok(this) + } + + /// Load the entire goodbye table: + async fn load_table(&self) -> io::Result> { + let len = self.len(); + let mut data = Vec::with_capacity(self.len()); + unsafe { + data.set_len(len); + let slice = std::slice::from_raw_parts_mut( + data.as_mut_ptr() as *mut u8, + len * size_of_val(&data[0]), + ); + self.input.read_exact_at(slice, self.table_offset()).await?; + drop(slice); + } + Ok(data.into_boxed_slice()) + } + + #[inline] + fn end_offset(&self) -> u64 { + self.entry_ofs + self.size + } + + #[inline] + fn table_size(&self) -> u64 { + (self.end_offset() - self.goodbye_ofs) - (size_of::() as u64) + } + + #[inline] + fn table_offset(&self) -> u64 { + self.goodbye_ofs + (size_of::() as u64) + } + + /// Length *excluding* the tail marker! + #[inline] + fn len(&self) -> usize { + (self.table_size() / (size_of::() as u64)) as usize - 1 + } + + /// Read the goodbye tail and perform some sanity checks. + async fn read_tail_entry(input: &'a dyn ReadAt, end_offset: u64) -> io::Result { + if end_offset < (size_of::() as u64) { + io_bail!("goodbye tail does not fit"); + } + + let tail_offset = end_offset - (size_of::() as u64); + let tail: GoodbyeItem = input.read_entry_at(tail_offset).await?; + + if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER { + io_bail!("no goodbye tail marker found"); + } + + Ok(tail) + } + + /// Get a decoder for the directory contents. + pub(crate) async fn decode_full(&self) -> io::Result>> { + let (dir, decoder) = self + .decode_one_entry(self.entry_ofs..(self.entry_ofs + self.size), None) + .await?; + if !dir.is_dir() { + io_bail!("directory does not seem to be a directory"); + } + Ok(decoder) + } + + async fn get_decoder( + &self, + entry_range: Range, + file_name: Option<&Path>, + ) -> io::Result>> { + Ok(DecoderImpl::new_full( + SeqReadAtAdapter::new(self.input, entry_range), + match file_name { + None => self.path.clone(), + Some(file) => self.path.join(file), + }, + ) + .await?) 
+ } + + async fn decode_one_entry( + &self, + entry_range: Range, + file_name: Option<&Path>, + ) -> io::Result<(Entry, DecoderImpl>)> { + let mut decoder = self.get_decoder(entry_range, file_name).await?; + let entry = decoder + .next() + .await + .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??; + Ok((entry, decoder)) + } + + fn lookup_hash_position(&self, hash: u64) -> Option { + format::search_binary_tree_array_by(&self.table, |i| hash.cmp(&i.hash)) + } + + /// Lookup a directory entry. + pub async fn lookup(&'a self, path: &Path) -> io::Result>> { + let hash = format::hash_filename(path.as_os_str().as_bytes()); + let index = match self.lookup_hash_position(hash) { + Some(index) => index, + None => return Ok(None), + }; + + // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's + // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry. + + while index < self.table.len() && self.table[index].hash == hash { + let cursor = self.get_cursor(index).await?; + if cursor.file_name == path { + return Ok(Some(cursor.get_entry().await?)); + } + } + + Ok(None) + } + + async fn get_cursor(&'a self, index: usize) -> io::Result> { + let entry = &self.table[index]; + let file_goodbye_ofs = entry.offset; + if self.goodbye_ofs < file_goodbye_ofs { + io_bail!("invalid file offset"); + } + + let file_ofs = self.goodbye_ofs - file_goodbye_ofs; + let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?; + + Ok(DirEntryImpl { + dir: self, + file_name, + entry_range: Range { + start: entry_ofs, + end: file_ofs + entry.size, + }, + }) + } + + async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> { + let head: format::Header = self.input.read_entry_at(file_ofs).await?; + if head.htype != format::PXAR_FILENAME { + io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype); + } + + let mut path = self + .input + .read_exact_data_at( + head.content_size() as usize, + file_ofs + (size_of_val(&head) as u64), + ) + .await?; + + if path.pop() != Some(0) { + io_bail!("invalid file name (missing terminating zero)"); + } + + if path.is_empty() { + io_bail!("invalid empty file name"); + } + + let file_name = PathBuf::from(OsString::from_vec(path)); + format::check_file_name(&file_name)?; + + Ok((file_name, file_ofs + head.full_size())) + } + + pub fn read_dir(&'a self) -> ReadDirImpl<'a> { + ReadDirImpl::new(self, 0) + } +} + +/// A file entry retrieved from a Directory. +pub struct FileEntryImpl<'a> { + parent: &'a DirectoryImpl<'a>, + entry: Entry, + decoder: Option>>, + end_offset: u64, +} + +impl<'a> FileEntryImpl<'a> { + pub async fn enter_directory(&self) -> io::Result> { + if !self.entry.is_dir() { + io_bail!("enter_directory() on a non-directory"); + } + + DirectoryImpl::open_at_end(self.parent.input, self.end_offset, self.entry.path.clone()) + .await + } + + #[inline] + pub fn into_entry(self) -> Entry { + self.entry + } + + #[inline] + pub fn entry(&self) -> &Entry { + &self.entry + } +} + +/// An iterator over the contents of a directory. 
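// Note on `lookup` above (illustrative, not part of this commit):
// `lookup_hash_position` uses `format::search_binary_tree_array_by`, which walks
// the goodbye table as a binary search tree stored in array form, with the
// children of the node at index `i` at `2*i + 1` and `2*i + 2`. For example, a
// table whose hashes, in sorted order, are
//
//     10 20 30 40 50 60 70
//
// would be laid out as
//
//     40 20 60 10 30 50 70
//
// and that search finds every one of them. Entries whose hashes collide are then
// disambiguated by comparing the decoded file name, as `lookup` does above.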
+pub struct ReadDirImpl<'a> { + dir: &'a DirectoryImpl<'a>, + at: usize, +} + +impl<'a> ReadDirImpl<'a> { + pub fn new(dir: &'a DirectoryImpl<'a>, at: usize) -> Self { + Self { dir, at } + } + + pub async fn next(&mut self) -> io::Result>> { + if self.at == self.dir.table.len() { + Ok(None) + } else { + let cursor = self.dir.get_cursor(self.at).await?; + self.at += 1; + Ok(Some(cursor)) + } + } +} + +/// A cursor pointing to a file in a directory. +/// +/// At this point only the file name has been read and we remembered the position for finding the +/// actual data. This can be upgraded into a FileEntryImpl. +pub struct DirEntryImpl<'a> { + dir: &'a DirectoryImpl<'a>, + file_name: PathBuf, + entry_range: Range, +} + +impl<'a> DirEntryImpl<'a> { + pub fn file_name(&self) -> &Path { + &self.file_name + } + + pub async fn get_entry(&self) -> io::Result> { + let end_offset = self.entry_range.end; + let (entry, decoder) = self + .dir + .decode_one_entry(self.entry_range.clone(), Some(&self.file_name)) + .await?; + let decoder = if entry.is_dir() { Some(decoder) } else { None }; + + Ok(FileEntryImpl { + parent: self.dir, + entry, + decoder, + end_offset, + }) + } +} + +#[doc(hidden)] +pub struct SeqReadAtAdapter<'a> { + input: &'a dyn ReadAt, + range: Range, +} + +impl<'a> SeqReadAtAdapter<'a> { + pub fn new(input: &'a dyn ReadAt, range: Range) -> Self { + Self { input, range } + } + + #[inline] + fn remaining(&self) -> usize { + (self.range.end - self.range.start) as usize + } +} + +impl<'a> decoder::SeqRead for SeqReadAtAdapter<'a> { + fn poll_seq_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &mut [u8], + ) -> Poll> { + let len = buf.len().min(self.remaining()); + let buf = &mut buf[..len]; + + let this = self.get_mut(); + + let got = ready!(unsafe { + Pin::new_unchecked(this.input).poll_read_at(cx, buf, this.range.start) + })?; + this.range.start += got as u64; + Poll::Ready(Ok(got)) + } + + fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll>> { + Poll::Ready(Some(Ok(self.range.start))) + } +} diff --git a/src/accessor/aio.rs b/src/accessor/aio.rs new file mode 100644 index 0000000..b75d365 --- /dev/null +++ b/src/accessor/aio.rs @@ -0,0 +1,3 @@ +//! Asynchronous `pxar` random-access handling. +//! +//! Currently neither tokio nor futures have an `AsyncFileExt` variant. diff --git a/src/accessor/sync.rs b/src/accessor/sync.rs new file mode 100644 index 0000000..7be5a9a --- /dev/null +++ b/src/accessor/sync.rs @@ -0,0 +1,183 @@ +//! Blocking `pxar` random access handling. + +use std::io; +use std::os::unix::fs::FileExt; +use std::path::Path; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use crate::accessor::{self, ReadAt}; +use crate::decoder::Decoder; +use crate::util::poll_result_once; +use crate::Entry; + +/// Blocking `pxar` random-access decoder. +/// +/// This is the blocking I/O version of the `pxar` accessor. This will *not* work with an +/// asynchronous I/O object. I/O must always return `Poll::Ready`. +/// +/// Attempting to use a `Waker` from this context *will* `panic!` +/// +/// If you need to use asynchronous I/O, use `aio::Accessor`. +#[repr(transparent)] +pub struct Accessor { + inner: accessor::AccessorImpl, +} + +impl Accessor { + /// Decode a `pxar` archive from a standard file implementing `FileExt`. + #[inline] + pub fn from_file_and_size(input: T, size: u64) -> io::Result>> { + Accessor::new(FileReader::new(input), size) + } +} + +impl Accessor> { + /// Decode a `pxar` archive from a regular `std::io::File` input. 
+ #[inline] + pub fn from_file(input: std::fs::File) -> io::Result { + let size = input.metadata()?.len(); + Accessor::from_file_and_size(input, size) + } + + /// Convenience shortcut for `File::open` followed by `Accessor::from_file`. + pub fn open>(path: P) -> io::Result { + Self::from_file(std::fs::File::open(path.as_ref())?) + } +} + +impl Accessor { + /// Create a *blocking* random-access decoder from an input implementing our internal read + /// interface. + /// + /// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is + /// not allowed to use the `Waker`, as this will cause a `panic!`. + pub fn new(input: T, size: u64) -> io::Result { + Ok(Self { + inner: poll_result_once(accessor::AccessorImpl::new(input, size))?, + }) + } + + /// Open a directory handle to the root of the pxar archive. + pub fn open_root<'a>(&'a self) -> io::Result> { + Ok(Directory::new(poll_result_once(self.inner.open_root())?)) + } +} + +/// Adapter for FileExt readers. +pub struct FileReader { + inner: T, +} + +impl FileReader { + pub fn new(inner: T) -> Self { + Self { inner } + } +} + +impl ReadAt for FileReader { + fn poll_read_at( + self: Pin<&Self>, + _cx: &mut Context, + buf: &mut [u8], + offset: u64, + ) -> Poll> { + Poll::Ready(self.get_ref().inner.read_at(buf, offset)) + } +} + +/// Blocking Directory variant: +#[repr(transparent)] +pub struct Directory<'a> { + inner: accessor::DirectoryImpl<'a>, +} + +impl<'a> Directory<'a> { + fn new(inner: accessor::DirectoryImpl<'a>) -> Self { + Self { inner } + } + + /// Get a decoder for the directory contents. + pub fn decode_full(&self) -> io::Result>> { + Ok(Decoder::from_impl(poll_result_once( + self.inner.decode_full(), + )?)) + } + + /// Lookup an entry in a directory. + pub fn lookup>(&'a self, path: P) -> io::Result>> { + if let Some(file_entry) = poll_result_once(self.inner.lookup(path.as_ref()))? { + Ok(Some(FileEntry { inner: file_entry })) + } else { + Ok(None) + } + } + + /// Get an iterator over the directory's contents. + pub fn read_dir(&'a self) -> ReadDir<'a> { + ReadDir { + inner: self.inner.read_dir(), + } + } +} + +/// A file entry retrieved from a `Directory` via the `lookup` method. +#[repr(transparent)] +pub struct FileEntry<'a> { + inner: accessor::FileEntryImpl<'a>, +} + +impl<'a> FileEntry<'a> { + pub fn enter_directory(&self) -> io::Result> { + Ok(Directory::new(poll_result_once( + self.inner.enter_directory(), + )?)) + } + + #[inline] + pub fn into_entry(self) -> Entry { + self.inner.into_entry() + } + + #[inline] + pub fn entry(&self) -> &Entry { + &self.inner.entry() + } +} + +/// An iterator over the contents of a `Directory`. +#[repr(transparent)] +pub struct ReadDir<'a> { + inner: accessor::ReadDirImpl<'a>, +} + +impl<'a> Iterator for ReadDir<'a> { + type Item = io::Result>; + + fn next(&mut self) -> Option { + match poll_result_once(self.inner.next()) { + Ok(Some(inner)) => Some(Ok(DirEntry { inner })), + Ok(None) => None, + Err(err) => Some(Err(err)), + } + } +} + +impl<'a> std::iter::FusedIterator for ReadDir<'a> {} + +/// A directory entry. When iterating through the contents of a directory we first get access to +/// the file name. The remaining information can be decoded afterwards. 
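// Usage sketch (illustrative, not part of this commit; assumes a function
// returning `io::Result<()>` and a `Directory` named `dir` as returned by
// `open_root()`): `read_dir()` only decodes file names, and a `DirEntry` is
// upgraded on demand via `get_entry()`. The name "root1.txt" is the one used in
// examples/randaccess.rs:
//
//     for item in dir.read_dir() {
//         let item = item?;
//         if item.file_name() == Path::new("root1.txt") {
//             let file = item.get_entry()?;
//             println!("{:?}", file.entry().metadata());
//         }
//     }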
+#[repr(transparent)] +pub struct DirEntry<'a> { + inner: accessor::DirEntryImpl<'a>, +} + +impl<'a> DirEntry<'a> { + pub fn file_name(&self) -> &Path { + self.inner.file_name() + } + + pub fn get_entry(&self) -> io::Result> { + poll_result_once(self.inner.get_entry()).map(|inner| FileEntry { inner }) + } +} diff --git a/src/bin/pxar.rs b/src/bin/pxar.rs new file mode 100644 index 0000000..9c269a7 --- /dev/null +++ b/src/bin/pxar.rs @@ -0,0 +1,13 @@ +use pxar::decoder::Decoder; + +fn main() { + let mut args = std::env::args_os().skip(1); + + let file = args.next().expect("expected a file name"); + let file = std::fs::File::open(file).expect("failed to open file"); + + let reader = Decoder::from_std(file).expect("failed to open pxar archive contents"); + for entry in reader { + println!("{:#?}", entry.expect("failed to parse entry").path()); + } +} diff --git a/src/decoder.rs b/src/decoder.rs new file mode 100644 index 0000000..b7f143e --- /dev/null +++ b/src/decoder.rs @@ -0,0 +1,553 @@ +//! The `pxar` decoder state machine. +//! +//! This is the implementation used by both the synchronous and async pxar wrappers. + +use std::convert::TryFrom; +use std::ffi::OsString; +use std::io; +use std::mem::{self, size_of, size_of_val, MaybeUninit}; +use std::os::unix::ffi::{OsStrExt, OsStringExt}; +use std::path::{Path, PathBuf}; +use std::pin::Pin; +use std::task::{Context, Poll}; + +//use std::os::unix::fs::FileExt; + +use endian_trait::Endian; + +use crate::format::{self, Header}; +use crate::poll_fn::poll_fn; +use crate::util::{self, io_err_other}; +use crate::{Entry, EntryKind, Metadata}; + +pub mod aio; +pub mod sync; + +#[doc(inline)] +pub use sync::Decoder; + +/// To skip through non-seekable files. +static mut SCRATCH_BUFFER: MaybeUninit<[u8; 4096]> = MaybeUninit::uninit(); + +fn scratch_buffer() -> &'static mut [u8] { + unsafe { &mut (*SCRATCH_BUFFER.as_mut_ptr())[..] } +} + +/// Sequential read interface used by the decoder's state machine. +/// +/// To simply iterate through a directory we just need the equivalent of `poll_read()`. +/// +/// Currently we also have a `poll_position()` method which can be added for types supporting +/// `Seek` or `AsyncSeek`. In this case the starting position of each entry becomes available +/// (accessible via the `Entry::offset()`), to allow jumping between entries. +pub trait SeqRead { + /// Mostly we want to read sequentially, so this is basically an `AsyncRead` equivalent. + fn poll_seq_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &mut [u8], + ) -> Poll>; + + /// While going through the data we may want to take notes about some offsets within the file + /// for later. If the reader does not support seeking or positional reading, this can just + /// return `None`. + fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll>> { + Poll::Ready(None) + } +} + +/// Allow using trait objects for generics taking a `SeqRead`: +impl<'a> SeqRead for &mut (dyn SeqRead + 'a) { + fn poll_seq_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &mut [u8], + ) -> Poll> { + unsafe { + self.map_unchecked_mut(|this| &mut **this) + .poll_seq_read(cx, buf) + } + } + + fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll>> { + unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) } + } +} + +/// We do not want to bother with actual polling, so we implement `async fn` variants of the above +/// on `dyn SeqRead`. 
+/// +/// The reason why this is not an internal `SeqReadExt` trait like `AsyncReadExt` is simply that +/// we'd then need to define all the `Future` types they return manually and explicitly. Since we +/// have no use for them, all we want is the ability to use `async fn`... +/// +/// The downside is that we need some `(&mut self.input as &mut dyn SeqRead)` casts in the +/// decoder's code, but that's fine. +impl<'a> dyn SeqRead + 'a { + /// awaitable version of `poll_position`. + async fn position(&mut self) -> Option> { + poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_position(cx) }).await + } + + /// awaitable version of `poll_seq_read`. + async fn seq_read(&mut self, buf: &mut [u8]) -> io::Result { + poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_seq_read(cx, buf) }).await + } + + /// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling + async fn seq_read_exact_or_eof(&mut self, mut buf: &mut [u8]) -> io::Result> { + let mut eof_ok = true; + while !buf.is_empty() { + match self.seq_read(buf).await? { + 0 if eof_ok => break, + 0 => io_bail!("unexpected EOF"), + got => buf = &mut buf[got..], + } + eof_ok = false; + } + Ok(Some(())) + } + + /// `read_exact` - since that's what we _actually_ want most of the time. + async fn seq_read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + match self.seq_read_exact_or_eof(buf).await? { + Some(()) => Ok(()), + None => io_bail!("unexpected eof"), + } + } + + /// Helper to read into an allocated byte vector. + async fn seq_read_exact_data(&mut self, size: usize) -> io::Result> { + let mut data = util::vec_new(size); + self.seq_read_exact(&mut data[..]).await?; + Ok(data) + } + + /// `seq_read_entry` with EOF handling + async fn seq_read_entry_or_eof(&mut self) -> io::Result> { + let mut data = MaybeUninit::::uninit(); + let buf = + unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::()) }; + if self.seq_read_exact_or_eof(buf).await?.is_none() { + return Ok(None); + } + Ok(Some(unsafe { data.assume_init().from_le() })) + } + + /// Helper to read into an `Endian`-implementing `struct`. + async fn seq_read_entry(&mut self) -> io::Result { + self.seq_read_entry_or_eof() + .await? + .ok_or_else(|| io_format_err!("unexepcted EOF")) + } +} + +/// The decoder state machine implementation. +/// +/// We use `async fn` to implement the decoder state machine so that we can easily plug in both +/// synchronous or `async` I/O objects in as input. +pub struct DecoderImpl { + input: T, + current_header: Header, + entry: Entry, + path_lengths: Vec, + state: State, + with_goodbye_tables: bool, +} + +enum State { + Begin, + Default, + InPayload, + InDirectory, + Eof, +} + +/// Control flow while parsing items. +/// +/// When parsing an entry, we usually go through all of its attribute items. Once we reach the end +/// of the entry we stop. +/// Note that if we're in a directory, we stopped at the beginning of its contents. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ItemResult { + /// We parsed an "attribute" item and should continue parsing. + Attribute, + + /// We finished an entry (`SYMLINK`, `HARDLINK`, ...) or just entered the contents of a + /// directory (`FILENAME`, `GOODBYE`). + /// + /// We stop moving forward at this point. 
+ Entry, +} + +impl DecoderImpl { + pub async fn new(input: T) -> io::Result { + Self::new_full(input, "/".into()).await + } + + pub(crate) async fn new_full(mut input: T, path: PathBuf) -> io::Result { + let offset = (&mut input as &mut dyn SeqRead) + .position() + .await + .transpose()?; + let this = DecoderImpl { + input, + current_header: unsafe { mem::zeroed() }, + entry: Entry { + path, + kind: EntryKind::EndOfDirectory, + metadata: Metadata::default(), + offset, + }, + path_lengths: Vec::new(), + state: State::Begin, + with_goodbye_tables: false, + }; + + // this.read_next_entry().await?; + + Ok(this) + } + + /// Get the next file entry, recursing into directories. + pub async fn next(&mut self) -> Option> { + self.next_do().await.transpose() + } + + pub(crate) async fn next_do(&mut self) -> io::Result> { + loop { + match self.state { + State::Eof => return Ok(None), + State::Begin => return self.read_next_entry().await.map(Some), + State::Default => { + // we completely finished an entry, so now we're going "up" in the directory + // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE: + self.read_next_item().await?; + } + State::InPayload => { + // We need to skip the current payload first. + self.skip_entry().await?; + self.read_next_item().await?; + } + State::InDirectory => { + // We're at the next FILENAME or GOODBYE item. + } + } + + match self.current_header.htype { + format::PXAR_FILENAME => return self.handle_file_entry().await, + format::PXAR_GOODBYE => { + if self.with_goodbye_tables { + self.entry.kind = EntryKind::EndOfDirectory; + let offset = (&mut self.input as &mut dyn SeqRead) + .position() + .await + .transpose()?; + self.entry.offset = offset; + self.state = State::InPayload; + return Ok(Some(self.entry.take())); + } + + self.skip_entry().await?; + if self.path_lengths.pop().is_some() { + self.state = State::Default; + // and move on: + continue; + } else { + self.state = State::Eof; + // early out: + return Ok(None); + } + } + h => io_bail!( + "expected filename or directory-goodbye pxar entry, got: {:x}", + h + ), + } + } + } + + async fn handle_file_entry(&mut self) -> io::Result> { + let mut data = self.read_entry_as_bytes().await?; + + // filenames are zero terminated! + if data.pop() != Some(0) { + io_bail!("illegal path found (missing terminating zero)"); + } + if data.is_empty() { + io_bail!("illegal path found (empty)"); + } + + let path = PathBuf::from(OsString::from_vec(data)); + self.set_path(&path)?; + self.read_next_entry().await.map(Some) + } + + fn reset_path(&mut self) -> io::Result<()> { + let path_len = *self + .path_lengths + .last() + .ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?; + let mut path = mem::replace(&mut self.entry.path, PathBuf::new()) + .into_os_string() + .into_vec(); + path.truncate(path_len); + self.entry.path = PathBuf::from(OsString::from_vec(path)); + Ok(()) + } + + fn set_path(&mut self, path: &Path) -> io::Result<()> { + self.reset_path()?; + self.entry.path.push(path); + Ok(()) + } + + async fn read_next_entry_or_eof(&mut self) -> io::Result> { + self.state = State::Default; + self.entry.clear_data(); + + #[derive(Endian)] + #[repr(C)] + struct WithHeader { + header: Header, + data: U, + } + + let entry: WithHeader = { + let input: &mut dyn SeqRead = &mut self.input; + match input.seq_read_entry_or_eof().await? 
{ + None => return Ok(None), + Some(entry) => entry, + } + }; + + if entry.header.htype != format::PXAR_ENTRY { + io_bail!( + "expected pxar entry of type 'Entry', got: {:x}", + entry.header.htype + ); + } + + self.current_header = unsafe { mem::zeroed() }; + self.entry.metadata = Metadata { + stat: entry.data, + ..Default::default() + }; + + while self.read_next_item().await? != ItemResult::Entry {} + + if self.entry.is_dir() { + self.path_lengths + .push(self.entry.path.as_os_str().as_bytes().len()); + } + + Ok(Some(self.entry.take())) + } + + async fn read_next_entry(&mut self) -> io::Result { + self.read_next_entry_or_eof() + .await? + .ok_or_else(|| io_format_err!("unexpected EOF")) + } + + async fn read_next_item(&mut self) -> io::Result { + self.read_next_header().await?; + self.read_current_item().await + } + + async fn read_next_header(&mut self) -> io::Result<()> { + let dest = unsafe { + std::slice::from_raw_parts_mut( + &mut self.current_header as *mut Header as *mut u8, + size_of_val(&self.current_header), + ) + }; + (&mut self.input as &mut dyn SeqRead) + .seq_read_exact(dest) + .await?; + Ok(()) + } + + /// Read the next item, the header is already loaded. + async fn read_current_item(&mut self) -> io::Result { + match self.current_header.htype { + format::PXAR_XATTR => { + let xattr = self.read_xattr().await?; + self.entry.metadata.xattrs.push(xattr); + } + format::PXAR_ACL_USER => { + let entry = self.read_acl_user().await?; + self.entry.metadata.acl.users.push(entry); + } + format::PXAR_ACL_GROUP => { + let entry = self.read_acl_group().await?; + self.entry.metadata.acl.groups.push(entry); + } + format::PXAR_ACL_GROUP_OBJ => { + if self.entry.metadata.acl.group_obj.is_some() { + io_bail!("multiple acl group object entries detected"); + } + let entry = self.read_acl_group_object().await?; + self.entry.metadata.acl.group_obj = Some(entry); + } + format::PXAR_ACL_DEFAULT => { + if self.entry.metadata.acl.default.is_some() { + io_bail!("multiple acl default entries detected"); + } + let entry = self.read_acl_default().await?; + self.entry.metadata.acl.default = Some(entry); + } + format::PXAR_ACL_DEFAULT_USER => { + let entry = self.read_acl_user().await?; + self.entry.metadata.acl.default_users.push(entry); + } + format::PXAR_ACL_DEFAULT_GROUP => { + let entry = self.read_acl_group().await?; + self.entry.metadata.acl.default_groups.push(entry); + } + format::PXAR_FCAPS => { + if self.entry.metadata.fcaps.is_some() { + io_bail!("multiple file capability entries detected"); + } + let entry = self.read_fcaps().await?; + self.entry.metadata.fcaps = Some(entry); + } + format::PXAR_QUOTA_PROJID => { + if self.entry.metadata.quota_project_id.is_some() { + io_bail!("multiple quota project id entries detected"); + } + let entry = self.read_quota_project_id().await?; + self.entry.metadata.quota_project_id = Some(entry); + } + format::PXAR_SYMLINK => { + self.entry.kind = EntryKind::Symlink(self.read_symlink().await?); + return Ok(ItemResult::Entry); + } + format::PXAR_HARDLINK => { + self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?); + return Ok(ItemResult::Entry); + } + format::PXAR_DEVICE => { + self.entry.kind = EntryKind::Device(self.read_device().await?); + return Ok(ItemResult::Entry); + } + format::PXAR_PAYLOAD => { + self.entry.kind = EntryKind::File { + size: self.current_header.content_size(), + }; + self.state = State::InPayload; + return Ok(ItemResult::Entry); + } + format::PXAR_FILENAME | format::PXAR_GOODBYE => { + self.state = State::InDirectory; + 
self.entry.kind = EntryKind::Directory; + return Ok(ItemResult::Entry); + } + _ => io_bail!("unexpected entry type: {:x}", self.current_header.htype), + } + + Ok(ItemResult::Attribute) + } + + // + // Local read helpers. + // + // These utilize additional information and hence are not part of the `dyn SeqRead` impl. + // + + async fn skip_entry(&mut self) -> io::Result<()> { + let mut len = self.current_header.content_size(); + let scratch = scratch_buffer(); + while len >= (scratch.len() as u64) { + (&mut self.input as &mut dyn SeqRead) + .seq_read_exact(scratch) + .await?; + len -= scratch.len() as u64; + } + let len = len as usize; + if len > 0 { + (&mut self.input as &mut dyn SeqRead) + .seq_read_exact(&mut scratch[..len]) + .await?; + } + Ok(()) + } + + async fn read_entry_as_bytes(&mut self) -> io::Result> { + let size = usize::try_from(self.current_header.content_size()).map_err(io_err_other)?; + let data = (&mut self.input as &mut dyn SeqRead) + .seq_read_exact_data(size) + .await?; + Ok(data) + } + + /// Helper to read a struct entry while checking its size. + async fn read_simple_entry( + &mut self, + what: &'static str, + ) -> io::Result { + if self.current_header.content_size() != (size_of::() as u64) { + io_bail!( + "bad {} size: {} (expected {})", + what, + self.current_header.content_size(), + size_of::(), + ); + } + (&mut self.input as &mut dyn SeqRead).seq_read_entry().await + } + + // + // Read functions for PXAR components. + // + + async fn read_xattr(&mut self) -> io::Result { + let data = self.read_entry_as_bytes().await?; + + let name_len = data + .iter() + .position(|c| *c == 0) + .ok_or_else(|| io_format_err!("missing value separator in xattr"))?; + + Ok(format::XAttr { data, name_len }) + } + + async fn read_symlink(&mut self) -> io::Result { + let data = self.read_entry_as_bytes().await?; + Ok(format::Symlink { data }) + } + + async fn read_hardlink(&mut self) -> io::Result { + let data = self.read_entry_as_bytes().await?; + Ok(format::Hardlink { data }) + } + + async fn read_device(&mut self) -> io::Result { + self.read_simple_entry("device").await + } + + async fn read_fcaps(&mut self) -> io::Result { + let data = self.read_entry_as_bytes().await?; + Ok(format::FCaps { data }) + } + + async fn read_acl_user(&mut self) -> io::Result { + self.read_simple_entry("acl user").await + } + + async fn read_acl_group(&mut self) -> io::Result { + self.read_simple_entry("acl group").await + } + + async fn read_acl_group_object(&mut self) -> io::Result { + self.read_simple_entry("acl group object").await + } + + async fn read_acl_default(&mut self) -> io::Result { + self.read_simple_entry("acl default").await + } + + async fn read_quota_project_id(&mut self) -> io::Result { + self.read_simple_entry("quota project id").await + } +} diff --git a/src/decoder/aio.rs b/src/decoder/aio.rs new file mode 100644 index 0000000..8cf7aa8 --- /dev/null +++ b/src/decoder/aio.rs @@ -0,0 +1,169 @@ +//! Asynchronous `pxar` format handling. + +use std::io; + +use crate::decoder::{self, SeqRead}; +use crate::Entry; + +/// Asynchronous `pxar` decoder. +/// +/// This is the `async` version of the `pxar` decoder. +#[repr(transparent)] +pub struct Decoder { + inner: decoder::DecoderImpl, +} + +#[cfg(feature = "futures-io")] +impl Decoder { + /// Decode a `pxar` archive from a `futures::io::AsyncRead` input. 
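// Usage sketch (illustrative, not part of this commit): examples/apxar.rs drives
// the decoder through the tokio adapter; with the "futures-io" feature the same
// loop works for any `futures::io::AsyncRead`, for instance an in-memory cursor
// ("archive.pxar" is just a placeholder path):
//
//     let data: Vec<u8> = std::fs::read("archive.pxar")?;
//     let mut reader = Decoder::from_futures(futures::io::Cursor::new(data)).await?;
//     while let Some(entry) = reader.next().await {
//         println!("{:#?}", entry?.path());
//     }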
+ #[inline] + pub async fn from_futures(input: T) -> io::Result>> { + Decoder::new(FuturesReader::new(input)).await + } +} + +#[cfg(feature = "tokio-io")] +impl Decoder { + /// Decode a `pxar` archive from a `tokio::io::AsyncRead` input. + #[inline] + pub async fn from_tokio(input: T) -> io::Result>> { + Decoder::new(TokioReader::new(input)).await + } +} + +impl Decoder { + /// Create an async decoder from an input implementing our internal read interface. + pub async fn new(input: T) -> io::Result { + Ok(Self { + inner: decoder::DecoderImpl::new(input).await?, + }) + } + + /// If this is a directory entry, get the next item inside the directory. + pub async fn next(&mut self) -> Option> { + self.inner.next_do().await.transpose() + } + + /// Turn this decoder into a `Stream`. + #[cfg(feature = "futures-io")] + pub fn into_stream(self) -> DecoderStream { + DecoderStream::new(self) + } +} + +#[cfg(feature = "futures-io")] +mod stream { + use std::future::Future; + use std::io; + use std::pin::Pin; + use std::task::{Context, Poll}; + + use super::{Entry, SeqRead}; + + /// A wrapper for the async decoder implementing `futures::stream::Stream`. + /// + /// As long as streams are poll-based this wrapper is required to turn `async fn next()` into + /// `Stream`'s `poll_next()` interface. + pub struct DecoderStream { + inner: super::Decoder, + future: Option>>>>>, + } + + impl DecoderStream { + pub fn new(inner: super::Decoder) -> Self { + Self { + inner, + future: None, + } + } + } + + impl futures::stream::Stream for DecoderStream { + type Item = io::Result; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { + let this = unsafe { self.get_unchecked_mut() }; + loop { + if let Some(mut fut) = this.future.take() { + match fut.as_mut().poll(cx) { + Poll::Ready(res) => return Poll::Ready(res), + Poll::Pending => { + this.future = Some(fut); + return Poll::Pending; + } + } + } + unsafe { + let fut: Box> = Box::new(this.inner.next()); + // Discard the lifetime: + let fut: *mut (dyn Future>> + 'static) = + core::mem::transmute(Box::into_raw(fut)); + let fut = Box::from_raw(fut); + this.future = Some(Pin::new_unchecked(fut)); + } + } + } + } +} + +#[cfg(feature = "futures-io")] +pub use stream::DecoderStream; + +macro_rules! async_io_impl { + ( + #[cfg( $($attr:tt)+ )] + mod $mod:ident { + $(#[$docs:meta])* + $name:ident : $trait:path ; + } + ) => { + #[cfg( $($attr)+ )] + mod $mod { + use std::io; + use std::pin::Pin; + use std::task::{Context, Poll}; + + $(#[$docs])* + pub struct $name { + inner: T, + } + + impl $name { + pub fn new(inner: T) -> Self { + Self { inner } + } + } + + impl crate::decoder::SeqRead for $name { + fn poll_seq_read( + self: Pin<&mut Self>, + cx: &mut Context, + buf: &mut [u8], + ) -> Poll> { + unsafe { + self.map_unchecked_mut(|this| &mut this.inner) + .poll_read(cx, buf) + } + } + } + } + #[cfg( $($attr)+ )] + pub use $mod::$name; + } +} + +async_io_impl! { + #[cfg(feature = "futures-io")] + mod fut { + /// Read adapter for `futures::io::AsyncRead`. + FuturesReader : futures::io::AsyncRead; + } +} + +async_io_impl! { + #[cfg(feature = "tokio-io")] + mod tok { + /// Read adapter for `tokio::io::AsyncRead`. + TokioReader : tokio::io::AsyncRead; + } +} diff --git a/src/decoder/sync.rs b/src/decoder/sync.rs new file mode 100644 index 0000000..9311f21 --- /dev/null +++ b/src/decoder/sync.rs @@ -0,0 +1,82 @@ +//! Blocking `pxar` format handling. 
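// Usage sketch (illustrative, not part of this commit; `file` is e.g. an already
// opened `std::fs::File`): because the blocking decoder implements `Iterator`
// with `io::Result` items (see below), the usual iterator adapters apply, for
// example collecting all archive paths into a single result:
//
//     let paths: io::Result<Vec<std::path::PathBuf>> = Decoder::from_std(file)?
//         .map(|entry| entry.map(|entry| entry.path().to_owned()))
//         .collect();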
+ +use std::io; +use std::pin::Pin; +use std::task::{Context, Poll}; + +use crate::decoder::{self, SeqRead}; +use crate::util::poll_result_once; +use crate::Entry; + +/// Blocking `pxar` decoder. +/// +/// This is the blocking I/O version of the `pxar` decoder. This will *not* work with an +/// asynchronous I/O object. I/O must always return `Poll::Ready`. +/// +/// Attempting to use a `Waker` from this context *will* `panic!` +/// +/// If you need to use asynchronous I/O, use `aio::Decoder`. +#[repr(transparent)] +pub struct Decoder { + inner: decoder::DecoderImpl, +} + +impl Decoder { + /// Decode a `pxar` archive from a regular `std::io::Read` input. + #[inline] + pub fn from_std(input: T) -> io::Result>> { + Decoder::new(StandardReader::new(input)) + } +} + +impl Decoder { + /// Create a *blocking* decoder from an input implementing our internal read interface. + /// + /// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is + /// not allowed to use the `Waker`, as this will cause a `panic!`. + pub fn new(input: T) -> io::Result { + Ok(Self { + inner: poll_result_once(decoder::DecoderImpl::new(input))?, + }) + } + + /// Internal helper for `Accessor`. In this case we have the low-level state machine, and the + /// layer "above" the `Accessor` propagates the actual type (sync vs async). + pub(crate) fn from_impl(inner: decoder::DecoderImpl) -> Self { + Self { inner } + } + + /// If this is a directory entry, get the next item inside the directory. + pub fn next(&mut self) -> Option> { + poll_result_once(self.inner.next_do()).transpose() + } +} + +impl Iterator for Decoder { + type Item = io::Result; + + fn next(&mut self) -> Option { + Decoder::next(self) + } +} + +/// Pxar decoder read adapter for `std::io::Read`. +pub struct StandardReader { + inner: T, +} + +impl StandardReader { + pub fn new(inner: T) -> Self { + Self { inner } + } +} + +impl SeqRead for StandardReader { + fn poll_seq_read( + self: Pin<&mut Self>, + _cx: &mut Context, + buf: &mut [u8], + ) -> Poll> { + Poll::Ready(unsafe { self.get_unchecked_mut() }.inner.read(buf)) + } +} diff --git a/src/format.rs b/src/format.rs new file mode 100644 index 0000000..1a67659 --- /dev/null +++ b/src/format.rs @@ -0,0 +1,233 @@ +//! *pxar* binary format definition +//! +//! Please note the all values are stored in little endian ordering. +//! +//! The Archive contains a list of items. Each item starts with a `Header`, followed by the +//! item data. 
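// Layout sketch (illustrative, not part of this commit): a FILENAME item storing
// the name "foo" consists of the 16-byte `Header` followed by the zero-terminated
// name, all little endian:
//
//     htype:     0x6dbb6ebcb3161f0b   (PXAR_FILENAME)
//     full_size: 20                   (16-byte header + b"foo\0")
//
// so `Header::content_size()` returns 4, the length of the name including its
// terminating zero byte, which the decoder strips again when building the path.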
+ +use std::cmp::Ordering; +use std::io; +use std::mem::size_of; +use std::path::Path; + +use endian_trait::Endian; +use siphasher::sip::SipHasher24; + +pub mod acl; + +pub const PXAR_ENTRY: u64 = 0x1396fabcea5bbb51; +pub const PXAR_FILENAME: u64 = 0x6dbb6ebcb3161f0b; +pub const PXAR_SYMLINK: u64 = 0x664a6fb6830e0d6c; +pub const PXAR_DEVICE: u64 = 0xac3dace369dfe643; +pub const PXAR_XATTR: u64 = 0xb8157091f80bc486; +pub const PXAR_ACL_USER: u64 = 0x297dc88b2ef12faf; +pub const PXAR_ACL_GROUP: u64 = 0x36f2acb56cb3dd0b; +pub const PXAR_ACL_GROUP_OBJ: u64 = 0x23047110441f38f3; +pub const PXAR_ACL_DEFAULT: u64 = 0xfe3eeda6823c8cd0; +pub const PXAR_ACL_DEFAULT_USER: u64 = 0xbdf03df9bd010a91; +pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xa0cb1168782d1f51; +pub const PXAR_FCAPS: u64 = 0xf7267db0afed0629; +pub const PXAR_QUOTA_PROJID: u64 = 0x161baf2d8772a72b; + +/// Marks item as hardlink +/// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__"); +pub const PXAR_HARDLINK: u64 = 0x2c5e06f634f65b86; +/// Marks the beginnig of the payload (actual content) of regular files +pub const PXAR_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9; +/// Marks item as entry of goodbye table +pub const PXAR_GOODBYE: u64 = 0xdfd35c5e8327c403; +/// The end marker used in the GOODBYE object +pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943; + +#[derive(Debug, Endian)] +#[repr(C)] +pub struct Header { + /// The item type (see `PXAR_` constants). + pub htype: u64, + /// The size of the item, including the size of `Header`. + full_size: u64, +} + +impl Header { + #[inline] + pub fn full_size(&self) -> u64 { + self.full_size + } + + #[inline] + pub fn content_size(&self) -> u64 { + self.full_size() - (size_of::() as u64) + } +} + +#[derive(Clone, Debug, Default, Endian)] +#[repr(C)] +pub struct Entry { + pub mode: u64, + pub flags: u64, + pub uid: u32, + pub gid: u32, + pub mtime: u64, +} + +#[derive(Clone, Debug)] +pub struct Filename { + pub name: Vec, +} + +#[derive(Clone, Debug)] +pub struct Symlink { + pub data: Vec, +} + +#[derive(Clone, Debug)] +pub struct Hardlink { + pub data: Vec, +} + +#[derive(Clone, Debug, Eq)] +#[repr(C)] +pub struct XAttr { + pub(crate) data: Vec, + pub(crate) name_len: usize, +} + +impl XAttr { + pub fn new, V: AsRef<[u8]>>(name: N, value: V) -> Self { + let name = name.as_ref(); + let value = value.as_ref(); + let mut data = Vec::with_capacity(name.len() + value.len() + 1); + data.extend(name); + data.push(0); + data.extend(value); + Self { + data, + name_len: name.len(), + } + } + + pub fn name(&self) -> &[u8] { + &self.data[..self.name_len] + } + + pub fn value(&self) -> &[u8] { + &self.data[(self.name_len + 1)..] + } +} + +impl Ord for XAttr { + fn cmp(&self, other: &XAttr) -> Ordering { + self.name().cmp(&other.name()) + } +} + +impl PartialOrd for XAttr { + fn partial_cmp(&self, other: &XAttr) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for XAttr { + fn eq(&self, other: &XAttr) -> bool { + self.name() == other.name() + } +} + +#[derive(Clone, Debug, Endian)] +#[repr(C)] +pub struct Device { + pub major: u64, + pub minor: u64, +} + +#[derive(Clone, Debug)] +#[repr(C)] +pub struct FCaps { + pub data: Vec, +} + +#[derive(Clone, Debug, Endian)] +#[repr(C)] +pub struct QuotaProjectId { + pub projid: u64, +} + +#[derive(Debug, Endian)] +#[repr(C)] +pub struct GoodbyeItem { + /// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value + /// `PXAR_GOODBYE_TAIL_MARKER`. 
+ pub hash: u64, + + /// The offset from the start of the GOODBYE object to the start of the matching directory item + /// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY + /// object. + pub offset: u64, + + /// The overall size of the directory item. This includes the FILENAME header. In other words, + /// `goodbye_start - offset + size` points to the end of the directory. + /// + /// The last GOODBYE item repeats the size of the GOODBYE item. + pub size: u64, +} + +impl GoodbyeItem { + pub fn new(name: &[u8], offset: u64, size: u64) -> Self { + let hash = hash_filename(name); + Self { hash, offset, size } + } +} + +pub fn hash_filename(name: &[u8]) -> u64 { + use std::hash::Hasher; + let mut hasher = SipHasher24::new_with_keys(0x8574442b0f1d84b3, 0x2736ed30d1c22ec1); + hasher.write(name); + hasher.finish() +} + +/* +pub fn search_binary_tree_array(table: &[T], key: &T) -> Option +where + T: Ord, + F: FnMut(&T) -> std::cmp::Ordering, +{ + search_binary_tree_array_by(table, |elem| key.cmp(elem)) +} +*/ + +pub fn search_binary_tree_array_by(table: &[T], mut f: F) -> Option +where + F: FnMut(&T) -> Ordering, +{ + let mut i = 0; + + while !table.is_empty() { + match f(&table[i]) { + Ordering::Equal => return Some(i), + Ordering::Less => i = 2 * i + 1, + Ordering::Greater => i = 2 * i + 2, + } + if i >= table.len() { + break; + } + } + + None +} + +pub fn path_is_legal_component(path: &Path) -> bool { + let mut components = path.components(); + match components.next() { + Some(std::path::Component::Normal(_)) => (), + _ => return false, + } + components.next().is_none() +} + +pub fn check_file_name(path: &Path) -> io::Result<()> { + if !path_is_legal_component(path) { + io_bail!("invalid file name in archive: {:?}", path); + } else { + Ok(()) + } +} diff --git a/src/format/acl.rs b/src/format/acl.rs new file mode 100644 index 0000000..457630e --- /dev/null +++ b/src/format/acl.rs @@ -0,0 +1,94 @@ +//! ACL related data + +use std::cmp::Ordering; + +use endian_trait::Endian; + +bitflags::bitflags! { + /// ACL permission bits. 
+ #[derive(Endian)] + pub struct Permissions: u64 { + const PXAR_ACL_PERMISSION_READ = 4; + const PXAR_ACL_PERMISSION_WRITE = 2; + const PXAR_ACL_PERMISSION_EXECUTE = 1; + } +} + +#[derive(Clone, Debug, Endian, Eq)] +#[repr(C)] +pub struct User { + pub uid: u64, + pub permissions: Permissions, + //pub name: Vec, not impl for now +} + +// TODO if also name is impl, sort by uid, then by name and last by permissions +impl Ord for User { + fn cmp(&self, other: &User) -> Ordering { + match self.uid.cmp(&other.uid) { + // uids are equal, entries ordered by permissions + Ordering::Equal => self.permissions.cmp(&other.permissions), + // uids are different, entries ordered by uid + uid_order => uid_order, + } + } +} + +impl PartialOrd for User { + fn partial_cmp(&self, other: &User) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for User { + fn eq(&self, other: &User) -> bool { + self.uid == other.uid && self.permissions == other.permissions + } +} + +#[derive(Clone, Debug, Endian, Eq)] +#[repr(C)] +pub struct Group { + pub gid: u64, + pub permissions: Permissions, + //pub name: Vec, not impl for now +} + +// TODO if also name is impl, sort by gid, then by name and last by permissions +impl Ord for Group { + fn cmp(&self, other: &Group) -> Ordering { + match self.gid.cmp(&other.gid) { + // gids are equal, entries are ordered by permissions + Ordering::Equal => self.permissions.cmp(&other.permissions), + // gids are different, entries ordered by gid + gid_ordering => gid_ordering, + } + } +} + +impl PartialOrd for Group { + fn partial_cmp(&self, other: &Group) -> Option { + Some(self.cmp(other)) + } +} + +impl PartialEq for Group { + fn eq(&self, other: &Group) -> bool { + self.gid == other.gid && self.permissions == other.permissions + } +} + +#[derive(Clone, Debug, Endian)] +#[repr(C)] +pub struct GroupObject { + pub permissions: Permissions, +} + +#[derive(Clone, Debug, Endian)] +#[repr(C)] +pub struct Default { + pub user_obj_permissions: Permissions, + pub group_obj_permissions: Permissions, + pub other_permissions: Permissions, + pub mask_permissions: Permissions, +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..24b4955 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,218 @@ +//! Proxmox backup archive format handling. +//! +//! This implements a reader and writer for the proxmox archive format (.pxar). + +use std::ffi::OsStr; +use std::mem; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; + +#[macro_use] +mod macros; + +pub mod format; + +pub(crate) mod util; + +mod poll_fn; + +pub mod accessor; +pub mod decoder; + +/// File metadata found in pxar archives. +/// +/// This includes the usual data you'd get from `stat()` as well as ACLs, extended attributes, file +/// capabilities and more. +#[derive(Clone, Debug, Default)] +pub struct Metadata { + /// Data typically found in a `stat()` call. + pub stat: format::Entry, + + /// Extended attributes. + pub xattrs: Vec, + + /// ACLs. + pub acl: Acl, + + /// File capabilities. + pub fcaps: Option, + + /// Quota project id. + pub quota_project_id: Option, +} + +/// ACL entries of a pxar archive. +/// +/// This contains all the various ACL entry types supported by the pxar archive format. +#[derive(Clone, Debug, Default)] +pub struct Acl { + /// User ACL list. + pub users: Vec, + + /// Group ACL list. + pub groups: Vec, + + /// Group object ACL entry. + pub group_obj: Option, + + /// Default permissions. + pub default: Option, + + /// Default user permissions. 
+ pub default_users: Vec, + + /// Default group permissions. + pub default_groups: Vec, +} + +/// Pxar archive entry kind. +/// +/// Identifies whether the entry is a file, symlink, directory, etc. +#[derive(Clone, Debug)] +pub enum EntryKind { + /// Symbolic links. + Symlink(format::Symlink), + + /// Hard links, relative to the root of the current archive. + Hardlink(format::Hardlink), + + /// Device node. + Device(format::Device), + + /// Regular file. + File { size: u64 }, + + /// Directory entry. When iterating through an archive, the contents follow next. + Directory, + + /// End of a directory. This is for internal use to remember the goodbye-table of a directory + /// entry. Will not occur during normal iteration. + EndOfDirectory, +} + +/// A pxar archive entry. This contains the current path, file metadata and entry type specific +/// information. +#[derive(Clone, Debug)] +pub struct Entry { + path: PathBuf, + metadata: Metadata, + kind: EntryKind, + offset: Option, +} + +/// General accessors. +impl Entry { + /// Clear everything except for the path. + fn clear_data(&mut self) { + self.metadata = Metadata::default(); + self.kind = EntryKind::EndOfDirectory; + self.offset = None; + } + + fn internal_default() -> Self { + Self { + path: PathBuf::default(), + metadata: Metadata::default(), + kind: EntryKind::EndOfDirectory, + offset: None, + } + } + + fn take(&mut self) -> Self { + let this = mem::replace(self, Self::internal_default()); + self.path = this.path.clone(); + this + } + + /// If the underlying I/O implementation supports seeking, this will be filled with the start + /// offset of this entry, allowing one to jump back to this entry later on. + #[inline] + pub fn offset(&self) -> Option { + self.offset + } + + /// Get the full path of this file within the current pxar directory structure. + #[inline] + pub fn path(&self) -> &Path { + &self.path + } + + /// Convenience method to get just the file name portion of the current path. + #[inline] + pub fn file_name(&self) -> &OsStr { + self.path.file_name().unwrap_or(OsStr::new("")) + } + + /// Get the file metadata. + #[inline] + pub fn metadata(&self) -> &Metadata { + &self.metadata + } + + /// Get the value of the symbolic link if it is one. + pub fn get_symlink(&self) -> Option<&OsStr> { + match &self.kind { + EntryKind::Symlink(link) => Some(OsStr::from_bytes(&link.data)), + _ => None, + } + } + + /// Get the value of the hard link if it is one. + pub fn get_hardlink(&self) -> Option<&OsStr> { + match &self.kind { + EntryKind::Hardlink(link) => Some(OsStr::from_bytes(&link.data)), + _ => None, + } + } + + /// Get the value of the device node if it is one. + pub fn get_device(&self) -> Option { + match &self.kind { + EntryKind::Device(dev) => Some(dev.clone()), + _ => None, + } + } +} + +/// Convenience helpers. +impl Entry { + /// Check whether this is a directory. + pub fn is_dir(&self) -> bool { + match self.kind { + EntryKind::Directory { .. } => true, + _ => false, + } + } + + /// Check whether this is a symbolic link. + pub fn is_symlink(&self) -> bool { + match self.kind { + EntryKind::Symlink(_) => true, + _ => false, + } + } + + /// Check whether this is a hard link. + pub fn is_hardlink(&self) -> bool { + match self.kind { + EntryKind::Hardlink(_) => true, + _ => false, + } + } + + /// Check whether this is a device node. + pub fn is_device(&self) -> bool { + match self.kind { + EntryKind::Device(_) => true, + _ => false, + } + } + + /// Check whether this is a regular file. 
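// Usage sketch (illustrative, not part of this commit; `file` is an already
// opened `std::fs::File` and `Decoder` is `pxar::decoder::Decoder` as used in
// src/bin/pxar.rs): the convenience helpers allow dispatching on the entry kind
// without matching on `EntryKind` directly:
//
//     for entry in Decoder::from_std(file)? {
//         let entry = entry?;
//         if entry.is_dir() {
//             println!("dir:  {:?}", entry.path());
//         } else if let Some(target) = entry.get_symlink() {
//             println!("link: {:?} -> {:?}", entry.path(), target);
//         } else if entry.is_regular_file() {
//             println!("file: {:?}", entry.path());
//         }
//     }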
+ pub fn is_regular_file(&self) -> bool { + match self.kind { + EntryKind::File { .. } => true, + _ => false, + } + } +} diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 0000000..31df1be --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,23 @@ +/// Like failure's `format_err` but producing a `std::io::Error`. +macro_rules! io_format_err { + ($($msg:tt)+) => { + ::std::io::Error::new(::std::io::ErrorKind::Other, format!($($msg)+)) + }; +} + +/// Like failure's `bail` but producing a `std::io::Error`. +macro_rules! io_bail { + ($($msg:tt)+) => {{ + return Err(io_format_err!($($msg)+)); + }}; +} + +/// Our dependency on `futures` is optional. +macro_rules! ready { + ($expr:expr) => {{ + match $expr { + std::task::Poll::Ready(r) => r, + std::task::Poll::Pending => return std::task::Poll::Pending, + } + }}; +} diff --git a/src/poll_fn.rs b/src/poll_fn.rs new file mode 100644 index 0000000..f193c6e --- /dev/null +++ b/src/poll_fn.rs @@ -0,0 +1,39 @@ +//! `poll_fn` reimplementation as it is otherwise the only thing we need from the futures crate. +//! +//! Our `futures` crate dependency is optional. + +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; + +pub struct PollFn { + func: Option, +} + +pub fn poll_fn(func: F) -> PollFn +where + F: FnMut(&mut Context) -> Poll, +{ + PollFn { func: Some(func) } +} + +impl Future for PollFn +where + F: FnMut(&mut Context) -> Poll, +{ + type Output = R; + + fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll { + let this = unsafe { self.get_unchecked_mut() }; + match &mut this.func { + None => panic!("poll() after Ready"), + Some(func) => { + let res = func(cx); + if res.is_ready() { + this.func = None; + } + res + } + } + } +} diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..f78e32c --- /dev/null +++ b/src/util.rs @@ -0,0 +1,115 @@ +#![allow(dead_code)] + +use std::future::Future; +use std::io; +use std::pin::Pin; +use std::task::{Context, Poll}; + +// from /usr/include/linux/magic.h +// and from casync util.h +#[rustfmt::skip] +#[allow(clippy::unreadable_literal)] +mod consts { + pub const BINFMTFS_MAGIC : i64 = 0x42494e4d; + pub const CGROUP2_SUPER_MAGIC : i64 = 0x63677270; + pub const CGROUP_SUPER_MAGIC : i64 = 0x0027e0eb; + pub const CONFIGFS_MAGIC : i64 = 0x62656570; + pub const DEBUGFS_MAGIC : i64 = 0x64626720; + pub const DEVPTS_SUPER_MAGIC : i64 = 0x00001cd1; + pub const EFIVARFS_MAGIC : i64 = 0xde5e81e4; + pub const FUSE_CTL_SUPER_MAGIC: i64 = 0x65735543; + pub const HUGETLBFS_MAGIC : i64 = 0x958458f6; + pub const MQUEUE_MAGIC : i64 = 0x19800202; + pub const NFSD_MAGIC : i64 = 0x6e667364; + pub const PROC_SUPER_MAGIC : i64 = 0x00009fa0; + pub const PSTOREFS_MAGIC : i64 = 0x6165676C; + pub const RPCAUTH_GSSMAGIC : i64 = 0x67596969; + pub const SECURITYFS_MAGIC : i64 = 0x73636673; + pub const SELINUX_MAGIC : i64 = 0xf97cff8c; + pub const SMACK_MAGIC : i64 = 0x43415d53; + pub const RAMFS_MAGIC : i64 = 0x858458f6; + pub const TMPFS_MAGIC : i64 = 0x01021994; + pub const SYSFS_MAGIC : i64 = 0x62656572; + pub const MSDOS_SUPER_MAGIC : i64 = 0x00004d44; + pub const BTRFS_SUPER_MAGIC : i64 = 0x9123683E; + pub const FUSE_SUPER_MAGIC : i64 = 0x65735546; + pub const EXT4_SUPER_MAGIC : i64 = 0x0000EF53; + pub const XFS_SUPER_MAGIC : i64 = 0x58465342; + pub const ZFS_SUPER_MAGIC : i64 = 0x2FC12FC1; +} + +pub fn is_virtual_file_system(magic: i64) -> bool { + match magic { + consts::BINFMTFS_MAGIC + | consts::CGROUP2_SUPER_MAGIC + | consts::CGROUP_SUPER_MAGIC + | consts::CONFIGFS_MAGIC + | 
consts::DEBUGFS_MAGIC + | consts::DEVPTS_SUPER_MAGIC + | consts::EFIVARFS_MAGIC + | consts::FUSE_CTL_SUPER_MAGIC + | consts::HUGETLBFS_MAGIC + | consts::MQUEUE_MAGIC + | consts::NFSD_MAGIC + | consts::PROC_SUPER_MAGIC + | consts::PSTOREFS_MAGIC + | consts::RPCAUTH_GSSMAGIC + | consts::SECURITYFS_MAGIC + | consts::SELINUX_MAGIC + | consts::SMACK_MAGIC + | consts::SYSFS_MAGIC => true, + _ => false, + } +} + +/// Helper function to extract file names from binary archive. +pub fn read_os_string(buffer: &[u8]) -> std::ffi::OsString { + use std::os::unix::ffi::OsStrExt; + std::ffi::OsStr::from_bytes(if buffer.ends_with(&[0]) { + &buffer[..(buffer.len() - 1)] + } else { + buffer + }) + .into() +} + +#[inline] +pub fn vec_new(size: usize) -> Vec { + let mut data = Vec::with_capacity(size); + unsafe { + data.set_len(size); + } + data +} + +pub fn io_err_other(err: E) -> io::Error { + io::Error::new(io::ErrorKind::Other, err.to_string()) +} + +pub fn poll_result_once(mut fut: T) -> io::Result +where + T: Future>, +{ + let waker = std::task::RawWaker::new(std::ptr::null(), &WAKER_VTABLE); + let waker = unsafe { std::task::Waker::from_raw(waker) }; + let mut cx = Context::from_waker(&waker); + unsafe { + match Pin::new_unchecked(&mut fut).poll(&mut cx) { + Poll::Pending => Err(io_err_other("got Poll::Pending synchronous context")), + Poll::Ready(r) => r, + } + } +} + +const WAKER_VTABLE: std::task::RawWakerVTable = + std::task::RawWakerVTable::new(forbid_clone, forbid_wake, forbid_wake, ignore_drop); + +unsafe fn forbid_clone(_: *const ()) -> std::task::RawWaker { + panic!("tried to clone waker for synchronous task"); +} + +unsafe fn forbid_wake(_: *const ()) { + panic!("tried to wake synchronous task"); +} + +unsafe fn ignore_drop(_: *const ()) {}
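The `poll_result_once` helper above is what lets the blocking `sync::Decoder` and `sync::Accessor` reuse the async state machines: the future is polled exactly once with a waker that panics if it is ever used, which is only sound because the blocking I/O adapters always return `Poll::Ready`. A minimal self-contained sketch of the same technique follows; the `block_on_ready` name and the `main` driver are illustrative and not part of this commit.

use std::future::Future;
use std::io;
use std::pin::Pin;
use std::task::{Context, Poll, RawWaker, RawWakerVTable, Waker};

unsafe fn forbid_clone(_: *const ()) -> RawWaker {
    panic!("tried to clone waker for synchronous task");
}

unsafe fn forbid_wake(_: *const ()) {
    panic!("tried to wake synchronous task");
}

unsafe fn ignore_drop(_: *const ()) {}

const WAKER_VTABLE: RawWakerVTable =
    RawWakerVTable::new(forbid_clone, forbid_wake, forbid_wake, ignore_drop);

/// Poll a future exactly once with a waker that panics if it is ever used.
/// This is only sound for futures whose I/O always returns `Poll::Ready`.
fn block_on_ready<T, F: Future<Output = io::Result<T>>>(mut fut: F) -> io::Result<T> {
    let waker = unsafe { Waker::from_raw(RawWaker::new(std::ptr::null(), &WAKER_VTABLE)) };
    let mut cx = Context::from_waker(&waker);
    // The future never leaves this stack frame, so pinning it on the stack is fine.
    match unsafe { Pin::new_unchecked(&mut fut) }.poll(&mut cx) {
        Poll::Ready(r) => r,
        Poll::Pending => Err(io::Error::new(
            io::ErrorKind::Other,
            "got Poll::Pending in a synchronous context",
        )),
    }
}

fn main() -> io::Result<()> {
    // Any async code that never hits a pending I/O operation completes in one poll:
    let value = block_on_ready(async { Ok::<_, io::Error>(21 * 2) })?;
    assert_eq!(value, 42);
    Ok(())
}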