diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index 35cff99..e5d69b1 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -1,6 +1,11 @@
 use pxar::format::hash_filename;
 
 const CONSTANTS: &[(&str, &str, &str)] = &[
+    (
+        "Pxar format version entry, fallback to version 1 if not present",
+        "PXAR_FORMAT_VERSION",
+        "__PROXMOX_FORMAT_VERSION__",
+    ),
     (
         "Beginning of an entry (current version).",
         "PXAR_ENTRY",
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index c3a5e14..bf8d588 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -17,7 +17,7 @@ use endian_trait::Endian;
 
 use crate::binary_tree_array;
 use crate::decoder::{self, DecoderImpl};
-use crate::format::{self, GoodbyeItem};
+use crate::format::{self, FormatVersion, GoodbyeItem};
 use crate::util;
 use crate::{Entry, EntryKind, PxarVariant};
 
@@ -190,6 +190,19 @@ impl AccessorImpl {
             io_bail!("too small to contain a pxar archive");
         }
 
+        // Fail early if the archive starts with an unknown format version
+        let header: format::Header = read_entry_at(input.archive(), 0).await?;
+        header.check_header_size()?;
+
+        if header.htype == format::PXAR_FORMAT_VERSION {
+            let version: u64 = read_entry_at(
+                input.archive(),
+                size_of::<format::Header>() as u64,
+            )
+            .await?;
+            FormatVersion::deserialize(version)?;
+        }
+
         let input = input.wrap_multi(
             |input| input,
             |(payload_input, size)| (payload_input, 0..size),
@@ -299,11 +312,19 @@ impl AccessorImpl {
             PathBuf::new(),
         )
         .await?;
-        let entry = decoder
+        let mut entry = decoder
             .next()
             .await
             .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
 
+        // Skip over a possible format version entry, the entry of interest follows it
+        if let EntryKind::Version(_) = entry.kind() {
+            entry = decoder
+                .next()
+                .await
+                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+        }
+
         Ok(FileEntryImpl {
             input: self.input.clone(),
             entry,
@@ -528,10 +549,19 @@ impl DirectoryImpl {
         file_name: Option<&Path>,
     ) -> io::Result<(Entry, DecoderImpl>)> {
         let mut decoder = self.get_decoder(entry_range, file_name).await?;
-        let entry = decoder
+        let mut entry = decoder
             .next()
             .await
             .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+        // Skip over a possible format version entry, the entry of interest follows it
+        if let EntryKind::Version(_) = entry.kind() {
+            entry = decoder
+                .next()
+                .await
+                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+        }
+
         Ok((entry, decoder))
     }
 
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 19b1b5c..a41b186 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -18,7 +18,7 @@ use std::task::{Context, Poll};
 
 use endian_trait::Endian;
 
-use crate::format::{self, Header};
+use crate::format::{self, FormatVersion, Header};
 use crate::util::{self, io_err_other};
 use crate::{Entry, EntryKind, Metadata, PxarVariant};
 
@@ -169,10 +169,15 @@ pub(crate) struct DecoderImpl {
     /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
     /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
     eof_after_entry: bool,
+    /// The format version as determined by the format version header
+    version: FormatVersion,
 }
 
+#[derive(Clone, PartialEq)]
 enum State {
     Begin,
+    /// A format version entry was read, the next entry is read like the first one
+    Root,
     Default,
     InPayload {
         offset: u64,
@@ -245,6 +250,7 @@ impl DecoderImpl {
             with_goodbye_tables: false,
             payload_consumed,
             eof_after_entry,
+            version: FormatVersion::default(),
         })
     }
 
@@ -257,7 +263,20 @@ impl DecoderImpl {
         loop {
             match self.state {
                 State::Eof => return Ok(None),
-                State::Begin => return self.read_next_entry().await.map(Some),
+                State::Begin => {
+                    let entry = self.read_next_entry().await.map(Some);
+                    // Remember the version of archives starting with a format version entry
+                    if let Ok(Some(ref entry)) = entry {
+                        if let EntryKind::Version(version) = entry.kind() {
+                            self.version = *version;
+                            self.state = State::Root;
+                        }
+                    }
+                    return entry;
+                }
+                State::Root => {
+                    return self.read_next_entry().await.map(Some);
+                }
                 State::Default => {
                     // we completely finished an entry, so now we're going "up" in the directory
                     // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
@@ -388,6 +407,7 @@ impl DecoderImpl {
     }
 
     async fn read_next_entry_or_eof(&mut self) -> io::Result> {
+        let previous_state = self.state.clone();
         self.state = State::Default;
         self.entry.clear_data();
 
@@ -407,6 +427,14 @@ impl DecoderImpl {
             self.entry.metadata = Metadata::default();
             self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
 
+            Ok(Some(self.entry.take()))
+        } else if header.htype == format::PXAR_FORMAT_VERSION {
+            if previous_state != State::Begin {
+                io_bail!("Got format version entry at unexpected position");
+            }
+            self.current_header = header;
+            self.entry.kind = EntryKind::Version(self.read_format_version().await?);
+
             Ok(Some(self.entry.take()))
         } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
             if header.htype == format::PXAR_ENTRY {
@@ -766,6 +794,12 @@ impl DecoderImpl {
         self.current_header.check_header_size()?;
         seq_read_entry(self.input.archive_mut()).await
     }
+
+    /// Read the payload of a format version entry and map it to a known [`FormatVersion`].
+    async fn read_format_version(&mut self) -> io::Result<FormatVersion> {
+        let version: u64 = seq_read_entry(self.input.archive_mut()).await?;
+        FormatVersion::deserialize(version)
+    }
 }
 
 /// Reader for file contents inside a pxar archive.
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index b579e18..66f2d01 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -17,7 +17,7 @@ use endian_trait::Endian;
 
 use crate::binary_tree_array;
 use crate::decoder::{self, SeqRead};
-use crate::format::{self, GoodbyeItem, PayloadRef};
+use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
 use crate::{Metadata, PxarVariant};
 
 pub mod aio;
@@ -326,6 +326,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
     /// Since only the "current" entry can be actively writing files, we share the file copy
     /// buffer.
     file_copy_buffer: Arc>>,
+    /// Pxar format version to encode
+    version: FormatVersion,
 }
 
 impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
@@ -350,11 +352,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         }
 
         let mut state = EncoderState::default();
-        if let Some(payload_output) = output.payload_mut() {
+        let version = if let Some(payload_output) = output.payload_mut() {
             let header = format::Header::with_content_size(format::PXAR_PAYLOAD_START_MARKER, 0);
             header.check_header_size()?;
             seq_write_struct(payload_output, header, &mut state.payload_write_position).await?;
-        }
+            FormatVersion::Version2
+        } else {
+            FormatVersion::Version1
+        };
 
         let mut this = Self {
             output,
@@ -363,8 +368,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
             file_copy_buffer: Arc::new(Mutex::new(unsafe {
                 crate::util::vec_new_uninitialized(1024 * 1024)
             })),
+            version,
         };
 
+        this.encode_format_version().await?;
         this.encode_metadata(metadata).await?;
         let state = this.state_mut()?;
         state.files_offset = state.position();
@@ -547,6 +554,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         file_size: u64,
         payload_offset: PayloadOffset,
     ) -> io::Result {
+        if self.version == FormatVersion::Version1 {
+            io_bail!("payload references not supported in format version 1");
+        }
+
         if self.output.payload().is_none() {
             io_bail!("unable to add payload reference");
         }
@@ -762,6 +773,28 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         Ok(())
     }
 
+    /// Write the format version entry, if the version is one that gets encoded.
+    ///
+    /// Fails if the archive write position is not zero, as the entry must start the archive.
+    async fn encode_format_version(&mut self) -> io::Result<()> {
+        if let Some(version_bytes) = self.version.serialize() {
+            let (mut output, state) = self.output_state()?;
+            if state.write_position != 0 {
+                io_bail!("format version must be encoded at the beginning of an archive");
+            }
+
+            return seq_write_pxar_entry(
+                output.archive_mut(),
+                format::PXAR_FORMAT_VERSION,
+                &version_bytes,
+                &mut state.write_position,
+            )
+            .await;
+        }
+
+        Ok(())
+    }
+
     async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
         let (mut output, state) = self.output_state()?;
         seq_write_pxar_struct_entry(
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 6519bfc..9a664f5 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -6,6 +6,7 @@
 //! item data.
 //!
 //! An archive contains items in the following order:
+//!  * `FORMAT_VERSION` -- (optional for v1), version of encoding format
 //!  * `ENTRY` -- containing general stat() data and related bits
 //!   * `XATTR` -- one extended attribute
 //!   * ... -- more of these when there are multiple defined
@@ -80,6 +81,8 @@ pub mod mode {
 }
 
 // Generated by `cargo run --example mk-format-hashes`
+/// Pxar format version entry, fallback to version 1 if not present
+pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
 /// Beginning of an entry (current version).
 pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
 /// Previous version of the entry struct
@@ -186,6 +189,7 @@ impl Header {
 impl Display for Header {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let readable = match self.htype {
+            PXAR_FORMAT_VERSION => "FORMAT_VERSION",
             PXAR_FILENAME => "FILENAME",
             PXAR_SYMLINK => "SYMLINK",
             PXAR_HARDLINK => "HARDLINK",
@@ -551,6 +555,41 @@ impl From<&std::fs::Metadata> for Stat {
     }
 }
 
+/// Encoding format version of a pxar archive.
+///
+/// Archives without a `PXAR_FORMAT_VERSION` entry are treated as [`FormatVersion::Version1`].
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
+pub enum FormatVersion {
+    /// Version 1, the default: never written out as a format version entry.
+    #[default]
+    Version1,
+    /// Version 2: written out as an explicit format version entry.
+    Version2,
+}
+
+impl FormatVersion {
+    /// Map the raw value of a format version entry to a [`FormatVersion`].
+    ///
+    /// Returns an error for any value other than `1` or `2`.
+    pub fn deserialize(version: u64) -> Result<FormatVersion, std::io::Error> {
+        match version {
+            1u64 => Ok(FormatVersion::Version1),
+            2u64 => Ok(FormatVersion::Version2),
+            version => io_bail!("unknown format version {version}"),
+        }
+    }
+
+    /// Little-endian payload of the format version entry, or `None` if this version is not
+    /// encoded at all.
+    pub fn serialize(&self) -> Option<Vec<u8>> {
+        match self {
+            // format version 1 is not to be encoded
+            FormatVersion::Version1 => None,
+            FormatVersion::Version2 => Some(2u64.to_le_bytes().to_vec()),
+        }
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct Filename {
     pub name: Vec,
diff --git a/src/lib.rs b/src/lib.rs
index bafdfe4..7e5b48f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -342,6 +342,9 @@ impl Acl {
 /// Identifies whether the entry is a file, symlink, directory, etc.
 #[derive(Clone, Debug)]
 pub enum EntryKind {
+    /// Pxar file format version
+    Version(format::FormatVersion),
+
     /// Symbolic links.
     Symlink(format::Symlink),
 
diff --git a/tests/simple/fs.rs b/tests/simple/fs.rs
index 4284805..8a8c607 100644
--- a/tests/simple/fs.rs
+++ b/tests/simple/fs.rs
@@ -229,6 +229,7 @@ impl Entry {
                     })?))
                 };
                 match item.kind() {
+                    PxarEntryKind::Version(_) => continue,
                     PxarEntryKind::GoodbyeTable => break,
                     PxarEntryKind::File { size, .. } => {
                         let mut data = Vec::new();