format/encoder/decoder: new pxar entry type Version

Introduces a new pxar format entry type `Version` and the associated
encoder and decoder methods. The format version entry is only allowed
once, as the first entry of the pxar archive, marked with a
`PXAR_FORMAT_VERSION` header followed by the encoded version number.
If not present, the default format version 1 is assumed as encoding
format for the archive.

The entry allows early detection of incompatibilities with an encoded
archive, so that a decoder can bail or switch mode based on the
encountered version.

The format version entry is not backwards compatible with pxar format
version 1.

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
This commit is contained in:
Christian Ebner 2024-03-22 12:13:17 +01:00
parent ef8b723734
commit 0983094c87
7 changed files with 136 additions and 8 deletions

View File

@ -1,6 +1,11 @@
use pxar::format::hash_filename;
const CONSTANTS: &[(&str, &str, &str)] = &[
(
"Pxar format version entry, fallback to version 1 if not present",
"PXAR_FORMAT_VERSION",
"__PROXMOX_FORMAT_VERSION__",
),
(
"Beginning of an entry (current version).",
"PXAR_ENTRY",

View File

@ -17,7 +17,7 @@ use endian_trait::Endian;
use crate::binary_tree_array;
use crate::decoder::{self, DecoderImpl};
use crate::format::{self, GoodbyeItem};
use crate::format::{self, FormatVersion, GoodbyeItem};
use crate::util;
use crate::{Entry, EntryKind, PxarVariant};
@ -190,6 +190,18 @@ impl<T: ReadAt> AccessorImpl<T> {
io_bail!("too small to contain a pxar archive");
}
let header: format::Header = read_entry_at(input.archive(), 0).await?;
header.check_header_size()?;
if header.htype == format::PXAR_FORMAT_VERSION {
let version: u64 = read_entry_at(
input.archive(),
size_of::<format::Header>() as u64,
)
.await?;
FormatVersion::deserialize(version)?;
}
let input = input.wrap_multi(
|input| input,
|(payload_input, size)| (payload_input, 0..size),
@ -299,11 +311,19 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
PathBuf::new(),
)
.await?;
let entry = decoder
let mut entry = decoder
.next()
.await
.ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
// Skip over possible Version and Prelude before the root entry of type Directory
if let EntryKind::Version(_) = entry.kind() {
entry = decoder
.next()
.await
.ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
}
Ok(FileEntryImpl {
input: self.input.clone(),
entry,
@ -528,10 +548,19 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
file_name: Option<&Path>,
) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
let mut decoder = self.get_decoder(entry_range, file_name).await?;
let entry = decoder
let mut entry = decoder
.next()
.await
.ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
// Skip over possible Version and Prelude before the root entry of type Directory
if let EntryKind::Version(_) = entry.kind() {
entry = decoder
.next()
.await
.ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
}
Ok((entry, decoder))
}

View File

@ -18,7 +18,7 @@ use std::task::{Context, Poll};
use endian_trait::Endian;
use crate::format::{self, Header};
use crate::format::{self, FormatVersion, Header};
use crate::util::{self, io_err_other};
use crate::{Entry, EntryKind, Metadata, PxarVariant};
@ -169,10 +169,14 @@ pub(crate) struct DecoderImpl<T> {
/// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
/// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
eof_after_entry: bool,
/// The format version as determined by the format version header
version: format::FormatVersion,
}
#[derive(Clone, PartialEq)]
enum State {
Begin,
Root,
Default,
InPayload {
offset: u64,
@ -245,6 +249,7 @@ impl<I: SeqRead> DecoderImpl<I> {
with_goodbye_tables: false,
payload_consumed,
eof_after_entry,
version: FormatVersion::default(),
})
}
@ -257,7 +262,19 @@ impl<I: SeqRead> DecoderImpl<I> {
loop {
match self.state {
State::Eof => return Ok(None),
State::Begin => return self.read_next_entry().await.map(Some),
State::Begin => {
let entry = self.read_next_entry().await.map(Some);
if let Ok(Some(ref entry)) = entry {
if let EntryKind::Version(version) = entry.kind() {
self.version = version.clone();
self.state = State::Root;
}
}
return entry;
}
State::Root => {
return self.read_next_entry().await.map(Some);
}
State::Default => {
// we completely finished an entry, so now we're going "up" in the directory
// hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
@ -388,6 +405,7 @@ impl<I: SeqRead> DecoderImpl<I> {
}
async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
let previous_state = self.state.clone();
self.state = State::Default;
self.entry.clear_data();
@ -407,6 +425,14 @@ impl<I: SeqRead> DecoderImpl<I> {
self.entry.metadata = Metadata::default();
self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
Ok(Some(self.entry.take()))
} else if header.htype == format::PXAR_FORMAT_VERSION {
if previous_state != State::Begin {
io_bail!("Got format version entry at unexpected position");
}
self.current_header = header;
self.entry.kind = EntryKind::Version(self.read_format_version().await?);
Ok(Some(self.entry.take()))
} else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
if header.htype == format::PXAR_ENTRY {
@ -766,6 +792,11 @@ impl<I: SeqRead> DecoderImpl<I> {
self.current_header.check_header_size()?;
seq_read_entry(self.input.archive_mut()).await
}
/// Reads the payload of a format version entry from the archive input.
///
/// The payload is read as a single `u64` and handed to
/// `FormatVersion::deserialize`.
///
/// # Errors
///
/// Propagates any error from reading the value or from
/// `FormatVersion::deserialize`.
async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
    let raw_version: u64 = seq_read_entry(self.input.archive_mut()).await?;
    let parsed = FormatVersion::deserialize(raw_version)?;
    Ok(parsed)
}
}
/// Reader for file contents inside a pxar archive.

View File

@ -17,7 +17,7 @@ use endian_trait::Endian;
use crate::binary_tree_array;
use crate::decoder::{self, SeqRead};
use crate::format::{self, GoodbyeItem, PayloadRef};
use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
use crate::{Metadata, PxarVariant};
pub mod aio;
@ -326,6 +326,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
/// Since only the "current" entry can be actively writing files, we share the file copy
/// buffer.
file_copy_buffer: Arc<Mutex<Vec<u8>>>,
/// Pxar format version to encode
version: format::FormatVersion,
}
impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
@ -350,11 +352,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
}
let mut state = EncoderState::default();
if let Some(payload_output) = output.payload_mut() {
let version = if let Some(payload_output) = output.payload_mut() {
let header = format::Header::with_content_size(format::PXAR_PAYLOAD_START_MARKER, 0);
header.check_header_size()?;
seq_write_struct(payload_output, header, &mut state.payload_write_position).await?;
}
format::FormatVersion::Version2
} else {
format::FormatVersion::Version1
};
let mut this = Self {
output,
@ -363,8 +368,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
file_copy_buffer: Arc::new(Mutex::new(unsafe {
crate::util::vec_new_uninitialized(1024 * 1024)
})),
version,
};
this.encode_format_version().await?;
this.encode_metadata(metadata).await?;
let state = this.state_mut()?;
state.files_offset = state.position();
@ -547,6 +554,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
file_size: u64,
payload_offset: PayloadOffset,
) -> io::Result<LinkOffset> {
if self.version == FormatVersion::Version1 {
io_bail!("payload references not supported in format version 1");
}
if self.output.payload().is_none() {
io_bail!("unable to add payload reference");
}
@ -762,6 +773,25 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
Ok(())
}
/// Writes the format version entry to the archive output, if the configured
/// version has an encoded representation.
///
/// When `self.version.serialize()` yields `None`, nothing is written and
/// `Ok(())` is returned.
///
/// # Errors
///
/// Fails if the archive write position is not `0`, since the version entry
/// has to be the first thing written, or if writing the entry fails.
async fn encode_format_version(&mut self) -> io::Result<()> {
    let encoded_version = match self.version.serialize() {
        Some(bytes) => bytes,
        // Nothing to encode for this version.
        None => return Ok(()),
    };

    let (mut output, state) = self.output_state()?;
    if state.write_position != 0 {
        io_bail!("format version must be encoded at the beginning of an archive");
    }

    seq_write_pxar_entry(
        output.archive_mut(),
        format::PXAR_FORMAT_VERSION,
        &encoded_version,
        &mut state.write_position,
    )
    .await
}
async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
let (mut output, state) = self.output_state()?;
seq_write_pxar_struct_entry(

View File

@ -6,6 +6,7 @@
//! item data.
//!
//! An archive contains items in the following order:
//! * `FORMAT_VERSION` -- (optional for v1), version of encoding format
//! * `ENTRY` -- containing general stat() data and related bits
//! * `XATTR` -- one extended attribute
//! * ... -- more of these when there are multiple defined
@ -80,6 +81,8 @@ pub mod mode {
}
// Generated by `cargo run --example mk-format-hashes`
/// Pxar format version entry, fallback to version 1 if not present
pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
/// Beginning of an entry (current version).
pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
/// Previous version of the entry struct
@ -186,6 +189,7 @@ impl Header {
impl Display for Header {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let readable = match self.htype {
PXAR_FORMAT_VERSION => "FORMAT_VERSION",
PXAR_FILENAME => "FILENAME",
PXAR_SYMLINK => "SYMLINK",
PXAR_HARDLINK => "HARDLINK",
@ -551,6 +555,31 @@ impl From<&std::fs::Metadata> for Stat {
}
}
/// Version of the pxar archive encoding format.
///
/// Archives that do not carry a format version entry are treated as
/// [`FormatVersion::Version1`], which is therefore the `Default` variant.
///
/// The enum is a plain, fieldless tag, so it is `Copy` and implements full
/// equality (`Eq`) in addition to `PartialEq`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum FormatVersion {
    /// Format version 1, assumed when no version entry is present.
    #[default]
    Version1,
    /// Format version 2.
    Version2,
}
impl FormatVersion {
pub fn deserialize(version: u64) -> Result<FormatVersion, io::Error> {
match version {
1u64 => Ok(FormatVersion::Version1),
2u64 => Ok(FormatVersion::Version2),
version => io_bail!("unknown format version {version}")
}
}
pub fn serialize(&self) -> Option<Vec<u8>> {
match self {
// format version 1 is not to be encoded
FormatVersion::Version1 => None,
FormatVersion::Version2 => Some(2u64.to_le_bytes().to_vec()),
}
}
}
#[derive(Clone, Debug)]
pub struct Filename {
pub name: Vec<u8>,

View File

@ -342,6 +342,9 @@ impl Acl {
/// Identifies whether the entry is a file, symlink, directory, etc.
#[derive(Clone, Debug)]
pub enum EntryKind {
/// Pxar file format version
Version(format::FormatVersion),
/// Symbolic links.
Symlink(format::Symlink),

View File

@ -229,6 +229,7 @@ impl Entry {
})?))
};
match item.kind() {
PxarEntryKind::Version(_) => continue,
PxarEntryKind::GoodbyeTable => break,
PxarEntryKind::File { size, .. } => {
let mut data = Vec::new();