From 23af572d3fbaaaecfa77662ae40e6b0f540b5549 Mon Sep 17 00:00:00 2001
From: Dominik Csapak
Date: Tue, 12 Apr 2022 13:04:16 +0200
Subject: [PATCH] pbs-client: add 'create_tar' helper function

similar to create_zip, uses an accessor to write a tar into an output
that implements AsyncWrite, but we use a Decoder to iterate instead
of having a recursive function. This is done so that we get the
entries in the correct order, and it should be faster as well.

Includes files, directories, symlinks, hardlinks, block/char devs, fifos
into the tar.

If the hardlink points to outside the current dir to archive, promote
the first instance to a 'real' file, and use a hardlink for the rest.

Signed-off-by: Dominik Csapak
Signed-off-by: Wolfgang Bumiller
---
 pbs-client/Cargo.toml          |   1 +
 pbs-client/src/pxar/extract.rs | 233 ++++++++++++++++++++++++++++++++-
 pbs-client/src/pxar/mod.rs     |   2 +-
 3 files changed, 233 insertions(+), 3 deletions(-)

diff --git a/pbs-client/Cargo.toml b/pbs-client/Cargo.toml
index c63556d1..65f36419 100644
--- a/pbs-client/Cargo.toml
+++ b/pbs-client/Cargo.toml
@@ -28,6 +28,7 @@ tokio = { version = "1.6", features = [ "fs", "signal" ] }
 tokio-stream = "0.1.0"
 tower-service = "0.3.0"
 xdg = "2.2"
+tar = "0.4"
 
 pathpatterns = "0.1.2"
 
diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs
index b1f8718e..a0efcbe4 100644
--- a/pbs-client/src/pxar/extract.rs
+++ b/pbs-client/src/pxar/extract.rs
@@ -1,9 +1,10 @@
 //! Code for extraction of pxar contents onto the file system.
 
+use std::collections::HashMap;
 use std::convert::TryFrom;
 use std::ffi::{CStr, CString, OsStr, OsString};
 use std::io;
-use std::os::unix::ffi::OsStrExt;
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
@@ -17,7 +18,7 @@ use nix::sys::stat::Mode;
 
 use pathpatterns::{MatchEntry, MatchList, MatchType};
 use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
-use pxar::decoder::aio::Decoder;
+use pxar::decoder::{aio::Decoder, Contents};
 use pxar::format::Device;
 use pxar::{Entry, EntryKind, Metadata};
 
@@ -501,6 +502,234 @@ impl Extractor {
     }
 }
 
+/// Copies mode, mtime, uid and gid from the pxar `metadata` into the tar `header`.
+fn add_metadata_to_header(header: &mut tar::Header, metadata: &Metadata) {
+    header.set_mode(metadata.stat.mode as u32);
+    // clamp instead of `as u64`: a negative (pre-epoch) `secs` would wrap to a huge timestamp
+    header.set_mtime(u64::try_from(metadata.stat.mtime.secs).unwrap_or(0));
+    header.set_uid(metadata.stat.uid as u64);
+    header.set_gid(metadata.stat.gid as u64);
+}
+
+/// Adds a regular file entry of `size` bytes for `path` to `tar`.
+///
+/// The data is read from `contents`; with `None` the entry is added without any data.
+async fn tar_add_file<'a, W, T>(
+    tar: &mut proxmox_compression::tar::Builder<W>,
+    contents: Option<Contents<'a, T>>,
+    size: u64,
+    metadata: &Metadata,
+    path: &Path,
+) -> Result<(), Error>
+where
+    T: pxar::decoder::SeqRead + Unpin + Send + Sync + 'static,
+    W: tokio::io::AsyncWrite + Unpin + Send + 'static,
+{
+    let mut header = tar::Header::new_gnu();
+    header.set_entry_type(tar::EntryType::Regular);
+    header.set_size(size);
+    add_metadata_to_header(&mut header, metadata);
+    header.set_cksum();
+    match contents {
+        Some(content) => tar.add_entry(&mut header, path, content).await,
+        None => tar.add_entry(&mut header, path, tokio::io::empty()).await,
+    }
+    .map_err(|err| format_err!("could not send file entry: {}", err))?;
+    Ok(())
+}
+
+/// Converts link data to a `PathBuf`, removing the trailing '\0' if there is one.
+fn link_to_pathbuf(link: &[u8]) -> PathBuf {
+    // only strip an actual NUL terminator; this also avoids slicing with `len - 1` on
+    // empty input
+    let link = match link.split_last() {
+        Some((&0, rest)) => rest,
+        _ => link,
+    };
+    OsString::from_vec(link.to_vec()).into()
+}
+
+/// Creates a tar file from `path` and writes it into `output`
+///
+/// `path` is looked up in the archive behind `accessor`. If it can be entered as a directory,
+/// its files, directories, symlinks, hardlinks, devices and fifos are added with the path
+/// up to the parent of `path` stripped; all other entry kinds are skipped. Otherwise no
+/// entries are added.
+///
+/// Hardlinks whose target lies outside of that stripped prefix are promoted: the first one
+/// is added as a regular file, later ones to the same target as hardlinks to that entry.
+///
+/// With `verbose` set, added entries other than plain files are printed to stderr.
+pub async fn create_tar<T, W, P>(
+    output: W,
+    accessor: Accessor<T>,
+    path: P,
+    verbose: bool,
+) -> Result<(), Error>
+where
+    T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
+    W: tokio::io::AsyncWrite + Unpin + Send + 'static,
+    P: AsRef<Path>,
+{
+    let root = accessor.open_root().await?;
+    let file = root
+        .lookup(&path)
+        .await?
+        .ok_or_else(|| format_err!("error opening '{:?}'", path.as_ref()))?;
+
+    // names in the tar are relative to the parent of `path`
+    let mut prefix = PathBuf::new();
+    let mut components = file.entry().path().components();
+    components.next_back(); // discard last
+    for comp in components {
+        prefix.push(comp);
+    }
+
+    let mut tarencoder = proxmox_compression::tar::Builder::new(output);
+    // hardlink target outside of the archived dir -> path of the entry promoted to a file
+    let mut hardlinks: HashMap<PathBuf, PathBuf> = HashMap::new();
+
+    if let Ok(dir) = file.enter_directory().await {
+        let mut decoder = dir.decode_full().await?;
+        decoder.enable_goodbye_entries(false);
+        while let Some(entry) = decoder.next().await {
+            let entry = entry.map_err(|err| format_err!("cannot decode entry: {}", err))?;
+
+            let metadata = entry.metadata();
+            let path = entry.path().strip_prefix(&prefix)?.to_path_buf();
+
+            match entry.kind() {
+                EntryKind::File { .. } => {
+                    let size = decoder.content_size().unwrap_or(0);
+                    tar_add_file(&mut tarencoder, decoder.contents(), size, &metadata, &path)
+                        .await?
+                }
+                EntryKind::Hardlink(link) => {
+                    if !link.data.is_empty() {
+                        let entry = root
+                            .lookup(&path)
+                            .await?
+                            .ok_or_else(|| format_err!("error looking up '{:?}'", path))?;
+                        let realfile = accessor.follow_hardlink(&entry).await?;
+                        let metadata = realfile.entry().metadata();
+                        let realpath = link_to_pathbuf(&link.data);
+
+                        if verbose {
+                            eprintln!("adding '{}' to tar", path.display());
+                        }
+
+                        let stripped_path = match realpath.strip_prefix(&prefix) {
+                            Ok(path) => path,
+                            Err(_) => {
+                                // outside of our tar archive, add the first occurrence to the tar
+                                if let Some(path) = hardlinks.get(&realpath) {
+                                    path
+                                } else {
+                                    // NOTE(review): data is read from the current (hardlink)
+                                    // decoder entry, not `realfile` - confirm it is not empty
+                                    let size = decoder.content_size().unwrap_or(0);
+                                    tar_add_file(
+                                        &mut tarencoder,
+                                        decoder.contents(),
+                                        size,
+                                        metadata,
+                                        &path,
+                                    )
+                                    .await?;
+                                    hardlinks.insert(realpath, path);
+                                    continue;
+                                }
+                            }
+                        };
+                        let mut header = tar::Header::new_gnu();
+                        header.set_entry_type(tar::EntryType::Link);
+                        add_metadata_to_header(&mut header, metadata);
+                        header.set_size(0);
+                        tarencoder
+                            .add_link(&mut header, path, stripped_path)
+                            .await
+                            .map_err(|err| format_err!("could not send hardlink entry: {}", err))?;
+                    }
+                }
+                EntryKind::Symlink(link) if !link.data.is_empty() => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let realpath = link_to_pathbuf(&link.data);
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(tar::EntryType::Symlink);
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    tarencoder
+                        .add_link(&mut header, path, realpath)
+                        .await
+                        .map_err(|err| format_err!("could not send symlink entry: {}", err))?;
+                }
+                EntryKind::Fifo => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(tar::EntryType::Fifo);
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    header.set_device_major(0)?;
+                    header.set_device_minor(0)?;
+                    header.set_cksum();
+                    tarencoder
+                        .add_entry(&mut header, path, tokio::io::empty())
+                        .await
+                        .map_err(|err| format_err!("could not send fifo entry: {}", err))?;
+                }
+                EntryKind::Directory => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    // we cannot add the root path itself
+                    if path != Path::new("/") {
+                        let mut header = tar::Header::new_gnu();
+                        header.set_entry_type(tar::EntryType::Directory);
+                        add_metadata_to_header(&mut header, metadata);
+                        header.set_size(0);
+                        header.set_cksum();
+                        tarencoder
+                            .add_entry(&mut header, path, tokio::io::empty())
+                            .await
+                            .map_err(|err| format_err!("could not send dir entry: {}", err))?;
+                    }
+                }
+                EntryKind::Device(device) => {
+                    if verbose {
+                        eprintln!("adding '{}' to tar", path.display());
+                    }
+                    let entry_type = if metadata.stat.is_chardev() {
+                        tar::EntryType::Char
+                    } else {
+                        tar::EntryType::Block
+                    };
+                    let mut header = tar::Header::new_gnu();
+                    header.set_entry_type(entry_type);
+                    header.set_device_major(device.major as u32)?;
+                    header.set_device_minor(device.minor as u32)?;
+                    add_metadata_to_header(&mut header, metadata);
+                    header.set_size(0);
+                    tarencoder
+                        .add_entry(&mut header, path, tokio::io::empty())
+                        .await
+                        .map_err(|err| format_err!("could not send device entry: {}", err))?;
+                }
+                _ => {} // ignore all else
+            }
+        }
+    }
+
+    tarencoder.finish().await.map_err(|err| {
+        eprintln!("error during finishing of tar: {}", err);
+        err
+    })?;
+    Ok(())
+}
+
 pub async fn create_zip<T, W, P>(
     output: W,
     decoder: Accessor<T>,
diff --git a/pbs-client/src/pxar/mod.rs b/pbs-client/src/pxar/mod.rs
index f20a1f9e..725fc2d9 100644
--- a/pbs-client/src/pxar/mod.rs
+++ b/pbs-client/src/pxar/mod.rs
@@ -59,7 +59,7 @@ pub use flags::Flags;
 
 pub use create::{create_archive, PxarCreateOptions};
 pub use extract::{
-    create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
+    create_tar, create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
     PxarExtractOptions,
 };
 