diff --git a/src/pxar.rs b/src/pxar.rs index 481c0480..b760a9c7 100644 --- a/src/pxar.rs +++ b/src/pxar.rs @@ -62,4 +62,7 @@ pub use sequential_decoder::*; mod decoder; pub use decoder::*; +mod exclude_pattern; +pub use exclude_pattern::*; + mod helper; diff --git a/src/pxar/encoder.rs b/src/pxar/encoder.rs index 16548e87..1482f2fa 100644 --- a/src/pxar/encoder.rs +++ b/src/pxar/encoder.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; use super::format_definition::*; use super::binary_search_tree::*; use super::helper::*; +use super::exclude_pattern::*; use crate::tools::fs; use crate::tools::acl; use crate::tools::xattr; @@ -116,7 +117,7 @@ impl <'a, W: Write> Encoder<'a, W> { if verbose { println!("{:?}", me.full_path()); } - me.encode_dir(dir, &stat, magic)?; + me.encode_dir(dir, &stat, magic, Vec::new())?; Ok(()) } @@ -560,7 +561,7 @@ impl <'a, W: Write> Encoder<'a, W> { Ok(()) } - fn encode_dir(&mut self, dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64) -> Result<(), Error> { + fn encode_dir(&mut self, dir: &mut nix::dir::Dir, dir_stat: &FileStat, magic: i64, match_pattern: Vec) -> Result<(), Error> { //println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos); @@ -622,14 +623,19 @@ impl <'a, W: Write> Encoder<'a, W> { include_children = (self.root_st_dev == dir_stat.st_dev) || self.all_file_systems; } + // Expand the exclude match pattern inherited from the parent by local entries, if present + let mut local_match_pattern = match_pattern.clone(); + let pxar_exclude = match PxarExcludePattern::from_file(rawfd, ".pxarexclude") { + Ok(Some((mut excludes, buffer, stat))) => { + local_match_pattern.append(&mut excludes); + Some((buffer, stat)) + }, + Ok(None) => None, + Err(err) => bail!("error while reading exclude file - {}", err), + }; + if include_children { for entry in dir.iter() { - dir_count += 1; - if dir_count > MAX_DIRECTORY_ENTRIES { - bail!("too many directory items in {:?} (> {})", - self.full_path(), MAX_DIRECTORY_ENTRIES); - } - let entry = entry.map_err(|err| { format_err!("readir {:?} failed - {}", self.full_path(), err) })?; @@ -640,30 +646,68 @@ impl <'a, W: Write> Encoder<'a, W> { continue; } - name_list.push(filename); + let stat = match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { + Ok(stat) => stat, + Err(nix::Error::Sys(Errno::ENOENT)) => { + let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes()); + self.report_vanished_file(&self.full_path().join(filename_osstr))?; + continue; + }, + Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err), + }; + + match self.match_exclude_pattern(&filename, &stat, &local_match_pattern) { + (MatchType::Exclude, _) => { + let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes()); + eprintln!("matched by .pxarexclude entry - skipping: {:?}", self.full_path().join(filename_osstr)); + }, + (_, pattern_list) => name_list.push((filename, stat, pattern_list)), + } + + dir_count += 1; + if dir_count > MAX_DIRECTORY_ENTRIES { + bail!("too many directory items in {:?} (> {})", self.full_path(), MAX_DIRECTORY_ENTRIES); + } } } else { eprintln!("skip mount point: {:?}", self.full_path()); } - name_list.sort_unstable_by(|a, b| a.cmp(&b)); + name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0)); let mut goodbye_items = vec![]; - for filename in &name_list { + for (filename, stat, exclude_list) in name_list { + if filename.as_bytes() == b".pxarexclude" { + if let Some((ref content, ref stat)) = pxar_exclude { + let filefd = match nix::fcntl::openat(rawfd, filename.as_ref(), OFlag::O_NOFOLLOW, Mode::empty()) { + Ok(filefd) => filefd, + Err(nix::Error::Sys(Errno::ENOENT)) => { + self.report_vanished_file(&self.full_path())?; + continue; + }, + Err(err) => { + let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes()); + bail!("open file {:?} failed - {}", self.full_path().join(filename_osstr), err); + }, + }; + + let child_magic = if dir_stat.st_dev != stat.st_dev { + detect_fs_type(filefd)? + } else { + magic + }; + + self.write_filename(&filename)?; + self.encode_pxar_exclude(filefd, stat, child_magic, content)?; + continue; + } + } + self.relative_path.push(std::ffi::OsStr::from_bytes(filename.as_bytes())); if self.verbose { println!("{:?}", self.full_path()); } - let stat = match nix::sys::stat::fstatat(rawfd, filename.as_ref(), nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { - Ok(stat) => stat, - Err(nix::Error::Sys(Errno::ENOENT)) => { - self.report_vanished_file(&self.full_path())?; - continue; - } - Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err), - }; - let start_pos = self.writer_pos; if is_directory(&stat) { @@ -684,7 +728,7 @@ impl <'a, W: Write> Encoder<'a, W> { }; self.write_filename(&filename)?; - self.encode_dir(&mut dir, &stat, child_magic)?; + self.encode_dir(&mut dir, &stat, child_magic, exclude_list)?; } else if is_reg_file(&stat) { @@ -786,6 +830,36 @@ impl <'a, W: Write> Encoder<'a, W> { Ok(()) } + // If there is a match, an updated PxarExcludePattern list to pass to the matched child is returned. + fn match_exclude_pattern(&mut self, filename: &CStr, stat: &FileStat, match_pattern: &Vec) -> (MatchType, Vec) { + let mut child_pattern = Vec::new(); + let mut match_type = MatchType::None; + let is_dir = is_directory(&stat); + + for pattern in match_pattern { + match pattern.matches_filename(filename, is_dir) { + MatchType::None => {}, + MatchType::Exclude => match_type = MatchType::Exclude, + MatchType::Include => match_type = MatchType::Include, + MatchType::PartialExclude => { + if match_type != MatchType::Include && match_type != MatchType::Exclude { + match_type = MatchType::PartialExclude; + } + child_pattern.push(pattern.get_rest_pattern()); + }, + MatchType::PartialInclude => { + if match_type != MatchType::Include && match_type != MatchType::Exclude { + // always include partial matches, as we need to match children to decide + match_type = MatchType::PartialInclude; + } + child_pattern.push(pattern.get_rest_pattern()); + }, + } + } + + (match_type, child_pattern) + } + fn encode_file(&mut self, filefd: RawFd, stat: &FileStat, magic: i64) -> Result<(), Error> { //println!("encode_file: {:?}", self.full_path()); @@ -916,6 +990,54 @@ impl <'a, W: Write> Encoder<'a, W> { Ok(()) } + fn encode_pxar_exclude(&mut self, filefd: RawFd, stat: &FileStat, magic: i64, content: &[u8]) -> Result<(), Error> { + let mut entry = self.create_entry(&stat)?; + + self.read_chattr(filefd, &mut entry)?; + self.read_fat_attr(filefd, magic, &mut entry)?; + let (xattrs, fcaps) = self.read_xattrs(filefd, &stat)?; + let acl_access = self.read_acl(filefd, &stat, acl::ACL_TYPE_ACCESS)?; + let projid = self.read_quota_project_id(filefd, magic, &stat)?; + + self.write_entry(entry)?; + for xattr in xattrs { + self.write_xattr(xattr)?; + } + self.write_fcaps(fcaps)?; + for user in acl_access.users { + self.write_acl_user(user)?; + } + for group in acl_access.groups { + self.write_acl_group(group)?; + } + if let Some(group_obj) = acl_access.group_obj { + self.write_acl_group_obj(group_obj)?; + } + if let Some(projid) = projid { + self.write_quota_project_id(projid)?; + } + + let include_payload; + if is_virtual_file_system(magic) { + include_payload = false; + } else { + include_payload = (stat.st_dev == self.root_st_dev) || self.all_file_systems; + } + + if !include_payload { + eprintln!("skip content: {:?}", self.full_path()); + self.write_header(CA_FORMAT_PAYLOAD, 0)?; + return Ok(()); + } + + let size = content.len(); + self.write_header(CA_FORMAT_PAYLOAD, size as u64)?; + self.writer.write_all(content)?; + self.writer_pos += size; + + Ok(()) + } + // the report_XXX method may raise and error - depending on encoder configuration fn report_vanished_file(&self, path: &Path) -> Result<(), Error> { diff --git a/src/pxar/exclude_pattern.rs b/src/pxar/exclude_pattern.rs new file mode 100644 index 00000000..7a3b3aaa --- /dev/null +++ b/src/pxar/exclude_pattern.rs @@ -0,0 +1,207 @@ +use std::io::Read; +use std::ffi::{CStr, CString}; +use std::fs::File; +use std::os::unix::io::{FromRawFd, RawFd}; + +use failure::*; +use libc::{c_char, c_int}; +use nix::fcntl::OFlag; +use nix::errno::Errno; +use nix::NixPath; +use nix::sys::stat::{FileStat, Mode}; + +pub const FNM_NOMATCH: c_int = 1; + +extern "C" { + fn fnmatch(pattern: *const c_char, string: *const c_char, flags: c_int) -> c_int; +} + +#[derive(Debug, PartialEq)] +pub enum MatchType { + None, + Exclude, + Include, + PartialExclude, + PartialInclude, +} + +#[derive(Clone)] +pub struct PxarExcludePattern { + pattern: CString, + match_exclude: bool, + match_dir_only: bool, + split_pattern: (CString, CString), +} + +impl PxarExcludePattern { + pub fn from_file(parent_fd: RawFd, filename: &P) -> Result, Vec, FileStat)>, Error> { + let stat = match nix::sys::stat::fstatat(parent_fd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) { + Ok(stat) => stat, + Err(nix::Error::Sys(Errno::ENOENT)) => return Ok(None), + Err(err) => bail!("stat failed - {}", err), + }; + + let filefd = nix::fcntl::openat(parent_fd, filename, OFlag::O_NOFOLLOW, Mode::empty())?; + let mut file = unsafe { + File::from_raw_fd(filefd) + }; + + let mut content_buffer = Vec::new(); + let _bytes = file.read_to_end(&mut content_buffer)?; + + let mut exclude_pattern = Vec::new(); + for line in content_buffer.split(|&c| c == b'\n') { + if line.is_empty() { + continue; + } + if let Some(pattern) = Self::from_line(line)? { + exclude_pattern.push(pattern); + } + } + + Ok(Some((exclude_pattern, content_buffer, stat))) + } + + pub fn from_line(line: &[u8]) -> Result, Error> { + let mut input = line; + + if input.starts_with(b"#") { + return Ok(None); + } + + let match_exclude = if input.starts_with(b"!") { + // Reduce slice view to exclude "!" + input = &input[1..]; + false + } else { + true + }; + + // Paths ending in / match only directory names (no filenames) + let match_dir_only = if input.ends_with(b"/") { + let len = input.len(); + input = &input[..len - 1]; + true + } else { + false + }; + + // Ignore initial slash + if input.starts_with(b"/") { + input = &input[1..]; + } + + if input.is_empty() || input == b"." || + input == b".." || input.contains(&b'\0') { + bail!("invalid path component encountered"); + } + + // This will fail if the line contains b"\0" + let pattern = CString::new(input)?; + let split_pattern = split_at_slash(&pattern); + + Ok(Some(PxarExcludePattern { + pattern, + match_exclude, + match_dir_only, + split_pattern, + })) + } + + pub fn get_front_pattern(&self) -> PxarExcludePattern { + let pattern = split_at_slash(&self.split_pattern.0); + PxarExcludePattern { + pattern: self.split_pattern.0.clone(), + match_exclude: self.match_exclude, + match_dir_only: self.match_dir_only, + split_pattern: pattern, + } + } + + pub fn get_rest_pattern(&self) -> PxarExcludePattern { + let pattern = split_at_slash(&self.split_pattern.1); + PxarExcludePattern { + pattern: self.split_pattern.1.clone(), + match_exclude: self.match_exclude, + match_dir_only: self.match_dir_only, + split_pattern: pattern, + } + } + + pub fn dump(&self) { + match (self.match_exclude, self.match_dir_only) { + (true, true) => println!("{:#?}/", self.pattern), + (true, false) => println!("{:#?}", self.pattern), + (false, true) => println!("!{:#?}/", self.pattern), + (false, false) => println!("!{:#?}", self.pattern), + } + } + + pub fn matches_filename(&self, filename: &CStr, is_dir: bool) -> MatchType { + let mut res = MatchType::None; + let (front, _) = &self.split_pattern; + + let fnmatch_res = unsafe { + fnmatch(front.as_ptr() as *const libc::c_char, filename.as_ptr() as *const libc::c_char, 0) + }; + // TODO error cases + if fnmatch_res == 0 { + res = if self.match_exclude { + MatchType::PartialExclude + } else { + MatchType::PartialInclude + }; + } + + let full = if self.pattern.to_bytes().starts_with(b"**/") { + CString::new(&self.pattern.to_bytes()[3..]).unwrap() + } else { + CString::new(&self.pattern.to_bytes()[..]).unwrap() + }; + let fnmatch_res = unsafe { + fnmatch(full.as_ptr() as *const libc::c_char, filename.as_ptr() as *const libc::c_char, 0) + }; + // TODO error cases + if fnmatch_res == 0 { + res = if self.match_exclude { + MatchType::Exclude + } else { + MatchType::Include + }; + } + + if !is_dir && self.match_dir_only { + res = MatchType::None; + } + + res + } +} + +fn split_at_slash(match_pattern: &CStr) -> (CString, CString) { + let match_pattern = match_pattern.to_bytes(); + + let pattern = if match_pattern.starts_with(b"./") { + &match_pattern[2..] + } else { + match_pattern + }; + + let (mut front, mut rest) = match pattern.iter().position(|&c| c == b'/') { + Some(ind) => { + let (front, rest) = pattern.split_at(ind); + (front, &rest[1..]) + }, + None => (pattern, &pattern[0..0]), + }; + // '**' is treated such that it maches any directory + if front == b"**" { + front = b"*"; + rest = pattern; + } + + // Pattern where valid CStrings before, so it is safe to unwrap the Result + let front_pattern = CString::new(front).unwrap(); + let rest_pattern = CString::new(rest).unwrap(); + (front_pattern, rest_pattern) +}