initial import

Signed-off-by: Wolfgang Bumiller <w.bumiller@errno.eu>
This commit is contained in:
Wolfgang Bumiller 2020-04-24 17:48:01 +02:00
commit d5c0e44a9d
5 changed files with 1106 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
Cargo.lock

9
Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "pathpatterns"
version = "0.1.0"
authors = ["Wolfgang Bumiller <w.bumiller@errno.eu>"]
edition = "2018"
[dependencies]
bitflags = "1.2"
libc = "0.2"

98
src/lib.rs Normal file
View File

@ -0,0 +1,98 @@
//! Include/Exclude file pattern matching.
//!
//! This implements a glob `Pattern` similar to `git`'s matching done in include/exclude files, and
//! some helper methods to figure out whether a file should be included given its designated path
//! and a list of include/exclude patterns.
//!
//! Here's a rather long matching example:
//!
//! ```
//! # use pathpatterns::*;
//! # fn test() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> {
//! let file_list: &'static [&'static [u8]] = &[
//! b"/things",
//! b"/things/file1.dat",
//! b"/things/file2.dat",
//! b"/things/shop",
//! b"/things/shop/info.txt",
//! b"/things/shop/apples",
//! b"/things/shop/apples/gala.txt",
//! b"/things/shop/apples/golden-delicious.txt",
//! b"/things/shop/bananas",
//! b"/things/shop/bananas/straight.txt",
//! b"/things/shop/bananas/curved.txt",
//! b"/things/shop/bananas/curved.bak",
//! b"/things/shop/bananas/other.txt",
//! ];
//!
//! let mut list = MatchList::new(vec![
//! MatchEntry::include(Pattern::path("shop")?),
//! MatchEntry::exclude(Pattern::path("bananas")?),
//! MatchEntry::include(Pattern::path("bananas/curved.*")?),
//! ]);
//!
//! assert_eq!(list.matches("/things", None), None);
//! assert_eq!(list.matches("/things/shop", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas", None), Some(MatchType::Exclude));
//! assert_eq!(list.matches("/things/shop/bananas/curved.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas/curved.bak", None), Some(MatchType::Include));
//!
//! // this will exclude the curved.bak file
//! list.push(MatchEntry::exclude(Pattern::path("curved.bak")?));
//! assert_eq!(list.matches("/things/shop/bananas/curved.bak", None), Some(MatchType::Exclude));
//! list.pop();
//! # assert_eq!(list.matches("/things/shop/bananas/curved.bak", None), Some(MatchType::Include));
//!
//! // but this will not:
//! list.push(MatchEntry::new(Pattern::path("curved.bak")?, MatchType::Exclude, MatchFlag::ANCHORED));
//! assert_eq!(list.matches("/things/shop/bananas/curved.bak", None), Some(MatchType::Include));
//! list.pop();
//!
//! // let's check some patterns, anything starting with a 'c', 'f' or 's':
//! let mut list = MatchList::new(vec![
//! MatchEntry::include(Pattern::path("[cfs]*")?),
//! ]);
//! assert_eq!(list.matches("/things", None), None);
//! assert_eq!(list.matches("/things/file1.dat", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/file2.dat", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/info.txt", None), None);
//! assert_eq!(list.matches("/things/shop/apples", None), None);
//! assert_eq!(list.matches("/things/shop/apples/gala.txt", None), None);
//! assert_eq!(list.matches("/things/shop/apples/golden-delicious.txt", None), None);
//! assert_eq!(list.matches("/things/shop/bananas", None), None);
//! assert_eq!(list.matches("/things/shop/bananas/straight.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas/curved.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/shop/bananas/curved.bak", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas/other.txt", None), None);
//!
//! // If we add `**` we end up including the entire `shop/` subtree:
//! list.push(MatchEntry::include(Pattern::path("[cfs]*/**")?));
//! assert_eq!(list.matches("/things", None), None);
//! assert_eq!(list.matches("/things/file1.dat", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/file2.dat", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/info.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/apples", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/apples/gala.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/shop/apples/golden-delicious.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas/straight.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/things/shop/bananas/curved.txt", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/shop/bananas/curved.bak", None), Some(MatchType::Include));
//! assert_eq!(list.matches("/shop/bananas/other.txt", None), Some(MatchType::Include));
//!
//! //
//! # Ok(())
//! # }
//! # test().unwrap()
//! ```
mod match_list;
mod pattern;
#[doc(inline)]
pub use match_list::{MatchEntry, MatchFlag, MatchList, MatchListRef, MatchPattern, MatchType};
#[doc(inline)]
pub use pattern::{ParseError, Pattern, PatternFlag};

293
src/match_list.rs Normal file
View File

@ -0,0 +1,293 @@
//! Helpers for include/exclude lists.
use bitflags::bitflags;
#[rustfmt::skip]
bitflags! {
    /// These flags influence what kind of paths should be matched.
    ///
    /// Bit `0x0001` controls anchoring; the bits in the upper byte select the file types an
    /// entry applies to (they are what `MatchEntry::matches_mode` tests against).
    pub struct MatchFlag: u16 {
        /// Match only a complete entry. The pattern `bar` will not match `/foo/bar`.
        const ANCHORED = 0x00_01;
        /// The entry applies to directories (`S_IFDIR`).
        const MATCH_DIRECTORIES = 0x01_00;
        /// The entry applies to regular files (`S_IFREG`).
        const MATCH_REGULAR_FILES = 0x02_00;
        /// The entry applies to symbolic links (`S_IFLNK`).
        const MATCH_SYMLINKS = 0x04_00;
        /// The entry applies to sockets (`S_IFSOCK`).
        const MATCH_SOCKETS = 0x08_00;
        /// The entry applies to FIFOs (`S_IFIFO`).
        const MATCH_FIFOS = 0x10_00;
        /// The entry applies to character devices (`S_IFCHR`).
        const MATCH_CHARDEVS = 0x20_00;
        /// The entry applies to block devices (`S_IFBLK`).
        const MATCH_BLOCKDEVS = 0x40_00;
        /// Shorthand for both character and block devices.
        const MATCH_DEVICES =
            MatchFlag::MATCH_CHARDEVS.bits() | MatchFlag::MATCH_BLOCKDEVS.bits();
        /// All file type bits combined. This is the default.
        const ANY_FILE_TYPE =
            MatchFlag::MATCH_DIRECTORIES.bits()
            | MatchFlag::MATCH_REGULAR_FILES.bits()
            | MatchFlag::MATCH_SYMLINKS.bits()
            | MatchFlag::MATCH_SOCKETS.bits()
            | MatchFlag::MATCH_FIFOS.bits()
            | MatchFlag::MATCH_CHARDEVS.bits()
            | MatchFlag::MATCH_BLOCKDEVS.bits();
    }
}
impl Default for MatchFlag {
fn default() -> Self {
Self::ANY_FILE_TYPE
}
}
/// A pattern entry. For now this only contains glob patterns, but we may want to add regex
/// patterns or user defined callback functions later on as well.
///
/// For regex we'd likely use the POSIX extended REs via `regexec(3)`, since we're targeting
/// command line interfaces and want something command line users are used to.
#[derive(Clone, Debug)]
pub enum MatchPattern {
    /// A glob pattern.
    Pattern(crate::Pattern),
}
impl From<crate::Pattern> for MatchPattern {
fn from(pattern: crate::Pattern) -> Self {
MatchPattern::Pattern(pattern)
}
}
/// A pattern can be used as an include or an exclude pattern. In a list of `MatchEntry`s, later
/// patterns take precedence over earlier patterns and the order of includes vs excludes makes a
/// difference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchType {
    /// Matching paths are to be included.
    Include,
    /// Matching paths are to be excluded.
    Exclude,
}
/// Convenience helpers
impl MatchType {
    /// Returns `true` if this is `MatchType::Include`.
    pub fn is_include(self) -> bool {
        match self {
            MatchType::Include => true,
            MatchType::Exclude => false,
        }
    }

    /// Returns `true` if this is `MatchType::Exclude`.
    pub fn is_exclude(self) -> bool {
        match self {
            MatchType::Exclude => true,
            MatchType::Include => false,
        }
    }
}
/// A single entry in a `MatchList`.
#[derive(Clone, Debug)]
pub struct MatchEntry {
    /// What to match paths against.
    pattern: MatchPattern,
    /// Whether a match means "include" or "exclude".
    ty: MatchType,
    /// Anchoring and file type restrictions for this entry.
    flags: MatchFlag,
}
impl MatchEntry {
    /// Build an entry from anything convertible into a `MatchPattern`, the kind of match it
    /// represents and the flags restricting it.
    pub fn new<T: Into<MatchPattern>>(pattern: T, ty: MatchType, flags: MatchFlag) -> Self {
        let pattern = pattern.into();
        Self { pattern, ty, flags }
    }

    /// Build an include-type entry using `MatchFlag::default()`.
    pub fn include<T: Into<MatchPattern>>(pattern: T) -> Self {
        Self::new(pattern, MatchType::Include, MatchFlag::default())
    }

    /// Build an exclude-type entry using `MatchFlag::default()`.
    pub fn exclude<T: Into<MatchPattern>>(pattern: T) -> Self {
        Self::new(pattern, MatchType::Exclude, MatchFlag::default())
    }

    /// Whether this entry includes or excludes the paths it matches.
    #[inline]
    pub fn match_type(&self) -> MatchType {
        self.ty
    }

    /// Test this entry's file type restrictions against a file mode retrieved from `stat()`.
    ///
    /// Returns `true` if the entry accepts all file types, or if the type encoded in
    /// `file_mode` is one of the types selected in the entry's flags. An unrecognized file
    /// type is only accepted by an entry which accepts all file types.
    pub fn matches_mode(&self, file_mode: u32) -> bool {
        // `contains` is only true when *every* file type bit is set; in that case the mode
        // itself is irrelevant.
        if self.flags.contains(MatchFlag::ANY_FILE_TYPE) {
            return true;
        }

        let file_type = match file_mode & libc::S_IFMT {
            libc::S_IFDIR => Some(MatchFlag::MATCH_DIRECTORIES),
            libc::S_IFREG => Some(MatchFlag::MATCH_REGULAR_FILES),
            libc::S_IFLNK => Some(MatchFlag::MATCH_SYMLINKS),
            libc::S_IFSOCK => Some(MatchFlag::MATCH_SOCKETS),
            libc::S_IFIFO => Some(MatchFlag::MATCH_FIFOS),
            libc::S_IFCHR => Some(MatchFlag::MATCH_CHARDEVS),
            libc::S_IFBLK => Some(MatchFlag::MATCH_BLOCKDEVS),
            _ => None,
        };

        file_type.map_or(false, |wanted| self.flags.intersects(wanted))
    }

    /// Test whether this entry's pattern matches any complete suffix of a path.
    ///
    /// For the path `/foo/bar/baz`, this tests whether `baz`, `bar/baz` or `foo/bar/baz` is
    /// matched. If the entry is `ANCHORED`, only the path as a whole is tested.
    pub fn matches_path_suffix<T: AsRef<[u8]>>(&self, path: T) -> bool {
        self.matches_path_suffix_do(path.as_ref())
    }

    fn matches_path_suffix_do(&self, path: &[u8]) -> bool {
        if self.flags.intersects(MatchFlag::ANCHORED) {
            return self.matches_path_exact(path);
        }

        if path.is_empty() {
            return false;
        }

        // Try everything following a slash, shortest suffix first.
        let suffix_matched = path
            .iter()
            .enumerate()
            .rev()
            .filter(|&(_, &byte)| byte == b'/')
            .any(|(slash, _)| self.matches_path_exact(&path[(slash + 1)..]));

        // A relative path such as "foo/bar" has no leading slash, so the complete string has
        // not been tried by the loop above.
        suffix_matched || (path[0] != b'/' && self.matches_path_exact(path))
    }

    /// Test whether this entry's pattern matches a path exactly.
    pub fn matches_path_exact<T: AsRef<[u8]>>(&self, path: T) -> bool {
        self.matches_path_exact_do(path.as_ref())
    }

    fn matches_path_exact_do(&self, path: &[u8]) -> bool {
        let MatchPattern::Pattern(glob) = &self.pattern;
        glob.matches(path)
    }

    /// Check whether the path contains a matching suffix and the file mode matches the
    /// expected file types.
    ///
    /// This is a combination of using `.matches_mode()` and `.matches_path_suffix()`. With a
    /// `file_mode` of `None` the file type check is skipped.
    pub fn matches<T: AsRef<[u8]>>(&self, path: T, file_mode: Option<u32>) -> bool {
        self.matches_do(path.as_ref(), file_mode)
    }

    fn matches_do(&self, path: &[u8], file_mode: Option<u32>) -> bool {
        let mode_ok = file_mode.map_or(true, |mode| self.matches_mode(mode));
        mode_ok && self.matches_path_suffix(path)
    }

    /// Check whether the path is matched exactly and the file mode matches the expected file
    /// types.
    ///
    /// This is a combination of using `.matches_mode()` and `.matches_path_exact()`. With a
    /// `file_mode` of `None` the file type check is skipped.
    pub fn matches_exact<T: AsRef<[u8]>>(&self, path: T, file_mode: Option<u32>) -> bool {
        self.matches_exact_do(path.as_ref(), file_mode)
    }

    fn matches_exact_do(&self, path: &[u8], file_mode: Option<u32>) -> bool {
        let mode_ok = file_mode.map_or(true, |mode| self.matches_mode(mode));
        mode_ok && self.matches_path_exact(path)
    }
}
/// Convenience type for an ordered list of `MatchEntry`s. This is just a `Vec<MatchEntry>`.
///
/// It dereferences to `MatchListRef`, which provides the actual matching methods.
#[derive(Clone, Debug, Default)]
pub struct MatchList {
    /// The entries in the order they were added.
    list: Vec<MatchEntry>,
}
impl MatchList {
    /// Create a list from anything convertible into a `Vec<MatchEntry>`.
    pub fn new<T: Into<Vec<MatchEntry>>>(list: T) -> Self {
        let list = list.into();
        Self { list }
    }

    /// Append an entry to the end of the list.
    pub fn push(&mut self, entry: MatchEntry) {
        self.list.push(entry);
    }

    /// Remove and return the last entry, or `None` if the list is empty.
    pub fn pop(&mut self) -> Option<MatchEntry> {
        self.list.pop()
    }
}
impl From<Vec<MatchEntry>> for MatchList {
fn from(list: Vec<MatchEntry>) -> Self {
Self { list }
}
}
impl std::ops::Deref for MatchList {
type Target = MatchListRef;
fn deref(&self) -> &Self::Target {
(&self.list[..]).into()
}
}
/// Helper to provide the `matches` method on slices of `MatchEntry`s.
///
/// This is an unsized wrapper around `[MatchEntry]` and is only ever used behind a reference.
/// The `repr(transparent)` attribute is what the conversion from `&[MatchEntry]` relies on.
#[repr(transparent)]
pub struct MatchListRef([MatchEntry]);
/// Exposes the wrapped entries as a plain slice.
impl std::ops::Deref for MatchListRef {
    type Target = [MatchEntry];

    fn deref(&self) -> &[MatchEntry] {
        &self.0
    }
}
/// Reinterpret a slice of entries as a `MatchListRef` without copying.
impl<'a> From<&'a [MatchEntry]> for &'a MatchListRef {
    fn from(entries: &'a [MatchEntry]) -> &'a MatchListRef {
        // SAFETY: `MatchListRef` is declared `#[repr(transparent)]` with `[MatchEntry]` as its
        // only field, so both pointer types refer to the same layout, and the returned
        // reference carries the same lifetime `'a` as the input slice.
        unsafe { &*(entries as *const [MatchEntry] as *const MatchListRef) }
    }
}
impl MatchListRef {
    /// Check whether this list contains an entry matching a suffix of the specified path (see
    /// `MatchEntry::matches`) and the specified file mode.
    ///
    /// Entries are tried from last to first, so the most recently added matching entry
    /// decides the result. Returns `None` if no entry matches.
    pub fn matches<T: AsRef<[u8]>>(&self, path: T, file_mode: Option<u32>) -> Option<MatchType> {
        self.matches_do(path.as_ref(), file_mode)
    }

    fn matches_do(&self, path: &[u8], file_mode: Option<u32>) -> Option<MatchType> {
        self.iter()
            .rev()
            .find(|entry| entry.matches(path, file_mode))
            .map(|entry| entry.match_type())
    }

    /// Check whether this list contains anything exactly matching the path and mode.
    ///
    /// Entries are tried from last to first, so the most recently added matching entry
    /// decides the result. Returns `None` if no entry matches.
    pub fn matches_exact<T: AsRef<[u8]>>(
        &self,
        path: T,
        file_mode: Option<u32>,
    ) -> Option<MatchType> {
        self.matches_exact_do(path.as_ref(), file_mode)
    }

    fn matches_exact_do(&self, path: &[u8], file_mode: Option<u32>) -> Option<MatchType> {
        self.iter()
            .rev()
            .find(|entry| entry.matches_exact(path, file_mode))
            .map(|entry| entry.match_type())
    }
}

704
src/pattern.rs Normal file
View File

@ -0,0 +1,704 @@
//! This implements the pattern matching algorithm found in git's `wildmatch.c`
use std::fmt;
use std::mem;
use std::ops::RangeInclusive;
use bitflags::bitflags;
/// A character class can be a list of characters `[abc]`, ranges of characters `[a-z]`, or named
/// classes `[[:digit:]]`, or a combination of them all. Additionally they can be negated with a
/// `^` at the beginning.
#[derive(Clone, Debug, Default)]
struct CharacterClass {
    /// Whether the class started with `^`, inverting the result of the membership test.
    negated: bool,
    /// Predicates for the named classes (`[:alpha:]`, ...) found in the class.
    named: Vec<fn(u8) -> bool>,
    /// Bytes listed individually.
    listed: Vec<u8>,
    /// Byte ranges, stored with `start <= end`.
    ranges: Vec<RangeInclusive<u8>>,
}
impl CharacterClass {
    /// Check if a byte matches this character class, taking negation into account.
    pub fn matches(&self, ch: u8) -> bool {
        let in_set = self.matches_do(ch);
        if self.negated {
            !in_set
        } else {
            in_set
        }
    }

    /// Check if a byte is a member of the class' set, ignoring negation.
    fn matches_do(&self, ch: u8) -> bool {
        if self.listed.iter().any(|&byte| byte == ch) {
            return true;
        }
        if self.ranges.iter().any(|range| range.contains(&ch)) {
            return true;
        }
        self.named.iter().any(|is_member| is_member(ch))
    }
}
/// One component of a pattern.
///
/// A parsed pattern is a sequence of these, matched against the text from left to right.
#[derive(Clone, Debug)]
enum Component {
    /// A literal match. The `/a` and `.txt` in `/a*.txt`.
    Literal(Vec<u8>),
    /// A question mark should match exactly one byte. If desired it may also match slashes, but
    /// this property is part of the whole pattern, not the component.
    QuestionMark,
    /// A "star" normally matches everything except when matching path names, where it does not
    /// match slashes.
    Star,
    /// A double star always matches everything, even slashes.
    StarStar,
    // SlashStarStarSlash, // maybe?
    /// A character class matches one byte out of a set of allowed or disallowed bytes.
    Class(CharacterClass),
}
impl Component {
    /// Check if this is a literal component whose last byte is a slash.
    fn ends_with_slash(&self) -> bool {
        if let Component::Literal(bytes) = self {
            bytes.ends_with(b"/")
        } else {
            false
        }
    }

    /// Check if this is a literal component beginning with a slash, or with a backslash
    /// directly followed by a slash.
    fn starts_with_slash(&self) -> bool {
        if let Component::Literal(bytes) = self {
            bytes.starts_with(b"/") || bytes.starts_with(b"\\/")
        } else {
            false
        }
    }
}
bitflags! {
    /// Flags affecting how a pattern should match.
    ///
    /// The flags are fixed when the pattern is created and are consulted both while parsing
    /// and while matching.
    pub struct PatternFlag: u8 {
        /// Ignore upper/lower case on the pattern. Note that this only affects ascii characters.
        /// We do not normalize/casefold unicode here. If you need this, case-fold your input
        /// strings and patterns first.
        const IGNORE_CASE = 0x01;
        /// This pattern is used for paths, meaning that `*` and `?` do not match slashes. Only
        /// explicit slashes and `**` can match slashes.
        const PATH_NAME = 0x02;
    }
}
/// Error cases which may happen while parsing a pattern.
#[derive(Clone, Debug)]
pub enum ParseError {
    /// The pattern contains a NUL byte.
    NulByteError,
    /// The pattern ends with a backslash which has nothing left to escape.
    TrailingBackslash,
    /// A character class was opened but never closed. The value is the byte offset at which
    /// the class starts.
    UnclosedCharacterClass(usize),
    /// A `[:` inside a character class is not followed by a known class name and `:]`. The
    /// value is the byte offset at which the enclosing character class starts.
    MalformedNamedCharacterClass(usize),
}
// `ParseError` wraps no other error, so the default `Error` methods are sufficient.
impl std::error::Error for ParseError {}
/// Human readable messages for pattern parse errors.
impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            ParseError::NulByteError => f.write_str("null-byte in pattern"),
            ParseError::TrailingBackslash => f.write_str("trailing backslash in pattern"),
            ParseError::UnclosedCharacterClass(offset) => write!(
                f,
                "unclosed character class in pattern, starting at byte {}",
                offset
            ),
            ParseError::MalformedNamedCharacterClass(offset) => write!(
                f,
                "malformed named character class in pattern, starting at byte {}",
                offset
            ),
        }
    }
}
/// See the match function for the algorithm.
///
/// Only `Match` makes `Pattern::matches` return `true`; the other variants steer the
/// backtracking done for `*` and `**`.
enum MatchResult {
    /// The remaining components matched the remaining text completely.
    Match,
    /// The current attempt failed; a surrounding `*`/`**` may retry at a later position.
    NoMatch,
    /// The text ran out while components were still left, so retrying with even less text is
    /// pointless. This is passed up through all surrounding `*`/`**` loops.
    AbortAll,
    /// A `*` in path mode reached a slash it cannot cross. Loops for a plain `*` in path mode
    /// pass this further up, while the `**` loop keeps retrying at later positions.
    AbortToStarStar,
}
/// An `fnmatch`-like pattern working like in `git`, with support for `*` vs `**` distinction for
/// paths.
///
/// Note that patterns are treated as ASCII strings, so unicode characters have no special effect.
/// You can use it for UTF-8 paths, but there's no direct support for `PatternFlag::IGNORE_CASE` on
/// unicode text.
#[derive(Clone, Debug)]
pub struct Pattern {
    /// Original pattern the user provided.
    ///
    /// Stored as a `CString`, which is why patterns containing NUL bytes are rejected.
    pattern: std::ffi::CString,
    /// Matching components we parsed out of the string.
    components: Vec<Component>,
    /// Flags used for the pattern.
    ///
    /// In the future we may want to optimize the `components` based on this, so `IGNORE_CASE`
    /// happens at "compile time" already.
    flags: PatternFlag,
}
impl Pattern {
    /// Get the original input pattern.
    pub fn pattern(&self) -> &std::ffi::CStr {
        &self.pattern
    }

    /// Create a new pattern.
    ///
    /// # Errors
    ///
    /// Returns a `ParseError` if the pattern contains a NUL byte, ends in a backslash which
    /// escapes nothing, or contains an unterminated or malformed character class.
    pub fn new<T: AsRef<[u8]>>(pattern: T, flags: PatternFlag) -> Result<Self, ParseError> {
        Self::new_do(pattern.as_ref(), flags)
    }

    /// Convenience shortcut to create a new pattern with `PatternFlag::PATH_NAME`.
    ///
    /// # Errors
    ///
    /// Fails under the same conditions as `Pattern::new`.
    pub fn path<T: AsRef<[u8]>>(pattern: T) -> Result<Self, ParseError> {
        Self::new_do(pattern.as_ref(), PatternFlag::PATH_NAME)
    }

    /// Parse `pattern` into its list of components.
    fn new_do(pattern: &[u8], flags: PatternFlag) -> Result<Self, ParseError> {
        let c_pattern = std::ffi::CString::new(pattern).map_err(|_| ParseError::NulByteError)?;

        let mut components = Vec::<Component>::new();
        // Literal bytes collected since the last non-literal component.
        let mut literal = Vec::<u8>::new();

        /// Move the collected literal bytes (if any) into a new `Literal` component.
        fn push_literal(
            literal: &mut Vec<u8>,
            components: &mut Vec<Component>,
            flags: PatternFlag,
        ) {
            if !literal.is_empty() {
                if flags.intersects(PatternFlag::IGNORE_CASE) {
                    for b in &mut literal[..] {
                        *b = b.to_ascii_lowercase();
                    }
                }
                components.push(Component::Literal(mem::take(literal)));
            }
        }

        let mut i = 0;
        while i != pattern.len() {
            match pattern[i] {
                0 => return Err(ParseError::NulByteError),
                b'\\' => {
                    // An escaped byte is always taken literally.
                    i += 1;
                    let mut ch = *pattern.get(i).ok_or(ParseError::TrailingBackslash)?;
                    if flags.intersects(PatternFlag::IGNORE_CASE) {
                        ch = ch.to_ascii_lowercase()
                    }
                    literal.push(ch);
                }
                b'?' => {
                    push_literal(&mut literal, &mut components, flags);
                    components.push(Component::QuestionMark);
                }
                b'*' => {
                    push_literal(&mut literal, &mut components, flags);
                    if pattern.get(i + 1).copied() == Some(b'*') {
                        let beg = i;
                        i += 1;
                        // swallow following stars as well:
                        while pattern.get(i + 1).copied() == Some(b'*') {
                            i += 1;
                        }

                        // git doesn't allow `**` attached to anything other than slashes to match
                        // subdirectories, so only `**`, `.../**`, `.../**/...` and `**/...` are
                        // valid.
                        if (beg == 0 || pattern[beg - 1] == b'/')
                            && ((i + 1) == pattern.len() || pattern[i + 1] == b'/')
                        {
                            components.push(Component::StarStar)
                        } else {
                            components.push(Component::Star)
                        }
                    } else {
                        components.push(Component::Star)
                    }
                }
                b'[' => {
                    push_literal(&mut literal, &mut components, flags);
                    // `new_i` is the index of the closing `]`, which the `i += 1` below skips.
                    let (component, new_i) = Self::parse_char_class(pattern, i, flags)?;
                    i = new_i;
                    components.push(component);
                }
                ch => literal.push(if flags.intersects(PatternFlag::IGNORE_CASE) {
                    ch.to_ascii_lowercase()
                } else {
                    ch
                }),
            }
            i += 1;
        }

        push_literal(&mut literal, &mut components, flags);

        Ok(Self {
            pattern: c_pattern,
            components,
            flags,
        })
    }

    /// Parse the character class starting with the `[` at `pattern[begin_i]`.
    ///
    /// On success, returns the parsed component together with the index of the closing `]`.
    ///
    /// # Errors
    ///
    /// * `UnclosedCharacterClass` if the pattern ends before the closing `]` is found.
    /// * `MalformedNamedCharacterClass` if a `[:` does not introduce a known class name.
    /// * `TrailingBackslash` if the pattern ends right after a backslash.
    /// * `NulByteError` if a NUL byte is encountered.
    fn parse_char_class(
        pattern: &[u8],
        begin_i: usize,
        flags: PatternFlag,
    ) -> Result<(Component, usize), ParseError> {
        let ignore_case = flags.intersects(PatternFlag::IGNORE_CASE);

        let mut i = begin_i + 1;

        let negated = if pattern.get(i).copied() == Some(b'^') {
            i += 1;
            true
        } else {
            false
        };

        let mut class = CharacterClass {
            negated,
            ..Default::default()
        };

        // The previously listed byte, if it can still become the start of a range.
        let mut prev = None;

        while i < pattern.len() {
            let mut new_prev = None;
            match pattern[i] {
                0 => return Err(ParseError::NulByteError),
                b'[' if pattern[(i + 1)..].starts_with(b":alnum:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_alphanumeric());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":alpha:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_alphabetic());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":blank:]") => {
                    i += 8;
                    class.named.push(|b| b == b' ' || b == b'\t');
                }
                b'[' if pattern[(i + 1)..].starts_with(b":cntrl:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_control());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":digit:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_digit());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":graph:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_graphic());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":lower:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_lowercase());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":print:]") => {
                    i += 8;
                    // Printable means space plus the graphic characters (0x20..=0x7e); DEL
                    // (0x7f) is a control character and must not be part of this class.
                    class.named.push(|b| b == b' ' || b.is_ascii_graphic());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":punct:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_punctuation());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":space:]") => {
                    i += 8;
                    class.named.push(|b| b.is_ascii_whitespace());
                }
                b'[' if pattern[(i + 1)..].starts_with(b":upper:]") => {
                    i += 8;
                    if ignore_case {
                        // The text byte gets lowercased before being tested against the
                        // class, so an uppercase test could never succeed.
                        class.named.push(|b| b.is_ascii_lowercase());
                    } else {
                        class.named.push(|b| b.is_ascii_uppercase());
                    }
                }
                b'[' if pattern[(i + 1)..].starts_with(b":xdigit:]") => {
                    i += 9;
                    class.named.push(|b| b.is_ascii_hexdigit());
                }
                b'[' if pattern.get(i + 1).copied() == Some(b':') => {
                    return Err(ParseError::MalformedNamedCharacterClass(begin_i));
                }
                // `i` is left pointing at the closing bracket for the caller.
                b']' => return Ok((Component::Class(class), i)),
                b'\\' => {
                    i += 1;
                    let mut ch = *pattern.get(i).ok_or(ParseError::TrailingBackslash)?;
                    // Case-fold escaped bytes like unescaped ones, otherwise they could never
                    // match the lowercased text byte.
                    if ignore_case {
                        ch = ch.to_ascii_lowercase();
                    }
                    class.listed.push(ch);
                    new_prev = Some(ch);
                }
                b'-' => match prev {
                    // A dash only forms a range between a listed byte and a following byte
                    // that is not the closing bracket.
                    Some(beg) if pattern.get(i + 1).copied() != Some(b']') => {
                        // The previous character was also pushed to `class.listed`, so remove it:
                        class.listed.pop();

                        i += 1;
                        let mut end = *pattern
                            .get(i)
                            .ok_or(ParseError::UnclosedCharacterClass(begin_i))?;

                        if end == b'\\' {
                            i += 1;
                            end = *pattern
                                .get(i)
                                .ok_or(ParseError::UnclosedCharacterClass(begin_i))?;
                        }

                        if ignore_case {
                            end = end.to_ascii_lowercase();
                        }

                        // Accept ranges given in reverse order by storing them normalized.
                        if beg <= end {
                            class.ranges.push(beg..=end);
                        } else {
                            class.ranges.push(end..=beg);
                        }
                    }
                    // A leading dash, a dash following a range or named class, or a dash
                    // directly in front of the closing bracket is a literal dash.
                    _ => {
                        new_prev = Some(b'-');
                        class.listed.push(b'-');
                    }
                },
                mut ch => {
                    if ignore_case {
                        ch = ch.to_ascii_lowercase();
                    }
                    new_prev = Some(ch);
                    class.listed.push(ch);
                }
            }
            prev = new_prev;
            i += 1;
        }

        // The pattern ended without a closing bracket. Reporting this here also keeps the
        // caller from advancing its index past the end of the pattern.
        Err(ParseError::UnclosedCharacterClass(begin_i))
    }

    /// Check whether this pattern matches a text.
    ///
    /// The complete text has to be matched, not just a part of it.
    pub fn matches<T: AsRef<[u8]>>(&self, text: T) -> bool {
        match self.do_matches(&self.components, 0, text.as_ref()) {
            MatchResult::Match => true,
            _ => false,
        }
    }

    /// Match `components[ci..]` against `text`.
    ///
    /// Recursive calls are only ever made for the components following a `*` or `**`.
    // The algorithm is ported from git's wildmatch.c.
    fn do_matches(&self, components: &[Component], mut ci: usize, mut text: &[u8]) -> MatchResult {
        if self.flags.intersects(PatternFlag::PATH_NAME) {
            // If we match a path then we want the pattern `"foo"` to match the path `"/foo"`.
        }

        while ci != components.len() {
            //eprintln!("Matching: {:?} at text: {:?}", components[ci], unsafe {
            //    std::str::from_utf8_unchecked(text)
            //},);
            match &components[ci] {
                Component::Literal(literal) => {
                    if text.is_empty() {
                        // The '*' implementation is NON-greedy, so if the text is empty, we
                        // already tried all shorter possible matches, so anything other than a '*'
                        // match can `AbortAll` if the text is empty.
                        return MatchResult::AbortAll;
                    }

                    if !starts_with(text, &literal, self.flags) {
                        return MatchResult::NoMatch;
                    }

                    text = &text[literal.len()..];
                }
                Component::QuestionMark => {
                    if text.is_empty() {
                        // See Literal case
                        return MatchResult::AbortAll;
                    }

                    if text[0] == b'/' && self.flags.intersects(PatternFlag::PATH_NAME) {
                        return MatchResult::NoMatch;
                    }

                    text = &text[1..];
                }
                Component::Class(class) => {
                    if text.is_empty() {
                        // See Literal case
                        return MatchResult::AbortAll;
                    }

                    let mut ch = text[0];
                    if self.flags.intersects(PatternFlag::IGNORE_CASE) {
                        ch = ch.to_ascii_lowercase();
                    }

                    if !class.matches(ch) {
                        return MatchResult::NoMatch;
                    }

                    text = &text[1..];
                }
                Component::Star if self.flags.intersects(PatternFlag::PATH_NAME) => {
                    // FIXME: Optimization: Instead of .contains, fast-skip to its index, like git
                    // does.
                    if (ci + 1) == components.len() && !text.contains(&b'/') {
                        return MatchResult::Match;
                    }

                    loop {
                        if text.is_empty() {
                            // We still abort all here, but git has some optimizations we could
                            // do instead before reaching this.
                            return MatchResult::AbortAll;
                        }

                        // FIXME: Optimization: Add the "try to advance faster" optimization from
                        // git here.

                        match self.do_matches(components, ci + 1, text) {
                            MatchResult::NoMatch => {
                                // A single star must not consume a slash in path mode.
                                if text[0] == b'/' {
                                    return MatchResult::AbortToStarStar;
                                }
                            }
                            other => return other,
                        }

                        text = &text[1..];
                    }
                }
                Component::Star | Component::StarStar => {
                    if (ci + 1) == components.len() {
                        return MatchResult::Match;
                    }

                    if let Component::StarStar = components[ci] {
                        if ci > 0
                            && components[ci - 1].ends_with_slash()
                            && ((ci + 1) == components.len()
                                || components[ci + 1].starts_with_slash())
                        {
                            // Assuming we matched `foo/` and are at `/` `**` `/`, see if we can let
                            // it match nothing, so that `foo/` `**` `/bar` can match `foo/bar`.
                            //
                            // Under the condition that the previous component ended with a slash
                            // (`components[ci - 1].ends_with_slash()`) we can safely move back by
                            // a byte in `text`.
                            //
                            // SAFETY: `components[ci - 1]` is a non-empty literal. Recursive
                            // calls only start directly after a `*`/`**` component and the
                            // initial call starts at index 0, so that literal was matched by
                            // this very invocation, which advanced `text` past it. `text` is
                            // therefore a subslice with at least one byte of the same buffer in
                            // front of it.
                            // NOTE(review): the pointer is derived from the narrowed slice
                            // reference; stricter aliasing checkers may reject this - confirm.
                            let text = unsafe {
                                std::slice::from_raw_parts(text.as_ptr().offset(-1), text.len() + 1)
                            };
                            #[allow(clippy::single_match)]
                            match self.do_matches(components, ci + 1, text) {
                                MatchResult::Match => return MatchResult::Match,
                                _ => (), // or just continue regularly
                            }
                        }
                    }

                    loop {
                        if text.is_empty() {
                            // See Literal case
                            return MatchResult::AbortAll;
                        }

                        match self.do_matches(components, ci + 1, text) {
                            MatchResult::NoMatch => (),
                            MatchResult::AbortToStarStar => (), // continue from here
                            other => return other,
                        }

                        text = &text[1..];
                    }
                }
            }
            ci += 1;
        }

        // All components are used up; it is only a match if the text is used up as well.
        if text.is_empty() {
            MatchResult::Match
        } else {
            MatchResult::NoMatch
        }
    }
}
/// Check whether `text` begins with `with`, ignoring ASCII case if `flags` contains
/// `PatternFlag::IGNORE_CASE`.
fn starts_with(text: &[u8], with: &[u8], flags: PatternFlag) -> bool {
    let ignore_case = flags.intersects(PatternFlag::IGNORE_CASE);
    match ignore_case {
        true => starts_with_caseless(text, with),
        false => text.starts_with(with),
    }
}
/// Check whether `text` begins with `with`, treating ASCII letters of different case as equal.
///
/// Bytes outside the ASCII letter range are compared as they are.
fn starts_with_caseless(text: &[u8], with: &[u8]) -> bool {
    // `get` yields `None` if `text` is shorter than the prefix we are looking for.
    match text.get(..with.len()) {
        Some(head) => head.eq_ignore_ascii_case(with),
        None => false,
    }
}
/// Exercises `Pattern` matching: `*` vs `**` with and without `PATH_NAME`, character classes
/// (plain, negated, ranges, named), `?`, multiple stars and `IGNORE_CASE`.
#[test]
fn test() {
    // A single star in path mode matches within one path component only.
    let pattern = Pattern::new("/hey/*/you", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("/hey/asdf/you"));
    assert!(!pattern.matches("/hey/asdf/more/you"));
    assert!(!pattern.matches("/heyasdf/you"));
    assert!(!pattern.matches("/heyasdfyou"));
    assert!(!pattern.matches("/hey/asdfyou"));
    assert!(!pattern.matches("/hey/you"));
    assert!(pattern.matches("/hey//you"));
    // Without path mode the star also matches across slashes.
    let pattern = Pattern::new("/hey/*/you", PatternFlag::empty()).unwrap();
    assert!(pattern.matches("/hey/asdf/you"));
    assert!(pattern.matches("/hey/asdf/more/you")); // different to PATH_NAME
    assert!(!pattern.matches("/heyasdf/you"));
    assert!(!pattern.matches("/heyasdfyou"));
    assert!(!pattern.matches("/hey/asdfyou"));
    assert!(!pattern.matches("/hey/you"));
    assert!(pattern.matches("/hey//you"));
    // A double star between slashes matches any number of components, including none.
    let pattern = Pattern::new("/hey/**/you", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("/hey/asdf/you"));
    assert!(pattern.matches("/hey/asdf/more/you"));
    assert!(!pattern.matches("/heyasdf/you"));
    assert!(!pattern.matches("/heyasdfyou"));
    assert!(!pattern.matches("/hey/asdfyou"));
    assert!(pattern.matches("/hey/you"));
    assert!(pattern.matches("/hey//you"));
    // The same with a character class in front of the double star.
    let pattern = Pattern::new("/he[yx]/**/you", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("/hey/asdf/you"));
    assert!(pattern.matches("/hey/asdf/more/you"));
    assert!(!pattern.matches("/heyasdf/you"));
    assert!(!pattern.matches("/heyasdfyou"));
    assert!(!pattern.matches("/hey/asdfyou"));
    assert!(pattern.matches("/hey/you"));
    assert!(pattern.matches("/hey//you"));
    assert!(pattern.matches("/hex/asdf/you"));
    assert!(pattern.matches("/hex/asdf/more/you"));
    assert!(!pattern.matches("/hexasdf/you"));
    assert!(!pattern.matches("/hexasdfyou"));
    assert!(!pattern.matches("/hex/asdfyou"));
    assert!(pattern.matches("/hex/you"));
    assert!(pattern.matches("/hex//you"));
    assert!(!pattern.matches("/hez/asdf/you"));
    assert!(!pattern.matches("/hez/asdf/more/you"));
    assert!(!pattern.matches("/hezasdf/you"));
    assert!(!pattern.matches("/hezasdfyou"));
    assert!(!pattern.matches("/hez/asdfyou"));
    assert!(!pattern.matches("/hez/you"));
    assert!(!pattern.matches("/hez//you"));
    // A negated character class inverts which of the names above are accepted.
    let pattern = Pattern::new("/he[^yx]/**/you", PatternFlag::PATH_NAME).unwrap();
    assert!(!pattern.matches("/hey/asdf/you"));
    assert!(!pattern.matches("/hey/asdf/more/you"));
    assert!(!pattern.matches("/heyasdf/you"));
    assert!(!pattern.matches("/heyasdfyou"));
    assert!(!pattern.matches("/hey/asdfyou"));
    assert!(!pattern.matches("/hey/you"));
    assert!(!pattern.matches("/hey//you"));
    assert!(!pattern.matches("/hex/asdf/you"));
    assert!(!pattern.matches("/hex/asdf/more/you"));
    assert!(!pattern.matches("/hexasdf/you"));
    assert!(!pattern.matches("/hexasdfyou"));
    assert!(!pattern.matches("/hex/asdfyou"));
    assert!(!pattern.matches("/hex/you"));
    assert!(!pattern.matches("/hex//you"));
    assert!(pattern.matches("/hez/asdf/you"));
    assert!(pattern.matches("/hez/asdf/more/you"));
    assert!(!pattern.matches("/hezasdf/you"));
    assert!(!pattern.matches("/hezasdfyou"));
    assert!(!pattern.matches("/hez/asdfyou"));
    assert!(pattern.matches("/hez/you"));
    assert!(pattern.matches("/hez//you"));
    // No proper prefix of the text may match, including the empty text.
    let wrong = b"/hez/";
    for i in 0..wrong.len() {
        assert!(!pattern.matches(&wrong[..i]));
    }
    // A character range, and texts that are too short or too long for the pattern.
    let pattern = Pattern::new("/tes[a-t]", PatternFlag::PATH_NAME).unwrap();
    assert!(!pattern.matches("/testoolong"));
    assert!(!pattern.matches("/tes"));
    assert!(!pattern.matches("/t"));
    assert!(!pattern.matches("/"));
    assert!(!pattern.matches(""));
    assert!(pattern.matches("/tesa"));
    assert!(pattern.matches("/test"));
    assert!(!pattern.matches("/tesu"));
    // The question mark behaves the same in both modes, except for slashes.
    let pattern_path = Pattern::new("/tes[a-t]/a?a", PatternFlag::PATH_NAME).unwrap();
    let pattern_nopath = Pattern::new("/tes[a-t]/a?a", PatternFlag::empty()).unwrap();
    assert!(!pattern_path.matches("/tesu"));
    assert!(!pattern_nopath.matches("/tesu"));
    assert!(!pattern_path.matches("/tesu/aaa"));
    assert!(!pattern_nopath.matches("/tesu/aaa"));
    assert!(!pattern_path.matches("/tesu/xax"));
    assert!(!pattern_nopath.matches("/tesu/xax"));
    assert!(!pattern_path.matches("/test/xax"));
    assert!(!pattern_nopath.matches("/test/xax"));
    assert!(!pattern_path.matches("/test/a"));
    assert!(!pattern_nopath.matches("/test/a"));
    assert!(!pattern_path.matches("/test/ab"));
    assert!(!pattern_nopath.matches("/test/ab"));
    assert!(pattern_path.matches("/test/aba"));
    assert!(pattern_nopath.matches("/test/aba"));
    assert!(pattern_path.matches("/test/aaa"));
    assert!(pattern_nopath.matches("/test/aaa"));
    assert!(pattern_path.matches("/test/aba"));
    assert!(pattern_nopath.matches("/test/aba"));
    // the difference is here:
    assert!(!pattern_path.matches("/test/a/a"));
    assert!(pattern_nopath.matches("/test/a/a"));
    // Several stars: the pattern must still consume the complete text.
    let pattern = Pattern::new("a*b*c", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("axxbxxc"));
    assert!(!pattern.matches("axxbxxcxx"));
    assert!(pattern.matches("axxbxxbxxc"));
    assert!(!pattern.matches("axxbxxbxxcxx"));
    assert!(pattern.matches("axxbxxbxxcxxc"));
    assert!(!pattern.matches("axxbxxbxxcxxcxx"));
    // A trailing star accepts whatever follows the last `c`.
    let pattern = Pattern::new("a*b*c*", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("axxbxxc"));
    assert!(pattern.matches("axxbxxcxx"));
    assert!(pattern.matches("axxbxxbxxc"));
    assert!(pattern.matches("axxbxxbxxcxx"));
    assert!(pattern.matches("axxbxxbxxcxxc"));
    assert!(pattern.matches("axxbxxbxxcxxcxx"));
    // Case insensitive matching of literals, ranges and named classes.
    let pattern = Pattern::new(
        "aB[c-fX-Z][[:upper:]][[:lower:]][[:digit:]k]",
        PatternFlag::PATH_NAME | PatternFlag::IGNORE_CASE,
    )
    .unwrap();
    // Debug output of the parsed pattern; only shown if the test fails or runs with --nocapture.
    eprintln!("{:#?}", pattern);
    assert!(pattern.matches("aBcUl3"));
    assert!(pattern.matches("AbCuL9"));
    assert!(!pattern.matches("aBgUl3"));
    assert!(!pattern.matches("aBgUl3"));
    assert!(!pattern.matches("aBcUlx"));
    assert!(pattern.matches("abculk"));
    assert!(pattern.matches("abxulk"));
    assert!(!pattern.matches("abxul"));
    // A `**` which is not delimited by slashes on both sides acts like a single star.
    let pattern = Pattern::new("a/b**/c", PatternFlag::PATH_NAME).unwrap();
    assert!(pattern.matches("a/bxx/c"));
    assert!(!pattern.matches("a/bxx/yy/c"));
}