mirror of
https://git.proxmox.com/git/rustc
synced 2025-10-21 18:57:47 +00:00
134 lines
4.3 KiB
Rust
134 lines
4.3 KiB
Rust
use std::error::Error;
|
|
|
|
use regex_automata::{
|
|
dfa::{dense, regex::Regex, Automaton, OverlappingState},
|
|
nfa::thompson,
|
|
HalfMatch, MatchError, MatchKind, MultiMatch,
|
|
};
|
|
|
|
use crate::util::{BunkPrefilter, SubstringPrefilter};
|
|
|
|
// Tests that quit bytes in the forward direction work correctly.
|
|
#[test]
|
|
fn quit_fwd() -> Result<(), Box<dyn Error>> {
|
|
let dfa = dense::Builder::new()
|
|
.configure(dense::Config::new().quit(b'x', true))
|
|
.build("[[:word:]]+$")?;
|
|
|
|
assert_eq!(
|
|
dfa.find_earliest_fwd(b"abcxyz"),
|
|
Err(MatchError::Quit { byte: b'x', offset: 3 })
|
|
);
|
|
assert_eq!(
|
|
dfa.find_leftmost_fwd(b"abcxyz"),
|
|
Err(MatchError::Quit { byte: b'x', offset: 3 })
|
|
);
|
|
assert_eq!(
|
|
dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()),
|
|
Err(MatchError::Quit { byte: b'x', offset: 3 })
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// Tests that quit bytes in the reverse direction work correctly.
|
|
#[test]
|
|
fn quit_rev() -> Result<(), Box<dyn Error>> {
|
|
let dfa = dense::Builder::new()
|
|
.configure(dense::Config::new().quit(b'x', true))
|
|
.thompson(thompson::Config::new().reverse(true))
|
|
.build("^[[:word:]]+")?;
|
|
|
|
assert_eq!(
|
|
dfa.find_earliest_rev(b"abcxyz"),
|
|
Err(MatchError::Quit { byte: b'x', offset: 3 })
|
|
);
|
|
assert_eq!(
|
|
dfa.find_leftmost_rev(b"abcxyz"),
|
|
Err(MatchError::Quit { byte: b'x', offset: 3 })
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
// Tests that if we heuristically enable Unicode word boundaries but then
|
|
// instruct that a non-ASCII byte should NOT be a quit byte, then the builder
|
|
// will panic.
|
|
#[test]
|
|
#[should_panic]
|
|
fn quit_panics() {
|
|
dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false);
|
|
}
|
|
|
|
// Tests that if we attempt an overlapping search using a regex without a
|
|
// reverse DFA compiled with 'starts_for_each_pattern', then we get a panic.
|
|
#[test]
|
|
#[should_panic]
|
|
fn incorrect_config_overlapping_search_panics() {
|
|
let forward = dense::DFA::new(r"abca").unwrap();
|
|
let reverse = dense::Builder::new()
|
|
.configure(
|
|
dense::Config::new()
|
|
.anchored(true)
|
|
.match_kind(MatchKind::All)
|
|
.starts_for_each_pattern(false),
|
|
)
|
|
.thompson(thompson::Config::new().reverse(true))
|
|
.build(r"abca")
|
|
.unwrap();
|
|
|
|
let re = Regex::builder().build_from_dfas(forward, reverse);
|
|
let haystack = "bar abcabcabca abca foo".as_bytes();
|
|
re.find_overlapping(haystack, &mut OverlappingState::start());
|
|
}
|
|
|
|
// This tests an intesting case where even if the Unicode word boundary option
|
|
// is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode
|
|
// word boundaries to be enabled.
|
|
#[test]
|
|
fn unicode_word_implicitly_works() -> Result<(), Box<dyn Error>> {
|
|
let mut config = dense::Config::new();
|
|
for b in 0x80..=0xFF {
|
|
config = config.quit(b, true);
|
|
}
|
|
let dfa = dense::Builder::new().configure(config).build(r"\b")?;
|
|
let expected = HalfMatch::must(0, 1);
|
|
assert_eq!(dfa.find_leftmost_fwd(b" a"), Ok(Some(expected)));
|
|
Ok(())
|
|
}
|
|
|
|
// Tests that we can provide a prefilter to a Regex, and the search reports
|
|
// correct results.
|
|
#[test]
|
|
fn prefilter_works() -> Result<(), Box<dyn Error>> {
|
|
let re = Regex::new(r"a[0-9]+")
|
|
.unwrap()
|
|
.with_prefilter(SubstringPrefilter::new("a"));
|
|
let text = b"foo abc foo a1a2a3 foo a123 bar aa456";
|
|
let matches: Vec<(usize, usize)> =
|
|
re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect();
|
|
assert_eq!(
|
|
matches,
|
|
vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),]
|
|
);
|
|
Ok(())
|
|
}
|
|
|
|
// This test confirms that a prefilter is active by using a prefilter that
|
|
// reports false negatives.
|
|
#[test]
|
|
fn prefilter_is_active() -> Result<(), Box<dyn Error>> {
|
|
let text = b"za123";
|
|
let re = Regex::new(r"a[0-9]+")
|
|
.unwrap()
|
|
.with_prefilter(SubstringPrefilter::new("a"));
|
|
assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5)));
|
|
assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4)));
|
|
let re = re.with_prefilter(BunkPrefilter::new());
|
|
assert_eq!(re.find_leftmost(b"za123"), None);
|
|
// This checks that the prefilter is used when first starting the search,
|
|
// instead of waiting until at least one transition has occurred.
|
|
assert_eq!(re.find_leftmost(b"a123"), None);
|
|
Ok(())
|
|
}
|