use std::error::Error; use regex_automata::{ dfa::{dense, regex::Regex, Automaton, OverlappingState}, nfa::thompson, HalfMatch, MatchError, MatchKind, MultiMatch, }; use crate::util::{BunkPrefilter, SubstringPrefilter}; // Tests that quit bytes in the forward direction work correctly. #[test] fn quit_fwd() -> Result<(), Box> { let dfa = dense::Builder::new() .configure(dense::Config::new().quit(b'x', true)) .build("[[:word:]]+$")?; assert_eq!( dfa.find_earliest_fwd(b"abcxyz"), Err(MatchError::Quit { byte: b'x', offset: 3 }) ); assert_eq!( dfa.find_leftmost_fwd(b"abcxyz"), Err(MatchError::Quit { byte: b'x', offset: 3 }) ); assert_eq!( dfa.find_overlapping_fwd(b"abcxyz", &mut OverlappingState::start()), Err(MatchError::Quit { byte: b'x', offset: 3 }) ); Ok(()) } // Tests that quit bytes in the reverse direction work correctly. #[test] fn quit_rev() -> Result<(), Box> { let dfa = dense::Builder::new() .configure(dense::Config::new().quit(b'x', true)) .thompson(thompson::Config::new().reverse(true)) .build("^[[:word:]]+")?; assert_eq!( dfa.find_earliest_rev(b"abcxyz"), Err(MatchError::Quit { byte: b'x', offset: 3 }) ); assert_eq!( dfa.find_leftmost_rev(b"abcxyz"), Err(MatchError::Quit { byte: b'x', offset: 3 }) ); Ok(()) } // Tests that if we heuristically enable Unicode word boundaries but then // instruct that a non-ASCII byte should NOT be a quit byte, then the builder // will panic. #[test] #[should_panic] fn quit_panics() { dense::Config::new().unicode_word_boundary(true).quit(b'\xFF', false); } // Tests that if we attempt an overlapping search using a regex without a // reverse DFA compiled with 'starts_for_each_pattern', then we get a panic. #[test] #[should_panic] fn incorrect_config_overlapping_search_panics() { let forward = dense::DFA::new(r"abca").unwrap(); let reverse = dense::Builder::new() .configure( dense::Config::new() .anchored(true) .match_kind(MatchKind::All) .starts_for_each_pattern(false), ) .thompson(thompson::Config::new().reverse(true)) .build(r"abca") .unwrap(); let re = Regex::builder().build_from_dfas(forward, reverse); let haystack = "bar abcabcabca abca foo".as_bytes(); re.find_overlapping(haystack, &mut OverlappingState::start()); } // This tests an intesting case where even if the Unicode word boundary option // is disabled, setting all non-ASCII bytes to be quit bytes will cause Unicode // word boundaries to be enabled. #[test] fn unicode_word_implicitly_works() -> Result<(), Box> { let mut config = dense::Config::new(); for b in 0x80..=0xFF { config = config.quit(b, true); } let dfa = dense::Builder::new().configure(config).build(r"\b")?; let expected = HalfMatch::must(0, 1); assert_eq!(dfa.find_leftmost_fwd(b" a"), Ok(Some(expected))); Ok(()) } // Tests that we can provide a prefilter to a Regex, and the search reports // correct results. #[test] fn prefilter_works() -> Result<(), Box> { let re = Regex::new(r"a[0-9]+") .unwrap() .with_prefilter(SubstringPrefilter::new("a")); let text = b"foo abc foo a1a2a3 foo a123 bar aa456"; let matches: Vec<(usize, usize)> = re.find_leftmost_iter(text).map(|m| (m.start(), m.end())).collect(); assert_eq!( matches, vec![(12, 14), (14, 16), (16, 18), (23, 27), (33, 37),] ); Ok(()) } // This test confirms that a prefilter is active by using a prefilter that // reports false negatives. #[test] fn prefilter_is_active() -> Result<(), Box> { let text = b"za123"; let re = Regex::new(r"a[0-9]+") .unwrap() .with_prefilter(SubstringPrefilter::new("a")); assert_eq!(re.find_leftmost(b"za123"), Some(MultiMatch::must(0, 1, 5))); assert_eq!(re.find_leftmost(b"a123"), Some(MultiMatch::must(0, 0, 4))); let re = re.with_prefilter(BunkPrefilter::new()); assert_eq!(re.find_leftmost(b"za123"), None); // This checks that the prefilter is used when first starting the search, // instead of waiting until at least one transition has occurred. assert_eq!(re.find_leftmost(b"a123"), None); Ok(()) }