mirror of
https://git.proxmox.com/git/rustc
synced 2025-10-22 05:37:57 +00:00
213 lines
7.2 KiB
Rust
213 lines
7.2 KiB
Rust
use regex_automata::{
|
|
hybrid::{
|
|
dfa::DFA,
|
|
regex::{self, Regex},
|
|
},
|
|
nfa::thompson,
|
|
MatchKind, SyntaxConfig,
|
|
};
|
|
use regex_syntax as syntax;
|
|
|
|
use regex_test::{
|
|
bstr::{BString, ByteSlice},
|
|
CompiledRegex, Match, MatchKind as TestMatchKind, RegexTest, RegexTests,
|
|
SearchKind as TestSearchKind, TestResult, TestRunner,
|
|
};
|
|
|
|
use crate::{suite, Result};
|
|
|
|
/// Runs the test suite against a hybrid NFA/DFA regex built from an
/// unmodified builder, i.e., with its default configuration.
#[test]
fn default() -> Result<()> {
    let mut runner = TestRunner::new()?;
    runner.test_iter(suite()?.iter(), compiler(Regex::builder())).assert();
    Ok(())
}
|
|
|
|
/// Runs the test suite against the hybrid NFA/DFA with NFA shrinking turned
/// off.
///
/// For a lazy DFA, this is the configuration one typically wants. Shrinking
/// the NFA mainly pays off when building a full DFA, where it can sharply cut
/// determinization time, but shrinking is itself fairly expensive. A lazy DFA
/// does no compilation before a search begins (apart from building the NFA),
/// so skipping NFA shrinking is usually the better trade.
#[test]
fn no_nfa_shrink() -> Result<()> {
    let unshrunk = thompson::Config::new().shrink(false);
    let mut builder = Regex::builder();
    builder.thompson(unshrunk);

    let mut runner = TestRunner::new()?;
    runner
        // This test exceeds the default cache capacity when the NFA is not
        // shrunk, so it is excluded here.
        .blacklist("expensive/regression-many-repeat-no-stack-overflow")
        .test_iter(suite()?.iter(), compiler(builder))
        .assert();
    Ok(())
}
|
|
|
|
/// Runs the test suite against the hybrid NFA/DFA with the
/// 'starts_for_each_pattern' option switched on.
#[test]
fn starts_for_each_pattern() -> Result<()> {
    let per_pattern_starts = DFA::config().starts_for_each_pattern(true);
    let mut builder = Regex::builder();
    builder.dfa(per_pattern_starts);

    let mut runner = TestRunner::new()?;
    runner.test_iter(suite()?.iter(), compiler(builder)).assert();
    Ok(())
}
|
|
|
|
/// Runs the test suite against the hybrid NFA/DFA with byte classes turned
/// off.
///
/// N.B. Turning off byte classes does not remove any indirection at search
/// time. Its only effect is that each byte value becomes its own distinct
/// equivalence class.
#[test]
fn no_byte_classes() -> Result<()> {
    let without_classes = DFA::config().byte_classes(false);
    let mut builder = Regex::builder();
    builder.dfa(without_classes);

    let mut runner = TestRunner::new()?;
    runner.test_iter(suite()?.iter(), compiler(builder)).assert();
    Ok(())
}
|
|
|
|
/// Checks that, with the default cache capacity, the hybrid NFA/DFA gets
/// through every test in the suite without ever clearing its cache.
///
/// N.B. Should a test be added to the regex suite that does force the cache
/// to be cleared, the right response is to skip that test here, which can be
/// done via the 'blacklist' method on 'TestRunner'.
#[test]
fn no_cache_clearing() -> Result<()> {
    // A minimum cache clear count of zero is what enforces "never clears".
    let never_clear = DFA::config().minimum_cache_clear_count(Some(0));
    let mut builder = Regex::builder();
    builder.dfa(never_clear);

    let mut runner = TestRunner::new()?;
    runner.test_iter(suite()?.iter(), compiler(builder)).assert();
    Ok(())
}
|
|
|
|
/// Runs the test suite against the hybrid NFA/DFA when the minimum cache
/// capacity is set.
#[test]
fn min_cache_capacity() -> Result<()> {
    // Request a capacity of zero and skip the capacity check so that the
    // builder accepts the request.
    let tiny_cache =
        DFA::config().cache_capacity(0).skip_cache_capacity_check(true);
    let mut builder = Regex::builder();
    builder.dfa(tiny_cache);

    let mut runner = TestRunner::new()?;
    runner.test_iter(suite()?.iter(), compiler(builder)).assert();
    Ok(())
}
|
|
|
|
fn compiler(
|
|
mut builder: regex::Builder,
|
|
) -> impl FnMut(&RegexTest, &[BString]) -> Result<CompiledRegex> {
|
|
move |test, regexes| {
|
|
let regexes = regexes
|
|
.iter()
|
|
.map(|r| r.to_str().map(|s| s.to_string()))
|
|
.collect::<std::result::Result<Vec<String>, _>>()?;
|
|
|
|
// Check if our regex contains things that aren't supported by DFAs.
|
|
// That is, Unicode word boundaries when searching non-ASCII text.
|
|
let mut thompson = thompson::Builder::new();
|
|
thompson.syntax(config_syntax(test)).configure(config_thompson(test));
|
|
if let Ok(nfa) = thompson.build_many(®exes) {
|
|
let non_ascii = test.input().iter().any(|&b| !b.is_ascii());
|
|
if nfa.has_word_boundary_unicode() && non_ascii {
|
|
return Ok(CompiledRegex::skip());
|
|
}
|
|
}
|
|
if !configure_regex_builder(test, &mut builder) {
|
|
return Ok(CompiledRegex::skip());
|
|
}
|
|
let re = builder.build_many(®exes)?;
|
|
let mut cache = re.create_cache();
|
|
Ok(CompiledRegex::compiled(move |test| -> Vec<TestResult> {
|
|
run_test(&re, &mut cache, test)
|
|
}))
|
|
}
|
|
}
|
|
|
|
fn run_test(
|
|
re: &Regex,
|
|
cache: &mut regex::Cache,
|
|
test: &RegexTest,
|
|
) -> Vec<TestResult> {
|
|
let is_match = if re.is_match(cache, test.input()) {
|
|
TestResult::matched()
|
|
} else {
|
|
TestResult::no_match()
|
|
};
|
|
let is_match = is_match.name("is_match");
|
|
|
|
let find_matches = match test.search_kind() {
|
|
TestSearchKind::Earliest => {
|
|
let it = re
|
|
.find_earliest_iter(cache, test.input())
|
|
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
|
.map(|m| Match {
|
|
id: m.pattern().as_usize(),
|
|
start: m.start(),
|
|
end: m.end(),
|
|
});
|
|
TestResult::matches(it).name("find_earliest_iter")
|
|
}
|
|
TestSearchKind::Leftmost => {
|
|
let it = re
|
|
.find_leftmost_iter(cache, test.input())
|
|
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
|
.map(|m| Match {
|
|
id: m.pattern().as_usize(),
|
|
start: m.start(),
|
|
end: m.end(),
|
|
});
|
|
TestResult::matches(it).name("find_leftmost_iter")
|
|
}
|
|
TestSearchKind::Overlapping => {
|
|
let it = re
|
|
.find_overlapping_iter(cache, test.input())
|
|
.take(test.match_limit().unwrap_or(std::usize::MAX))
|
|
.map(|m| Match {
|
|
id: m.pattern().as_usize(),
|
|
start: m.start(),
|
|
end: m.end(),
|
|
});
|
|
TestResult::matches(it).name("find_overlapping_iter")
|
|
}
|
|
};
|
|
vec![is_match, find_matches]
|
|
}
|
|
|
|
/// Configures the given regex builder with all relevant settings on the given
|
|
/// regex test.
|
|
///
|
|
/// If the regex test has a setting that is unsupported, then this returns
|
|
/// false (implying the test should be skipped).
|
|
fn configure_regex_builder(
|
|
test: &RegexTest,
|
|
builder: &mut regex::Builder,
|
|
) -> bool {
|
|
let match_kind = match test.match_kind() {
|
|
TestMatchKind::All => MatchKind::All,
|
|
TestMatchKind::LeftmostFirst => MatchKind::LeftmostFirst,
|
|
TestMatchKind::LeftmostLongest => return false,
|
|
};
|
|
|
|
let dense_config = DFA::config()
|
|
.anchored(test.anchored())
|
|
.match_kind(match_kind)
|
|
.unicode_word_boundary(true);
|
|
let regex_config = Regex::config().utf8(test.utf8());
|
|
builder
|
|
.configure(regex_config)
|
|
.syntax(config_syntax(test))
|
|
.thompson(config_thompson(test))
|
|
.dfa(dense_config);
|
|
true
|
|
}
|
|
|
|
/// Configuration of a Thompson NFA compiler from a regex test.
|
|
fn config_thompson(test: &RegexTest) -> thompson::Config {
|
|
thompson::Config::new().utf8(test.utf8())
|
|
}
|
|
|
|
/// Configuration of the regex parser from a regex test.
|
|
fn config_syntax(test: &RegexTest) -> SyntaxConfig {
|
|
SyntaxConfig::new()
|
|
.case_insensitive(test.case_insensitive())
|
|
.unicode(test.unicode())
|
|
.utf8(test.utf8())
|
|
}
|