mirror of
https://git.proxmox.com/git/rustc
synced 2025-08-17 02:40:45 +00:00
60 lines
1.5 KiB
Rust
60 lines
1.5 KiB
Rust
//! Compares the performance of `UnicodeSegmentation::graphemes` with stdlib's UTF-8 scalar-based
//! `std::str::chars`.
//!
//! It is expected that `std::str::chars` is faster than `UnicodeSegmentation::graphemes` since it
//! does not consider the complexity of grapheme clusters. The question in this benchmark
//! is how much slower full Unicode handling is.
|
|
|
|
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
|
|
|
|
use std::fs;
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
/// Base names of the sample texts exercised by the benchmarks.
///
/// Each entry is expanded to the path `benches/texts/<name>.txt` when the
/// benchmarks are registered.
const FILES: &[&str] = &[
    "arabic",
    "english",
    "hindi",
    "japanese",
    "korean",
    "mandarin",
    "russian",
    "source_code",
];
|
|
|
|
#[inline(always)]
|
|
fn grapheme(text: &str) {
|
|
for c in UnicodeSegmentation::graphemes(black_box(text), true) {
|
|
black_box(c);
|
|
}
|
|
}
|
|
|
|
/// Benchmark kernel: walks `text` one `char` at a time via `str::chars` and feeds
/// every `char` to `black_box` so the optimizer cannot discard the iteration.
///
/// The input itself is also passed through `black_box` before iterating.
#[inline(always)]
fn scalar(text: &str) {
    black_box(text).chars().for_each(|ch| {
        black_box(ch);
    });
}
|
|
|
|
fn bench_all(c: &mut Criterion) {
|
|
let mut group = c.benchmark_group("chars");
|
|
|
|
for file in FILES {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("grapheme", file),
|
|
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
|
|
|b, content| b.iter(|| grapheme(content)),
|
|
);
|
|
}
|
|
|
|
for file in FILES {
|
|
group.bench_with_input(
|
|
BenchmarkId::new("scalar", file),
|
|
&fs::read_to_string(format!("benches/texts/{}.txt", file)).unwrap(),
|
|
|b, content| b.iter(|| scalar(content)),
|
|
);
|
|
}
|
|
}
|
|
|
|
criterion_group!(benches, bench_all);
|
|
criterion_main!(benches);
|