Skip to content

Commit e783cb1

Browse files
committed
Port tendril benchmarks to criterion
Signed-off-by: Nico Burns <nico@nicoburns.com>
1 parent 9acddfd commit e783cb1

7 files changed

Lines changed: 245 additions & 226 deletions

File tree

tendril/Cargo.toml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,15 @@ utf-8 = { workspace = true }
2222

2323
[dev-dependencies]
2424
rand = { workspace = true }
25+
criterion = { workspace = true }
26+
tendril = { workspace = true }
27+
28+
[[bench]]
29+
name = "futf"
30+
harness = false
31+
32+
[[bench]]
33+
name = "tendril"
34+
harness = false
35+
2536

26-
[features]
27-
bench = []

tendril/benches/futf.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
extern crate criterion;
2+
extern crate tendril;
3+
4+
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
5+
use tendril::futf::classify;
6+
7+
static TEXT: &str = "
8+
All human beings are born free and equal in dignity and rights.
9+
They are endowed with reason and conscience and should act
10+
towards one another in a spirit of brotherhood.
11+
12+
Minden emberi lény szabadon születik és egyenlő méltósága és
13+
joga van. Az emberek, ésszel és lelkiismerettel bírván,
14+
egymással szemben testvéri szellemben kell hogy viseltessenek.
15+
16+
เราทุกคนเกิดมาอย่างอิสระ เราทุกคนมีความคิดและความเข้าใจเป็นของเราเอง
17+
เราทุกคนควรได้รับการปฏิบัติในทางเดียวกัน.
18+
19+
모든 인간은 태어날 때부터 자유로우며 그 존엄과 권리에 있어
20+
동등하다. 인간은 천부적으로 이성과 양심을 부여받았으며 서로
21+
형제애의 정신으로 행동하여야 한다.
22+
23+
ro remna cu se jinzi co zifre je simdu'i be le ry. nilselsi'a
24+
.e lei ry. selcru .i ry. se menli gi'e se sezmarde .i .ei
25+
jeseki'ubo ry. simyzu'e ta'i le tunba
26+
27+
ᏂᎦᏓ ᎠᏂᏴᏫ ᏂᎨᎫᏓᎸᎾ ᎠᎴ ᎤᏂᏠᏱ ᎤᎾᏕᎿ ᏚᏳᎧᏛ ᎨᏒᎢ. ᎨᏥᏁᎳ ᎤᎾᏓᏅᏖᏗ ᎠᎴ ᎤᏃᏟᏍᏗ
28+
ᎠᎴ ᏌᏊ ᎨᏒ ᏧᏂᎸᏫᏍᏓᏁᏗ ᎠᎾᏟᏅᏢ ᎠᏓᏅᏙ ᎬᏗ.";
29+
30+
// Randomly chosen byte offsets into TEXT; BOUNDARY gives the expected boundary flag for each.
31+
static IXES: &[usize] = &[
32+
778, 156, 87, 604, 1216, 365, 884, 311, 469, 515, 709, 162, 871, 206, 634, 442,
33+
];
34+
35+
static BOUNDARY: &[bool] = &[
36+
false, true, true, false, false, true, true, true, true, false, false, true, true, true, false,
37+
false,
38+
];
39+
40+
fn std_utf8_check(b: &mut Bencher) {
41+
b.iter(|| {
42+
assert!(IXES
43+
.iter()
44+
.zip(BOUNDARY.iter())
45+
.all(|(&ix, &expect)| { expect == TEXT.is_char_boundary(ix) }));
46+
});
47+
}
48+
49+
// We don't expect to be as fast as is_char_boundary, because we provide more
50+
// information. But we shouldn't be tremendously slower, either. A factor of
51+
// 5-10 is expected on this text.
52+
fn futf_check(b: &mut Bencher) {
53+
b.iter(|| {
54+
assert!(IXES.iter().zip(BOUNDARY.iter()).all(|(&ix, &expect)| {
55+
expect == (classify(TEXT.as_bytes(), ix).unwrap().rewind == 0)
56+
}));
57+
});
58+
}
59+
60+
/// Registers the two character-boundary benchmarks of this file with criterion.
fn tendril_benchmarks(c: &mut Criterion) {
    c.bench_function("std_utf8_check", std_utf8_check)
        .bench_function("futf_check", futf_check);
}
64+
65+
criterion_group!(benches, tendril_benchmarks);
66+
criterion_main!(benches);

tendril/benches/tendril.rs

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
2+
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
3+
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
4+
// option. This file may not be copied, modified, or distributed
5+
// except according to those terms.
6+
7+
// use std::borrow::ToOwned;
8+
// use std::collections::hash_map::{Entry, HashMap};
9+
10+
#![allow(clippy::manual_pattern_char_comparison)]
11+
12+
extern crate criterion;
13+
extern crate tendril;
14+
use std::collections::{hash_map::Entry, HashMap};
15+
16+
use criterion::{criterion_group, criterion_main, Bencher, Criterion};
17+
use tendril::StrTendril;
18+
19+
static EN_1: &str = "Days turn to nights turn to paper into rocks into plastic";
20+
21+
static EN_2: &str = "Here the notes in my laboratory journal cease. I was able to write the last \
22+
words only with great effort. By now it was already clear to me that LSD had \
23+
been the cause of the remarkable experience of the previous Friday, for the \
24+
altered perceptions were of the same type as before, only much more intense. I \
25+
had to struggle to speak intelligibly. I asked my laboratory assistant, who was \
26+
informed of the self-experiment, to escort me home. We went by bicycle, no \
27+
automobile being available because of wartime restrictions on their use. On the \
28+
way home, my condition began to assume threatening forms. Everything in my \
29+
field of vision wavered and was distorted as if seen in a curved mirror. I also \
30+
had the sensation of being unable to move from the spot. Nevertheless, my \
31+
assistant later told me that we had traveled very rapidly. Finally, we arrived \
32+
at home safe and sound, and I was just barely capable of asking my companion to \
33+
summon our family doctor and request milk from the neighbors.\n\n\
34+
In spite of my delirious, bewildered condition, I had brief periods of clear \
35+
and effective thinking—and chose milk as a nonspecific antidote for poisoning.";
36+
37+
static KR_1: &str = "러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \
38+
프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \
39+
개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.";
40+
41+
static HTML_KR_1: &str = "<p>러스트(<a href=\"http://rust-lang.org\">Rust</a>)는 모질라(<a href=\"\
42+
https://www.mozilla.org/\">mozilla.org</a>)에서 개발하고 있는, \
43+
메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \
44+
아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.</p>";
45+
46+
/// Minimum corpus length, in bytes, built for the "small" benchmarks (64 KiB).
const SMALL_SIZE: usize = 1 << 16;
/// Minimum corpus length, in bytes, built for the "big" benchmarks (1 MiB).
const LARGE_SIZE: usize = 1 << 20;
48+
49+
/// Groups the space-separated words of `input` by their first character.
///
/// Empty pieces (produced by leading, trailing or repeated spaces) are
/// skipped. Each word is copied into an owned `String`, and words under a
/// given key keep the order in which they appear in `input`.
fn index_words_string(input: &str) -> HashMap<char, Vec<String>> {
    let mut by_initial: HashMap<char, Vec<String>> = HashMap::new();
    let words = input
        .split(|c: char| c == ' ')
        .filter(|piece| !piece.is_empty());
    for word in words {
        // `word` is non-empty here, so it always has a first character.
        let initial = word.chars().next().unwrap();
        let owned = word.to_owned();
        match by_initial.entry(initial) {
            Entry::Occupied(slot) => slot.into_mut().push(owned),
            Entry::Vacant(slot) => {
                slot.insert(vec![owned]);
            },
        }
    }
    by_initial
}
68+
69+
fn index_words_tendril(input: &StrTendril) -> HashMap<char, Vec<StrTendril>> {
70+
let mut index = HashMap::new();
71+
let mut t = input.clone();
72+
loop {
73+
match t.pop_front_char_run(|c| c != ' ') {
74+
None => return index,
75+
Some((_, false)) => (),
76+
Some((word, true)) => match index.entry(word.chars().next().unwrap()) {
77+
Entry::Occupied(mut e) => {
78+
e.get_mut().push(word);
79+
},
80+
Entry::Vacant(e) => {
81+
e.insert(vec![word]);
82+
},
83+
},
84+
}
85+
}
86+
}
87+
88+
fn test_correctness(txt: &str) {
89+
use std::borrow::ToOwned;
90+
use tendril::SliceExt;
91+
92+
let input_string = txt.to_owned();
93+
let count_s = index_words_string(&input_string);
94+
let mut keys: Vec<char> = count_s.keys().cloned().collect();
95+
keys.sort();
96+
97+
let input_tendril = txt.to_tendril();
98+
let count_t = index_words_tendril(&input_tendril);
99+
let mut keys_t: Vec<char> = count_t.keys().cloned().collect();
100+
keys_t.sort();
101+
102+
assert_eq!(keys, keys_t);
103+
104+
for k in &keys {
105+
let vs = &count_s[k];
106+
let vt = &count_t[k];
107+
assert_eq!(vs.len(), vt.len());
108+
assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t));
109+
}
110+
}
111+
112+
fn index_words_small_string(b: &mut Bencher, txt: &str) {
113+
let mut s = String::new();
114+
while s.len() < SMALL_SIZE {
115+
s.push_str(txt);
116+
}
117+
b.iter(|| index_words_string(&s));
118+
}
119+
120+
fn index_words_small_tendril(b: &mut Bencher, txt: &str) {
121+
let mut t = StrTendril::new();
122+
while t.len() < SMALL_SIZE {
123+
t.push_slice(txt);
124+
}
125+
b.iter(|| index_words_tendril(&t));
126+
}
127+
128+
fn index_words_big_string(b: &mut Bencher, txt: &str) {
129+
let mut s = String::new();
130+
while s.len() < LARGE_SIZE {
131+
s.push_str(txt);
132+
}
133+
b.iter(|| index_words_string(&s));
134+
}
135+
136+
fn index_words_big_tendril(b: &mut Bencher, txt: &str) {
137+
let mut t = StrTendril::new();
138+
while t.len() < LARGE_SIZE {
139+
t.push_slice(txt);
140+
}
141+
b.iter(|| index_words_tendril(&t));
142+
}
143+
144+
/// Runs the four word-indexing benchmarks for one sample text.
///
/// * `c` - the criterion driver the group is registered with.
/// * `group_name` - name of the criterion benchmark group.
/// * `txt` - sample text handed to every benchmark as its input.
///
/// Before measuring anything, `test_correctness` verifies that the `String`
/// and tendril indexers agree on `txt`; it panics if they do not.
fn run_bench_group(c: &mut Criterion, group_name: &str, txt: &str) {
    let mut group = c.benchmark_group(group_name);

    test_correctness(txt);

    group.bench_with_input("index_words_small_string", txt, index_words_small_string);
    group.bench_with_input("index_words_small_tendril", txt, index_words_small_tendril);
    group.bench_with_input("index_words_big_string", txt, index_words_big_string);
    group.bench_with_input("index_words_big_tendril", txt, index_words_big_tendril);

    // Finish the group explicitly instead of relying on the implicit drop at
    // the end of the function.
    group.finish();
}
154+
155+
/// Runs one benchmark group per sample text, in the order listed.
fn tendril_benchmarks(c: &mut Criterion) {
    let corpora: [(&str, &str); 4] = [
        ("en_1", EN_1),
        ("en_2", EN_2),
        ("kr_1", KR_1),
        ("html_kr_1", HTML_KR_1),
    ];
    for &(group_name, txt) in corpora.iter() {
        run_bench_group(c, group_name, txt);
    }
}
161+
162+
criterion_group!(benches, tendril_benchmarks);
163+
criterion_main!(benches);

0 commit comments

Comments
 (0)