Skip to content

Commit ffc64b6

Browse files
author
jeffhuang
committed
tr: add codspeed benchmark for ASCII range translation
Adds a divan-based benchmark suite under src/uu/tr/benches/ that codspeed can build and run, and registers uu_tr in the benchmarks workflow matrix. Each bench redirects fd 0 to a prepared file and fd 1 to /dev/null around uumain since tr only reads stdin. Covers the AVX2 ASCII-range fast path, single-char replace, multi-char table translation, and ASCII range delete, at 1/16/64 MB.
1 parent 14453fe commit ffc64b6

5 files changed

Lines changed: 148 additions & 0 deletions

File tree

.github/workflows/benchmarks.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ jobs:
4949
uu_sort,
5050
uu_split,
5151
uu_timeout,
52+
uu_tr,
5253
uu_tsort,
5354
uu_unexpand,
5455
uu_uniq,

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/uu/tr/BENCHMARKING.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,23 @@
44

55
`tr` performance is critical for large data processing pipelines. The implementation uses lookup tables for O(1) character operations.
66

7+
## CodSpeed (CI)
8+
9+
A divan-based benchmark suite at `benches/tr_bench.rs` runs in CI via
10+
[CodSpeed](https://codspeed.io/). To run it locally:
11+
12+
```shell
13+
cargo bench -p uu_tr
14+
```
15+
16+
Or to mirror the CI invocation exactly:
17+
18+
```shell
19+
cargo install cargo-codspeed
20+
cargo codspeed build -p uu_tr
21+
cargo codspeed run -p uu_tr
22+
```
23+
724
## Building
825

926
```shell

src/uu/tr/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,18 @@ uucore = { workspace = true, features = ["fs", "signals"] }
2626
fluent = { workspace = true }
2727
bytecount = { workspace = true, features = ["runtime-dispatch-simd"] }
2828

29+
[dev-dependencies]
30+
divan = { workspace = true }
31+
tempfile = { workspace = true }
32+
uucore = { workspace = true, features = ["benchmark"] }
33+
34+
[target.'cfg(unix)'.dev-dependencies]
35+
libc = { workspace = true }
36+
2937
[[bin]]
3038
name = "tr"
3139
path = "src/main.rs"
40+
41+
[[bench]]
42+
name = "tr_bench"
43+
harness = false

src/uu/tr/benches/tr_bench.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// This file is part of the uutils coreutils package.
2+
//
3+
// For the full copyright and license information, please view the LICENSE
4+
// file that was distributed with this source code.
5+
6+
// spell-checker:ignore aeiou
7+
8+
//! Benchmarks for `tr`.
9+
//!
10+
//! `tr` only reads stdin, so each bench redirects fd 0 onto a prepared
11+
//! input file before invoking `uumain`. fd 1 is redirected to /dev/null
12+
//! to keep benchmark output off the harness's terminal. Both fds are
13+
//! restored after every `uumain` invocation, i.e. once per timed iteration.
14+
15+
use divan::{Bencher, black_box};
16+
use uu_tr::uumain;
17+
use uucore::benchmark::{run_util_function, setup_test_file, text_data};
18+
19+
#[cfg(unix)]
20+
mod stdio {
21+
use std::fs::{File, OpenOptions};
22+
use std::os::unix::io::AsRawFd;
23+
use std::path::Path;
24+
25+
/// Redirects fd 0 to read from `path` and fd 1 to /dev/null for the
26+
/// duration of `f`. Saves and restores both fds.
27+
pub fn with_redirected_stdio<R>(path: &Path, f: impl FnOnce() -> R) -> R {
28+
let saved_stdin = unsafe { libc::dup(0) };
29+
let saved_stdout = unsafe { libc::dup(1) };
30+
assert!(saved_stdin >= 0 && saved_stdout >= 0);
31+
32+
let input = File::open(path).unwrap();
33+
let devnull = OpenOptions::new().write(true).open("/dev/null").unwrap();
34+
35+
unsafe {
36+
assert!(libc::dup2(input.as_raw_fd(), 0) >= 0);
37+
assert!(libc::dup2(devnull.as_raw_fd(), 1) >= 0);
38+
}
39+
40+
let result = f();
41+
42+
unsafe {
43+
libc::dup2(saved_stdin, 0);
44+
libc::dup2(saved_stdout, 1);
45+
libc::close(saved_stdin);
46+
libc::close(saved_stdout);
47+
}
48+
result
49+
}
50+
}
51+
52+
/// ASCII lowercase->uppercase range translation.
53+
/// Exercises the AVX2 ASCII-range fast path on x86_64 hosts that
54+
/// support it, and the scalar range fallback on other targets.
55+
#[cfg(unix)]
56+
#[divan::bench(args = [1, 16, 64])]
57+
fn tr_ascii_range_lower_to_upper(bencher: Bencher, size_mb: usize) {
58+
let data = text_data::generate_by_size(size_mb, 80);
59+
let file_path = setup_test_file(&data);
60+
61+
bencher.bench(|| {
62+
stdio::with_redirected_stdio(&file_path, || {
63+
black_box(run_util_function(uumain, &["a-z", "A-Z"]));
64+
});
65+
});
66+
}
67+
68+
/// Single-character replacement. Exercises the existing
69+
/// `process_single_char_replace` SIMD path; guards against
70+
/// regressions outside the new range fast path.
71+
#[cfg(unix)]
72+
#[divan::bench(args = [1, 16, 64])]
73+
fn tr_single_char_replace(bencher: Bencher, size_mb: usize) {
74+
let data = text_data::generate_by_size(size_mb, 80);
75+
let file_path = setup_test_file(&data);
76+
77+
bencher.bench(|| {
78+
stdio::with_redirected_stdio(&file_path, || {
79+
black_box(run_util_function(uumain, &["a", "b"]));
80+
});
81+
});
82+
}
83+
84+
/// Multi-character set translation. Falls through to the
85+
/// 256-byte translation table path (no fast path applies).
86+
#[cfg(unix)]
87+
#[divan::bench(args = [1, 16, 64])]
88+
fn tr_multi_char_translate(bencher: Bencher, size_mb: usize) {
89+
let data = text_data::generate_by_size(size_mb, 80);
90+
let file_path = setup_test_file(&data);
91+
92+
bencher.bench(|| {
93+
stdio::with_redirected_stdio(&file_path, || {
94+
black_box(run_util_function(uumain, &["aeiou", "AEIOU"]));
95+
});
96+
});
97+
}
98+
99+
/// Delete an ASCII range — covers the deletion path.
100+
#[cfg(unix)]
101+
#[divan::bench(args = [1, 16, 64])]
102+
fn tr_delete_ascii_range(bencher: Bencher, size_mb: usize) {
103+
let data = text_data::generate_by_size(size_mb, 80);
104+
let file_path = setup_test_file(&data);
105+
106+
bencher.bench(|| {
107+
stdio::with_redirected_stdio(&file_path, || {
108+
black_box(run_util_function(uumain, &["-d", "a-z"]));
109+
});
110+
});
111+
}
112+
113+
fn main() {
114+
divan::main();
115+
}

0 commit comments

Comments
 (0)