Skip to content

Commit db4aa4d

Browse files
author
Gunter Schmidt
committed
feat: Criterion Benchmark
1 parent 6f082c6 commit db4aa4d

2 files changed

Lines changed: 293 additions & 2 deletions

File tree

Cargo.toml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,12 @@ same-file = "1.0.6"
2323
unicode-width = "0.2.0"
2424

2525
[dev-dependencies]
26-
pretty_assertions = "1.4.0"
2726
assert_cmd = "2.0.14"
27+
criterion = { version = "0.8.2", features = ["html_reports"] }
28+
pretty_assertions = "1.4.0"
2829
predicates = "3.1.0"
29-
tempfile = "3.10.1"
30+
rand = "0.10.0"
31+
tempfile = "3.26.0"
3032

3133
[profile.release]
3234
lto = "thin"
@@ -40,3 +42,8 @@ panic = "abort"
4042
[profile.dist]
4143
inherits = "release"
4244
lto = "thin"
45+
46+
[[bench]]
47+
name = "bench_cmp"
48+
path = "benches/benchmarks-criterion.rs"
49+
harness = false

benches/benchmarks-criterion.rs

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
/// Benchmarks, currently only for cmp
2+
///
3+
/// Provides some general functions, e.g. to create files to compare in different sizes.
4+
///
5+
/// use hyperfine to benchmark against cmp
6+
/// * hyperfine -i "target/release/diffutils cmp from_file_10000000.txt to_file_10000000.txt"
7+
/// * hyperfine -i "cmp from_file_10000000.txt to_file_10000000.txt"
8+
///
9+
/// The Rust version seems twice as slow.
10+
use criterion::{criterion_group, criterion_main, Criterion};
11+
// use std::env;
12+
// use std::hint::black_box;
13+
use rand::RngExt;
14+
use std::io::{BufWriter, Write};
15+
use std::path::Path;
16+
use std::process::Command;
17+
use std::{ffi::OsString, fs::File, time::Duration};
18+
19+
// Warm-up time in milliseconds; handed to `warm_up_time` of every benchmark group in this file.
const WARM_UP_TIME_MS: u64 = 500;
20+
// Only referenced from the commented-out `measurement_time` calls, hence the lint allowance.
#[allow(unused)]
21+
const MEASUREMENT_TIME_MS: u64 = 2000;
22+
23+
// Base names of the generated test files; `generate_test_files` appends `_<lines>.txt`.
24+
const FROM_FILE: &str = "from_file";
25+
const TO_FILE: &str = "to_file";
26+
27+
// Benchmark functions that make up the `benches` group (the diff benchmark is still disabled).
criterion_group!(
28+
benches,
29+
bench_parser,
30+
bench_cmp // , bench_diff
31+
);
32+
// Entry point for the bench target; Cargo.toml sets `harness = false` for it.
criterion_main!(benches);
33+
34+
// All results are a few microseconds, so negligible.
35+
fn bench_parser(c: &mut Criterion) {
36+
let mut group = c.benchmark_group("Bench parser");
37+
38+
group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS));
39+
// group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS));
40+
// group.sample_size(10);
41+
42+
group.bench_function("Parse cmp", |b| {
43+
b.iter(|| {
44+
cmp_parse_only(
45+
"cmd file_1.txt file_2.txt -bl --bytes=2048 --ignore-initial=100KiB:1MiB",
46+
)
47+
})
48+
});
49+
50+
group.bench_function("Parse diff", |b| {
51+
b.iter(|| diff_parse_only("diff file_1.txt file_2.txt"))
52+
});
53+
// group.bench_function("Parse error", |b| {
54+
// b.iter(|| parse_single_arg("cmd file_1.txt file_2.txt --something-unknown"))
55+
// });
56+
// group.bench_function("Parse help", |b| b.iter(|| parse_single_arg("cmd --help")));
57+
58+
group.finish();
59+
}
60+
61+
// This is the interesting part.
62+
fn bench_cmp(c: &mut Criterion) {
63+
let mut group = c.benchmark_group("Bench cmp");
64+
// uses tmp
65+
// let dir_path = tempfile::tempdir().unwrap().path();
66+
// uses current directory, the generated files are kept
67+
let dir_path = Path::new("");
68+
// let curr = env::current_dir().unwrap();
69+
// let dir_path = curr.as_path();
70+
let num_lines = 100_000;
71+
// The more differences, the faster cmp returns, as it stops after the first difference is found.
72+
let num_differences = 1;
73+
74+
group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS));
75+
// group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS));
76+
// group.sample_size(10);
77+
78+
let (from, to) =
79+
generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed");
80+
let cmd = format!("cmp {from} {to}");
81+
let opts = str_to_args(&cmd).into_iter().peekable();
82+
let params = diffutilslib::cmp::parse_params(opts).unwrap();
83+
84+
group.bench_function(format!("cmp files unchanged, lines: {num_lines}"), |b| {
85+
b.iter(|| diffutilslib::cmp::cmp(&params).unwrap())
86+
});
87+
88+
let (from, to) = generate_test_files(num_lines, num_differences, dir_path)
89+
.expect("generate_test_files failed");
90+
let cmd = format!("cmp {from} {to} -s");
91+
let opts = str_to_args(&cmd).into_iter().peekable();
92+
let params = diffutilslib::cmp::parse_params(opts).unwrap();
93+
94+
group.bench_function(format!("cmp files changed, lines: {num_lines}"), |b| {
95+
b.iter(|| diffutilslib::cmp::cmp(&params).unwrap())
96+
});
97+
98+
group.finish();
99+
100+
// Optional bench by executing the file as cmd
101+
bench_binary_execution_cmp(c);
102+
}
103+
104+
// // This is the interesting part.
105+
// fn bench_diff(c: &mut Criterion) {
106+
// let mut group = c.benchmark_group("Bench cmp");
107+
// // uses tmp
108+
// // let dir_path = tempfile::tempdir().unwrap().path();
109+
// // uses current directory, the generated files are kept
110+
// let dir_path = Path::new("");
111+
// // let curr = env::current_dir().unwrap();
112+
// // let dir_path = curr.as_path();
113+
// let num_lines = 100_000;
114+
// // The more differences, the faster cmp returns, as it stops after the first difference is found.
115+
// let num_differences = 1;
116+
//
117+
// group.warm_up_time(Duration::from_millis(WARM_UP_TIME_MS));
118+
// // group.measurement_time(Duration::from_millis(MEASUREMENT_TIME_MS));
119+
// // group.sample_size(10);
120+
//
121+
// let (from, to) =
122+
// generate_test_files(num_lines, 0, dir_path).expect("generate_test_files failed");
123+
// let cmd = format!("diff {from} {to}");
124+
// let opts = str_to_args(&cmd).into_iter().peekable();
125+
// let params = diffutilslib::params::parse_params(opts).unwrap();
126+
//
127+
// // TODO need function because main cannot be called.
128+
// group.bench_function(format!("diff files unchanged, lines: {num_lines}"), |b| {
129+
// b.iter(|| diffutilslib::<diff>::cmp(&params).unwrap())
130+
// });
131+
//
132+
// let (from, to) = generate_test_files(num_lines, num_differences, dir_path)
133+
// .expect("generate_test_files failed");
134+
// let cmd = format!("diff {from} {to} -s");
135+
// let opts = str_to_args(&cmd).into_iter().peekable();
136+
// let params = diffutilslib::params::parse_params(opts).unwrap();
137+
//
138+
// // TODO need function because main cannot be called.
139+
// group.bench_function(format!("diff files changed, lines: {num_lines}"), |b| {
140+
// b.iter(|| diffutilslib::<diff>::cmp(&params).unwrap())
141+
// });
142+
//
143+
// group.finish();
144+
// }
145+
146+
fn cmp_parse_only(cmd: &str) -> String {
147+
let args = str_to_args(cmd).into_iter().peekable();
148+
let _params = match diffutilslib::cmp::parse_params(args) {
149+
Ok(params) => params,
150+
Err(e) => {
151+
return e.to_string();
152+
}
153+
};
154+
return "ok".to_string();
155+
}
156+
157+
fn diff_parse_only(cmd: &str) -> String {
158+
let args = str_to_args(cmd).into_iter().peekable();
159+
let _params = match diffutilslib::params::parse_params(args) {
160+
Ok(params) => params,
161+
Err(e) => {
162+
return e.to_string();
163+
}
164+
};
165+
return "ok".to_string();
166+
}
167+
168+
/// Splits a command line into arguments, the way the parsers expect them.
///
/// Arguments are separated by any run of whitespace, so repeated, leading or
/// trailing blanks never produce empty arguments (which a parser would treat as
/// operands). An empty or all-blank input yields an empty vector.
///
/// No quoting or escaping is supported: an argument cannot contain whitespace.
fn str_to_args(opt: &str) -> Vec<OsString> {
    opt.split_whitespace().map(OsString::from).collect()
}
177+
178+
/// Generates two test files for comparison.
179+
///
180+
/// Each line consists of 10 words with 5 letters, giving a line length of 60 bytes.
181+
/// If num_differences is set, '*' will be inserted between the first two words of a line,
182+
/// evenly spaced in the file. 1 will add the change in the last line, so the comparison takes longest.
183+
fn generate_test_files(
184+
lines: usize,
185+
num_differences: usize,
186+
dir: &Path,
187+
) -> std::io::Result<(String, String)> {
188+
let f1 = format!("{FROM_FILE}_{lines}.txt");
189+
let f2 = format!("{TO_FILE}_{lines}.txt");
190+
let from_path = dir.join(f1);
191+
let to_path = dir.join(f2);
192+
193+
generate_file_fast(&from_path, &to_path, lines, num_differences)?;
194+
195+
Ok((
196+
from_path.to_string_lossy().to_string(),
197+
to_path.to_string_lossy().to_string(),
198+
))
199+
}
200+
201+
// Largely Gemini AI Generated
202+
fn generate_file_fast(
203+
from_name: &Path,
204+
to_name: &Path,
205+
line_count: usize,
206+
num_differences: usize,
207+
) -> std::io::Result<()> {
208+
let file_from = File::create(from_name)?;
209+
let file_to = File::create(to_name)?;
210+
let change = if num_differences == 0 {
211+
0
212+
} else {
213+
line_count / num_differences
214+
};
215+
// Use a larger 128KB buffer for massive files
216+
let mut writer_from = BufWriter::with_capacity(128 * 1024, file_from);
217+
let mut writer_to = BufWriter::with_capacity(128 * 1024, file_to);
218+
let mut rng = rand::rng();
219+
220+
// Each line: (5 chars * 10 words) + 9 spaces + 1 newline = 60 bytes
221+
let mut line_buffer = [b' '; 60];
222+
line_buffer[59] = b'\n'; // Set the newline once at the end
223+
224+
for i in (0..line_count).rev() {
225+
// Fill only the letter positions, skipping spaces and the newline
226+
for word_idx in 0..10 {
227+
let start = word_idx * 6; // Each word + space block is 6 bytes
228+
for i in 0..5 {
229+
line_buffer[start + i] = rng.random_range(b'a'..b'z' + 1);
230+
}
231+
}
232+
233+
// Write the raw bytes directly to both files
234+
writer_from.write_all(&line_buffer)?;
235+
// make changes in the file
236+
if num_differences == 0 {
237+
writer_to.write_all(&line_buffer)?;
238+
} else {
239+
if i % change == 0 {
240+
line_buffer[5] = b'*';
241+
}
242+
writer_to.write_all(&line_buffer)?;
243+
line_buffer[5] = b' ';
244+
}
245+
}
246+
247+
writer_from.flush()?;
248+
writer_to.flush()?;
249+
250+
Ok(())
251+
}
252+
253+
#[allow(unused)]
254+
// fn bench_binary_execution(c: &mut BenchmarkGroup<'_, WallTime>) {
255+
fn bench_binary_execution_cmp(c: &mut Criterion) {
256+
c.bench_function("GNU cmp", |b| {
257+
b.iter(|| {
258+
let _status = Command::new("cmp")
259+
.arg("from_file_100000.txt")
260+
.arg("to_file_100000.txt")
261+
.arg("-s")
262+
.status()
263+
.expect("Failed to execute binary");
264+
265+
// assert!(status.success());
266+
})
267+
});
268+
269+
c.bench_function("cmp binary", |b| {
270+
b.iter(|| {
271+
let _status = Command::new("target/release/diffutils")
272+
.arg("cmp")
273+
.arg("from_file_100000.txt")
274+
.arg("to_file_100000.txt")
275+
.arg("-s")
276+
// .arg("--lines")
277+
// .arg(black_box("10000"))
278+
.status()
279+
.expect("Failed to execute binary");
280+
281+
// assert!(status.success());
282+
})
283+
});
284+
}

0 commit comments

Comments
 (0)