Skip to content

Commit 673c244

Browse files
committed
Fix random-access benchmark console display
Signed-off-by: Robert Kruszewski <github@robertk.io>
1 parent 24a6ec9 commit 673c244

5 files changed

Lines changed: 385 additions & 67 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/random-access-bench/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ indicatif = { workspace = true }
2121
lance-bench = { path = "../lance-bench", optional = true }
2222
rand = { workspace = true }
2323
rand_distr = { workspace = true }
24+
tabled = { workspace = true }
2425
tokio = { workspace = true, features = ["full"] }
2526
vortex-bench = { workspace = true }
2627

benchmarks/random-access-bench/src/main.rs

Lines changed: 72 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ use vortex_bench::datasets::nested_structs::NestedStructsData;
2424
use vortex_bench::datasets::taxi_data::TaxiData;
2525
use vortex_bench::display::DisplayFormat;
2626
use vortex_bench::display::print_measurements_json;
27-
use vortex_bench::display::render_table;
2827
use vortex_bench::measurements::TimingMeasurement;
2928
use vortex_bench::random_access::BenchDataset;
3029
use vortex_bench::random_access::ParquetRandomAccessor;
@@ -34,13 +33,18 @@ use vortex_bench::setup_logging_and_tracing;
3433
use vortex_bench::utils::constants::STORAGE_NVME;
3534
use vortex_bench::v3;
3635

36+
use crate::render::RandomAccessRun;
37+
use crate::render::render_random_access_table;
38+
39+
mod render;
40+
3741
// ---------------------------------------------------------------------------
3842
// Access patterns
3943
// ---------------------------------------------------------------------------
4044

4145
/// Access pattern for random access benchmarks.
42-
#[derive(Clone, Copy, Debug)]
43-
enum AccessPattern {
46+
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
47+
pub enum AccessPattern {
4448
/// Multiple clusters of sequential indices scattered across the dataset,
4549
/// simulating workloads with spatial locality (e.g. scanning nearby records).
4650
Correlated,
@@ -225,15 +229,17 @@ async fn main() -> Result<()> {
225229
/// collecting timing for each run. When `reopen` is true, the accessor is
226230
/// recreated from scratch before each iteration so that file metadata
227231
/// parsing is included in the timing.
232+
#[expect(clippy::too_many_arguments)]
228233
async fn benchmark_random_access(
229234
dataset: &dyn BenchDataset,
230235
format: Format,
231236
measurement_name: &str,
237+
pattern: Option<AccessPattern>,
232238
indices: &[u64],
233239
time_limit_secs: u64,
234240
storage: &str,
235241
reopen: bool,
236-
) -> Result<TimingMeasurement> {
242+
) -> Result<RandomAccessRun> {
237243
let time_limit = Duration::from_secs(time_limit_secs);
238244
let overall_start = Instant::now();
239245
let mut runs = Vec::new();
@@ -253,14 +259,31 @@ async fn benchmark_random_access(
253259
}
254260
}
255261

256-
Ok(TimingMeasurement {
262+
let timing = TimingMeasurement {
257263
name: measurement_name.to_string(),
258264
storage: storage.to_string(),
259265
target: Target::new(format_to_engine(format), format),
260266
runs,
267+
};
268+
Ok(RandomAccessRun {
269+
display_name: display_name(dataset.name(), pattern),
270+
dataset: dataset.name().to_string(),
271+
pattern,
272+
reopen,
273+
timing,
261274
})
262275
}
263276

277+
/// Row label for the table view: `random-access/{dataset}/{pattern}` when a pattern is given,
278+
/// or `random-access/{dataset}` when `pattern` is `None`. Format is implied by the column
279+
/// header, so it is omitted from the row label even though it stays in the [`TimingMeasurement::name`] used for JSON back-compat.
280+
fn display_name(dataset: &str, pattern: Option<AccessPattern>) -> String {
281+
match pattern {
282+
Some(p) => format!("random-access/{}/{}", dataset, p.name()),
283+
None => format!("random-access/{}", dataset),
284+
}
285+
}
286+
264287
/// Build a measurement name for a benchmark run.
265288
///
266289
/// For taxi (legacy), the name is `random-access/{format}-tokio-local-disk` to preserve
@@ -287,19 +310,13 @@ fn v3_random_access_dataset_name(dataset: &str, pattern: Option<AccessPattern>)
287310
}
288311
}
289312

290-
fn push_v3_random_access_record(
291-
records: &mut Vec<v3::V3Record>,
292-
measurement: &TimingMeasurement,
293-
dataset: &str,
294-
pattern: Option<AccessPattern>,
295-
reopen: bool,
296-
) {
297-
if reopen {
313+
fn push_v3_random_access_record(records: &mut Vec<v3::V3Record>, run: &RandomAccessRun) {
314+
if run.reopen {
298315
return;
299316
}
300317

301-
let dataset = v3_random_access_dataset_name(dataset, pattern);
302-
records.push(v3::random_access_record(measurement, &dataset));
318+
let dataset = v3_random_access_dataset_name(&run.dataset, run.pattern);
319+
records.push(v3::random_access_record(&run.timing, &dataset));
303320
}
304321

305322
/// Map format to the appropriate engine for random access benchmarks.
@@ -313,13 +330,6 @@ fn format_to_engine(format: Format) -> Engine {
313330
}
314331
}
315332

316-
fn table_targets(formats: &[Format]) -> Vec<Target> {
317-
formats
318-
.iter()
319-
.map(|format| Target::new(format_to_engine(*format), *format))
320-
.collect()
321-
}
322-
323333
/// Open a random accessor for any supported format.
324334
///
325335
/// For Vortex and Parquet, the path comes from [`BenchDataset::path`].
@@ -392,10 +402,11 @@ async fn run_random_access(
392402
.sum();
393403
let progress = ProgressBar::new(total_steps as u64);
394404

395-
let targets = table_targets(&formats);
396-
let mut measurements = Vec::new();
405+
let mut runs: Vec<RandomAccessRun> = Vec::new();
397406
let mut v3_records: Vec<v3::V3Record> = Vec::new();
398407

408+
// Iteration order matters for the table renderer: row order is set by the
409+
// first time each `(dataset, pattern)` pair is observed.
399410
for dataset in datasets {
400411
for format in &formats {
401412
if dataset.name() == "taxi" {
@@ -406,25 +417,20 @@ async fn run_random_access(
406417
} else {
407418
name.clone()
408419
};
409-
let measurement = benchmark_random_access(
420+
let run = benchmark_random_access(
410421
dataset.as_ref(),
411422
*format,
412423
&bench_name,
424+
None,
413425
&FIXED_TAXI_INDICES,
414426
time_limit,
415427
STORAGE_NVME,
416428
reopen,
417429
)
418430
.await?;
419431

420-
push_v3_random_access_record(
421-
&mut v3_records,
422-
&measurement,
423-
dataset.name(),
424-
None,
425-
reopen,
426-
);
427-
measurements.push(measurement);
432+
push_v3_random_access_record(&mut v3_records, &run);
433+
runs.push(run);
428434
progress.inc(1);
429435
}
430436
}
@@ -438,25 +444,20 @@ async fn run_random_access(
438444
} else {
439445
name.clone()
440446
};
441-
let measurement = benchmark_random_access(
447+
let run = benchmark_random_access(
442448
dataset.as_ref(),
443449
*format,
444450
&bench_name,
451+
Some(*pattern),
445452
&indices,
446453
time_limit,
447454
STORAGE_NVME,
448455
reopen,
449456
)
450457
.await?;
451458

452-
push_v3_random_access_record(
453-
&mut v3_records,
454-
&measurement,
455-
dataset.name(),
456-
Some(*pattern),
457-
reopen,
458-
);
459-
measurements.push(measurement);
459+
push_v3_random_access_record(&mut v3_records, &run);
460+
runs.push(run);
460461
progress.inc(1);
461462
}
462463
}
@@ -473,10 +474,11 @@ async fn run_random_access(
473474

474475
match display_format {
475476
DisplayFormat::Table => {
476-
render_table(&mut writer, measurements, &targets)?;
477+
render_random_access_table(&mut writer, &runs, &formats, reopen_variants)?;
477478
}
478479
DisplayFormat::GhJson => {
479-
print_measurements_json(&mut writer, measurements)?;
480+
let timings: Vec<TimingMeasurement> = runs.into_iter().map(|r| r.timing).collect();
481+
print_measurements_json(&mut writer, timings)?;
480482
}
481483
}
482484

@@ -500,30 +502,33 @@ mod tests {
500502
);
501503
}
502504

505+
/// Test fixture: builds a [`RandomAccessRun`] for `dataset` with the given `pattern` and
/// `reopen` flag. The embedded timing is fixed to an Arrow/Parquet target on `STORAGE_NVME`
/// with a single 10 ns run; its `name` is derived from `dataset` only (the pattern is not
/// part of it), while `display_name` is computed from both `dataset` and `pattern`.
fn fake_run(dataset: &str, pattern: Option<AccessPattern>, reopen: bool) -> RandomAccessRun {
506+
RandomAccessRun {
507+
timing: TimingMeasurement {
508+
name: format!("random-access/{dataset}/parquet-tokio-local-disk"),
509+
target: Target::new(Engine::Arrow, Format::Parquet),
510+
storage: STORAGE_NVME.to_string(),
511+
runs: vec![Duration::from_nanos(10)],
512+
},
513+
dataset: dataset.to_string(),
514+
pattern,
515+
reopen,
516+
display_name: display_name(dataset, pattern),
517+
}
518+
}
519+
503520
#[test]
504521
fn v3_random_access_records_skip_reopen_variants() {
505-
let measurement = TimingMeasurement {
506-
name: "random-access/taxi/uniform/parquet-tokio-local-disk".to_string(),
507-
target: Target::new(Engine::Arrow, Format::Parquet),
508-
storage: STORAGE_NVME.to_string(),
509-
runs: vec![Duration::from_nanos(10)],
510-
};
511522
let mut records = Vec::new();
512523

513-
push_v3_random_access_record(&mut records, &measurement, "taxi", None, false);
524+
push_v3_random_access_record(&mut records, &fake_run("taxi", None, false));
514525
push_v3_random_access_record(
515526
&mut records,
516-
&measurement,
517-
"taxi",
518-
Some(AccessPattern::Uniform),
519-
false,
527+
&fake_run("taxi", Some(AccessPattern::Uniform), false),
520528
);
521529
push_v3_random_access_record(
522530
&mut records,
523-
&measurement,
524-
"taxi",
525-
Some(AccessPattern::Correlated),
526-
true,
531+
&fake_run("taxi", Some(AccessPattern::Correlated), true),
527532
);
528533

529534
assert_eq!(records.len(), 2);
@@ -538,15 +543,15 @@ mod tests {
538543
}
539544

540545
#[test]
541-
fn table_targets_has_one_column_per_format() {
542-
let targets = table_targets(&[Format::Parquet, Format::OnDiskVortex]);
543-
546+
fn display_name_drops_format_extension() {
547+
assert_eq!(display_name("taxi", None), "random-access/taxi");
548+
assert_eq!(
549+
display_name("taxi", Some(AccessPattern::Uniform)),
550+
"random-access/taxi/uniform"
551+
);
544552
assert_eq!(
545-
targets,
546-
vec![
547-
Target::new(Engine::Arrow, Format::Parquet),
548-
Target::new(Engine::Vortex, Format::OnDiskVortex),
549-
]
553+
display_name("feature-vectors", Some(AccessPattern::Correlated)),
554+
"random-access/feature-vectors/correlated"
550555
);
551556
}
552557
}

0 commit comments

Comments
 (0)