@@ -24,7 +24,6 @@ use vortex_bench::datasets::nested_structs::NestedStructsData;
2424use vortex_bench:: datasets:: taxi_data:: TaxiData ;
2525use vortex_bench:: display:: DisplayFormat ;
2626use vortex_bench:: display:: print_measurements_json;
27- use vortex_bench:: display:: render_table;
2827use vortex_bench:: measurements:: TimingMeasurement ;
2928use vortex_bench:: random_access:: BenchDataset ;
3029use vortex_bench:: random_access:: ParquetRandomAccessor ;
@@ -34,13 +33,18 @@ use vortex_bench::setup_logging_and_tracing;
3433use vortex_bench:: utils:: constants:: STORAGE_NVME ;
3534use vortex_bench:: v3;
3635
36+ use crate :: render:: RandomAccessRun ;
37+ use crate :: render:: render_random_access_table;
38+
39+ mod render;
40+
3741// ---------------------------------------------------------------------------
3842// Access patterns
3943// ---------------------------------------------------------------------------
4044
4145/// Access pattern for random access benchmarks.
42- #[ derive( Clone , Copy , Debug ) ]
43- enum AccessPattern {
46+ #[ derive( Clone , Copy , Debug , Hash , PartialEq , Eq ) ]
47+ pub enum AccessPattern {
4448 /// Multiple clusters of sequential indices scattered across the dataset,
4549 /// simulating workloads with spatial locality (e.g. scanning nearby records).
4650 Correlated ,
@@ -225,15 +229,17 @@ async fn main() -> Result<()> {
225229/// collecting timing for each run. When `reopen` is true, the accessor is
226230/// recreated from scratch before each iteration so that file metadata
227231/// parsing is included in the timing.
232+ #[ expect( clippy:: too_many_arguments) ]
228233async fn benchmark_random_access (
229234 dataset : & dyn BenchDataset ,
230235 format : Format ,
231236 measurement_name : & str ,
237+ pattern : Option < AccessPattern > ,
232238 indices : & [ u64 ] ,
233239 time_limit_secs : u64 ,
234240 storage : & str ,
235241 reopen : bool ,
236- ) -> Result < TimingMeasurement > {
242+ ) -> Result < RandomAccessRun > {
237243 let time_limit = Duration :: from_secs ( time_limit_secs) ;
238244 let overall_start = Instant :: now ( ) ;
239245 let mut runs = Vec :: new ( ) ;
@@ -253,14 +259,31 @@ async fn benchmark_random_access(
253259 }
254260 }
255261
256- Ok ( TimingMeasurement {
262+ let timing = TimingMeasurement {
257263 name : measurement_name. to_string ( ) ,
258264 storage : storage. to_string ( ) ,
259265 target : Target :: new ( format_to_engine ( format) , format) ,
260266 runs,
267+ } ;
268+ Ok ( RandomAccessRun {
269+ display_name : display_name ( dataset. name ( ) , pattern) ,
270+ dataset : dataset. name ( ) . to_string ( ) ,
271+ pattern,
272+ reopen,
273+ timing,
261274 } )
262275}
263276
277+ /// Row label for the table view. Format is implied by the column header, so
278+ /// it is omitted from the row label even though it stays in the
279+ /// [`TimingMeasurement::name`] used for JSON back-compat.
280+ fn display_name ( dataset : & str , pattern : Option < AccessPattern > ) -> String {
281+ match pattern {
282+ Some ( p) => format ! ( "random-access/{}/{}" , dataset, p. name( ) ) ,
283+ None => format ! ( "random-access/{}" , dataset) ,
284+ }
285+ }
286+
264287/// Build a measurement name for a benchmark run.
265288///
266289/// For taxi (legacy), the name is `random-access/{format}-tokio-local-disk` to preserve
@@ -287,19 +310,13 @@ fn v3_random_access_dataset_name(dataset: &str, pattern: Option<AccessPattern>)
287310 }
288311}
289312
290- fn push_v3_random_access_record (
291- records : & mut Vec < v3:: V3Record > ,
292- measurement : & TimingMeasurement ,
293- dataset : & str ,
294- pattern : Option < AccessPattern > ,
295- reopen : bool ,
296- ) {
297- if reopen {
313+ fn push_v3_random_access_record ( records : & mut Vec < v3:: V3Record > , run : & RandomAccessRun ) {
314+ if run. reopen {
298315 return ;
299316 }
300317
301- let dataset = v3_random_access_dataset_name ( dataset, pattern) ;
302- records. push ( v3:: random_access_record ( measurement , & dataset) ) ;
318+ let dataset = v3_random_access_dataset_name ( & run . dataset , run . pattern ) ;
319+ records. push ( v3:: random_access_record ( & run . timing , & dataset) ) ;
303320}
304321
305322/// Map format to the appropriate engine for random access benchmarks.
@@ -313,13 +330,6 @@ fn format_to_engine(format: Format) -> Engine {
313330 }
314331}
315332
316- fn table_targets ( formats : & [ Format ] ) -> Vec < Target > {
317- formats
318- . iter ( )
319- . map ( |format| Target :: new ( format_to_engine ( * format) , * format) )
320- . collect ( )
321- }
322-
323333/// Open a random accessor for any supported format.
324334///
325335/// For Vortex and Parquet, the path comes from [`BenchDataset::path`].
@@ -392,10 +402,11 @@ async fn run_random_access(
392402 . sum ( ) ;
393403 let progress = ProgressBar :: new ( total_steps as u64 ) ;
394404
395- let targets = table_targets ( & formats) ;
396- let mut measurements = Vec :: new ( ) ;
405+ let mut runs: Vec < RandomAccessRun > = Vec :: new ( ) ;
397406 let mut v3_records: Vec < v3:: V3Record > = Vec :: new ( ) ;
398407
408+ // Iteration order matters for the table renderer: row order is set by the
409+ // first time each `(dataset, pattern)` pair is observed.
399410 for dataset in datasets {
400411 for format in & formats {
401412 if dataset. name ( ) == "taxi" {
@@ -406,25 +417,20 @@ async fn run_random_access(
406417 } else {
407418 name. clone ( )
408419 } ;
409- let measurement = benchmark_random_access (
420+ let run = benchmark_random_access (
410421 dataset. as_ref ( ) ,
411422 * format,
412423 & bench_name,
424+ None ,
413425 & FIXED_TAXI_INDICES ,
414426 time_limit,
415427 STORAGE_NVME ,
416428 reopen,
417429 )
418430 . await ?;
419431
420- push_v3_random_access_record (
421- & mut v3_records,
422- & measurement,
423- dataset. name ( ) ,
424- None ,
425- reopen,
426- ) ;
427- measurements. push ( measurement) ;
432+ push_v3_random_access_record ( & mut v3_records, & run) ;
433+ runs. push ( run) ;
428434 progress. inc ( 1 ) ;
429435 }
430436 }
@@ -438,25 +444,20 @@ async fn run_random_access(
438444 } else {
439445 name. clone ( )
440446 } ;
441- let measurement = benchmark_random_access (
447+ let run = benchmark_random_access (
442448 dataset. as_ref ( ) ,
443449 * format,
444450 & bench_name,
451+ Some ( * pattern) ,
445452 & indices,
446453 time_limit,
447454 STORAGE_NVME ,
448455 reopen,
449456 )
450457 . await ?;
451458
452- push_v3_random_access_record (
453- & mut v3_records,
454- & measurement,
455- dataset. name ( ) ,
456- Some ( * pattern) ,
457- reopen,
458- ) ;
459- measurements. push ( measurement) ;
459+ push_v3_random_access_record ( & mut v3_records, & run) ;
460+ runs. push ( run) ;
460461 progress. inc ( 1 ) ;
461462 }
462463 }
@@ -473,10 +474,11 @@ async fn run_random_access(
473474
474475 match display_format {
475476 DisplayFormat :: Table => {
476- render_table ( & mut writer, measurements , & targets ) ?;
477+ render_random_access_table ( & mut writer, & runs , & formats , reopen_variants ) ?;
477478 }
478479 DisplayFormat :: GhJson => {
479- print_measurements_json ( & mut writer, measurements) ?;
480+ let timings: Vec < TimingMeasurement > = runs. into_iter ( ) . map ( |r| r. timing ) . collect ( ) ;
481+ print_measurements_json ( & mut writer, timings) ?;
480482 }
481483 }
482484
@@ -500,30 +502,33 @@ mod tests {
500502 ) ;
501503 }
502504
505+ fn fake_run ( dataset : & str , pattern : Option < AccessPattern > , reopen : bool ) -> RandomAccessRun {
506+ RandomAccessRun {
507+ timing : TimingMeasurement {
508+ name : format ! ( "random-access/{dataset}/parquet-tokio-local-disk" ) ,
509+ target : Target :: new ( Engine :: Arrow , Format :: Parquet ) ,
510+ storage : STORAGE_NVME . to_string ( ) ,
511+ runs : vec ! [ Duration :: from_nanos( 10 ) ] ,
512+ } ,
513+ dataset : dataset. to_string ( ) ,
514+ pattern,
515+ reopen,
516+ display_name : display_name ( dataset, pattern) ,
517+ }
518+ }
519+
503520 #[ test]
504521 fn v3_random_access_records_skip_reopen_variants ( ) {
505- let measurement = TimingMeasurement {
506- name : "random-access/taxi/uniform/parquet-tokio-local-disk" . to_string ( ) ,
507- target : Target :: new ( Engine :: Arrow , Format :: Parquet ) ,
508- storage : STORAGE_NVME . to_string ( ) ,
509- runs : vec ! [ Duration :: from_nanos( 10 ) ] ,
510- } ;
511522 let mut records = Vec :: new ( ) ;
512523
513- push_v3_random_access_record ( & mut records, & measurement , "taxi" , None , false ) ;
524+ push_v3_random_access_record ( & mut records, & fake_run ( "taxi" , None , false ) ) ;
514525 push_v3_random_access_record (
515526 & mut records,
516- & measurement,
517- "taxi" ,
518- Some ( AccessPattern :: Uniform ) ,
519- false ,
527+ & fake_run ( "taxi" , Some ( AccessPattern :: Uniform ) , false ) ,
520528 ) ;
521529 push_v3_random_access_record (
522530 & mut records,
523- & measurement,
524- "taxi" ,
525- Some ( AccessPattern :: Correlated ) ,
526- true ,
531+ & fake_run ( "taxi" , Some ( AccessPattern :: Correlated ) , true ) ,
527532 ) ;
528533
529534 assert_eq ! ( records. len( ) , 2 ) ;
@@ -538,15 +543,15 @@ mod tests {
538543 }
539544
#[test]
fn display_name_drops_format_extension() {
    // Each case is (dataset, pattern, expected row label); no label mentions a format.
    let cases = [
        ("taxi", None, "random-access/taxi"),
        (
            "taxi",
            Some(AccessPattern::Uniform),
            "random-access/taxi/uniform",
        ),
        (
            "feature-vectors",
            Some(AccessPattern::Correlated),
            "random-access/feature-vectors/correlated",
        ),
    ];

    for (dataset, pattern, expected) in cases {
        assert_eq!(display_name(dataset, pattern), expected);
    }
}
552557}
0 commit comments