@@ -43,7 +43,7 @@ mod render;
4343// ---------------------------------------------------------------------------
4444
4545/// Access pattern for random access benchmarks.
46- #[ derive( Clone , Copy , Debug , Hash , PartialEq , Eq ) ]
46+ #[ derive( Clone , Copy , Debug , Hash , PartialEq , Eq , ValueEnum ) ]
4747pub enum AccessPattern {
4848 /// Multiple clusters of sequential indices scattered across the dataset,
4949 /// simulating workloads with spatial locality (e.g. scanning nearby records).
@@ -62,8 +62,6 @@ impl AccessPattern {
6262 }
6363}
6464
65- const ACCESS_PATTERNS : [ AccessPattern ; 2 ] = [ AccessPattern :: Correlated , AccessPattern :: Uniform ] ;
66-
6765/// Number of clusters for the correlated pattern.
6866const NUM_CLUSTERS : usize = 5 ;
6967
@@ -190,6 +188,14 @@ struct Args {
190188 default_values_t = vec![ DatasetArg :: Taxi , DatasetArg :: FeatureVectors , DatasetArg :: NestedLists , DatasetArg :: NestedStructs ]
191189 ) ]
192190 datasets : Vec < DatasetArg > ,
191+ /// Which access patterns to benchmark.
192+ #[ arg(
193+ long,
194+ value_delimiter = ',' ,
195+ value_enum,
196+ default_values_t = vec![ AccessPattern :: Correlated , AccessPattern :: Uniform ]
197+ ) ]
198+ patterns : Vec < AccessPattern > ,
193199 /// Whether to reopen the file on each iteration, use a cached handle, or run both.
194200 #[ arg( long, value_enum, default_value_t = OpenMode :: Both ) ]
195201 open_mode : OpenMode ,
@@ -201,22 +207,22 @@ async fn main() -> Result<()> {
201207
202208 setup_logging_and_tracing ( args. verbose , args. tracing ) ?;
203209
204- let datasets : Vec < Box < dyn BenchDataset > > = args
205- . datasets
206- . into_iter ( )
207- . map ( |d| d . into_dataset ( ) )
208- . collect ( ) ;
209-
210- run_random_access (
211- & datasets ,
212- args. formats ,
213- args. time_limit ,
214- args. open_mode ,
215- args. display_format ,
216- args. output_path ,
217- args . gh_json_v3 ,
218- )
219- . await
210+ let config = RunConfig {
211+ datasets : args
212+ . datasets
213+ . into_iter ( )
214+ . map ( |d| d . into_dataset ( ) )
215+ . collect ( ) ,
216+ formats : args . formats ,
217+ patterns : args . patterns ,
218+ time_limit : args. time_limit ,
219+ open_mode : args. open_mode ,
220+ display_format : args. display_format ,
221+ output_path : args. output_path ,
222+ gh_json_v3 : args. gh_json_v3 ,
223+ } ;
224+
225+ run_random_access ( config ) . await
220226}
221227
222228// ---------------------------------------------------------------------------
@@ -379,15 +385,30 @@ const BENCHMARK_ID: &str = "random-access";
379385/// Fixed indices used by the original taxi benchmark (preserved for historical continuity).
380386const FIXED_TAXI_INDICES : [ u64 ; 6 ] = [ 10 , 11 , 12 , 13 , 100_000 , 3_000_000 ] ;
381387
382- async fn run_random_access (
383- datasets : & [ Box < dyn BenchDataset > ] ,
388+ /// Resolved configuration for a single random-access benchmark invocation: built once in `main` from the parsed CLI arguments and consumed by value by `run_random_access`.
389+ struct RunConfig {
390+ datasets : Vec < Box < dyn BenchDataset > > , // Datasets to benchmark, converted from the CLI dataset arguments via `into_dataset`.
384391 formats : Vec < Format > , // Formats to measure; each one is run against every dataset.
392+ patterns : Vec < AccessPattern > , // Access patterns run for each (dataset, format) pair.
385393 time_limit : u64 , // Time limit taken from the CLI. NOTE(review): unit is not visible here — confirm.
386394 open_mode : OpenMode , // Selects which reopen variants run (cached handle, reopen per iteration, or both).
387395 display_format : DisplayFormat , // NOTE(review): presumably how results are rendered; usage is outside this view — confirm.
388396 output_path : Option < PathBuf > , // NOTE(review): presumably where rendered results are written when set — confirm.
389397 gh_json_v3 : Option < PathBuf > , // NOTE(review): presumably an optional JSON report destination — confirm.
390- ) -> Result < ( ) > {
398+ }
399+
400+ async fn run_random_access ( config : RunConfig ) -> Result < ( ) > {
401+ let RunConfig {
402+ datasets,
403+ formats,
404+ patterns,
405+ time_limit,
406+ open_mode,
407+ display_format,
408+ output_path,
409+ gh_json_v3,
410+ } = config;
411+
391412 let reopen_variants: & [ bool ] = match open_mode {
392413 OpenMode :: Cached => & [ false ] ,
393414 OpenMode :: Reopen => & [ true ] ,
@@ -398,7 +419,7 @@ async fn run_random_access(
398419 . iter ( )
399420 . map ( |d| {
400421 let legacy_extra = if d. name ( ) == "taxi" { formats. len ( ) } else { 0 } ;
401- ( formats. len ( ) * ACCESS_PATTERNS . len ( ) + legacy_extra) * reopen_variants. len ( )
422+ ( formats. len ( ) * patterns . len ( ) + legacy_extra) * reopen_variants. len ( )
402423 } )
403424 . sum ( ) ;
404425 let progress = ProgressBar :: new ( total_steps as u64 ) ;
@@ -408,7 +429,7 @@ async fn run_random_access(
408429
409430 // Iteration order matters for the table renderer: row order is set by the
410431 // first time each `(dataset, pattern)` pair is observed.
411- for dataset in datasets {
432+ for dataset in & datasets {
412433 for format in & formats {
413434 if dataset. name ( ) == "taxi" {
414435 let name = measurement_name ( dataset. name ( ) , None , * format) ;
@@ -436,7 +457,7 @@ async fn run_random_access(
436457 }
437458 }
438459
439- for pattern in & ACCESS_PATTERNS {
460+ for pattern in & patterns {
440461 let indices = generate_indices ( dataset. as_ref ( ) , * pattern) ;
441462 let name = measurement_name ( dataset. name ( ) , Some ( * pattern) , * format) ;
442463 for & reopen in reopen_variants {
0 commit comments