Skip to content

Commit d257d09

Browse files
authored
--patterns flag for random-access-bench (#8446)
Adds a flag to benchmark correlated or uniform vectors separately. Useful for building flamegraphs with samply.

Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent 7dbe5ac commit d257d09

1 file changed

Lines changed: 46 additions & 25 deletions

File tree

  • benchmarks/random-access-bench/src

benchmarks/random-access-bench/src/main.rs

Lines changed: 46 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ mod render;
4343
// ---------------------------------------------------------------------------
4444

4545
/// Access pattern for random access benchmarks.
46-
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
46+
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, ValueEnum)]
4747
pub enum AccessPattern {
4848
/// Multiple clusters of sequential indices scattered across the dataset,
4949
/// simulating workloads with spatial locality (e.g. scanning nearby records).
@@ -62,8 +62,6 @@ impl AccessPattern {
6262
}
6363
}
6464

65-
const ACCESS_PATTERNS: [AccessPattern; 2] = [AccessPattern::Correlated, AccessPattern::Uniform];
66-
6765
/// Number of clusters for the correlated pattern.
6866
const NUM_CLUSTERS: usize = 5;
6967

@@ -190,6 +188,14 @@ struct Args {
190188
default_values_t = vec![DatasetArg::Taxi, DatasetArg::FeatureVectors, DatasetArg::NestedLists, DatasetArg::NestedStructs]
191189
)]
192190
datasets: Vec<DatasetArg>,
191+
/// Which access patterns to benchmark.
192+
#[arg(
193+
long,
194+
value_delimiter = ',',
195+
value_enum,
196+
default_values_t = vec![AccessPattern::Correlated, AccessPattern::Uniform]
197+
)]
198+
patterns: Vec<AccessPattern>,
193199
/// Whether to reopen the file on each iteration, use a cached handle, or run both.
194200
#[arg(long, value_enum, default_value_t = OpenMode::Both)]
195201
open_mode: OpenMode,
@@ -201,22 +207,22 @@ async fn main() -> Result<()> {
201207

202208
setup_logging_and_tracing(args.verbose, args.tracing)?;
203209

204-
let datasets: Vec<Box<dyn BenchDataset>> = args
205-
.datasets
206-
.into_iter()
207-
.map(|d| d.into_dataset())
208-
.collect();
209-
210-
run_random_access(
211-
&datasets,
212-
args.formats,
213-
args.time_limit,
214-
args.open_mode,
215-
args.display_format,
216-
args.output_path,
217-
args.gh_json_v3,
218-
)
219-
.await
210+
let config = RunConfig {
211+
datasets: args
212+
.datasets
213+
.into_iter()
214+
.map(|d| d.into_dataset())
215+
.collect(),
216+
formats: args.formats,
217+
patterns: args.patterns,
218+
time_limit: args.time_limit,
219+
open_mode: args.open_mode,
220+
display_format: args.display_format,
221+
output_path: args.output_path,
222+
gh_json_v3: args.gh_json_v3,
223+
};
224+
225+
run_random_access(config).await
220226
}
221227

222228
// ---------------------------------------------------------------------------
@@ -379,15 +385,30 @@ const BENCHMARK_ID: &str = "random-access";
379385
/// Fixed indices used by the original taxi benchmark (preserved for historical continuity).
380386
const FIXED_TAXI_INDICES: [u64; 6] = [10, 11, 12, 13, 100_000, 3_000_000];
381387

382-
async fn run_random_access(
383-
datasets: &[Box<dyn BenchDataset>],
388+
/// Resolved configuration for a single random-access benchmark invocation.
389+
struct RunConfig {
390+
datasets: Vec<Box<dyn BenchDataset>>,
384391
formats: Vec<Format>,
392+
patterns: Vec<AccessPattern>,
385393
time_limit: u64,
386394
open_mode: OpenMode,
387395
display_format: DisplayFormat,
388396
output_path: Option<PathBuf>,
389397
gh_json_v3: Option<PathBuf>,
390-
) -> Result<()> {
398+
}
399+
400+
async fn run_random_access(config: RunConfig) -> Result<()> {
401+
let RunConfig {
402+
datasets,
403+
formats,
404+
patterns,
405+
time_limit,
406+
open_mode,
407+
display_format,
408+
output_path,
409+
gh_json_v3,
410+
} = config;
411+
391412
let reopen_variants: &[bool] = match open_mode {
392413
OpenMode::Cached => &[false],
393414
OpenMode::Reopen => &[true],
@@ -398,7 +419,7 @@ async fn run_random_access(
398419
.iter()
399420
.map(|d| {
400421
let legacy_extra = if d.name() == "taxi" { formats.len() } else { 0 };
401-
(formats.len() * ACCESS_PATTERNS.len() + legacy_extra) * reopen_variants.len()
422+
(formats.len() * patterns.len() + legacy_extra) * reopen_variants.len()
402423
})
403424
.sum();
404425
let progress = ProgressBar::new(total_steps as u64);
@@ -408,7 +429,7 @@ async fn run_random_access(
408429

409430
// Iteration order matters for the table renderer: row order is set by the
410431
// first time each `(dataset, pattern)` pair is observed.
411-
for dataset in datasets {
432+
for dataset in &datasets {
412433
for format in &formats {
413434
if dataset.name() == "taxi" {
414435
let name = measurement_name(dataset.name(), None, *format);
@@ -436,7 +457,7 @@ async fn run_random_access(
436457
}
437458
}
438459

439-
for pattern in &ACCESS_PATTERNS {
460+
for pattern in &patterns {
440461
let indices = generate_indices(dataset.as_ref(), *pattern);
441462
let name = measurement_name(dataset.name(), Some(*pattern), *format);
442463
for &reopen in reopen_variants {

0 commit comments

Comments
 (0)