diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 5f57862937a..e2da59838e9 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -377,6 +377,7 @@ jobs: --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ --no-build \ + --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} @@ -395,6 +396,7 @@ jobs: --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ --no-build \ + --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ --opt remote-data-dir=${{ matrix.remote_storage }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} diff --git a/bench-orchestrator/bench_orchestrator/cli.py b/bench-orchestrator/bench_orchestrator/cli.py index 8ee8a7ef055..d497d85ed13 100644 --- a/bench-orchestrator/bench_orchestrator/cli.py +++ b/bench-orchestrator/bench_orchestrator/cli.py @@ -202,6 +202,10 @@ def run( str | None, typer.Option("--targets-json", help="Exact benchmark targets as a JSON array"), ] = None, + runner: Annotated[ + str | None, + typer.Option("--runner", help="Benchmark runner ID (e.g., ec2_c6id.8xlarge)"), + ] = None, output: Annotated[ Path | None, typer.Option("--output", help="Optional path for compatibility JSONL output"), @@ -289,6 +293,7 @@ def run( samply=samply, sample_rate=sample_rate, tracing=tracing, + runner=runner, on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: ( write_result_line( line, diff --git a/bench-orchestrator/bench_orchestrator/comparison/analyzer.py b/bench-orchestrator/bench_orchestrator/comparison/analyzer.py index 7ac62f188ff..e4dcdb1936e 100644 --- 
a/bench-orchestrator/bench_orchestrator/comparison/analyzer.py +++ b/bench-orchestrator/bench_orchestrator/comparison/analyzer.py @@ -10,6 +10,9 @@ import numpy as np import pandas as pd +OLD_QUERY_NAME = re.compile(r"_q(\d+)/") +NEW_QUERY_NAME = re.compile(r"/q(\d+)(?::memory|/memory)?/") + @dataclass class TargetRef: @@ -64,9 +67,14 @@ def extract_target_fields(df: pd.DataFrame) -> pd.DataFrame: # Extract query number from name if present if "name" in df.columns: - # Pattern: dataset_qNN/engine:format - pattern = r"_q(\d+)/" - df["query"] = df["name"].apply(lambda n: int(m.group(1)) if (m := re.search(pattern, str(n))) else None) + # Patterns: + # - dataset_qNN/engine:format + # - dataset/.../qNN/engine:format + df["query"] = df["name"].apply( + lambda n: int(m.group(1)) + if (m := (OLD_QUERY_NAME.search(str(n)) or NEW_QUERY_NAME.search(str(n)))) + else None + ) return df diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py index 4fa87076d64..b895afdc2e1 100644 --- a/bench-orchestrator/bench_orchestrator/runner/executor.py +++ b/bench-orchestrator/bench_orchestrator/runner/executor.py @@ -39,6 +39,7 @@ def build_command( samply: bool = False, sample_rate: int | None = None, tracing: bool = False, + runner: str | None = None, ) -> list[str]: """Build the command used to execute a benchmark binary.""" cmd = [ @@ -64,6 +65,8 @@ def build_command( cmd.append("--track-memory") if tracing: cmd.append("--tracing") + if runner: + cmd.extend(["--runner", runner]) if options: for key, value in options.items(): cmd.extend(["--opt", f"{key}={value}"]) @@ -94,6 +97,7 @@ def run( samply: bool = False, sample_rate: int | None = None, tracing: bool = False, + runner: str | None = None, on_result: Callable[[str], None] | None = None, ) -> list[str]: """ @@ -123,6 +127,7 @@ def run( samply=samply, sample_rate=sample_rate, tracing=tracing, + runner=runner, ) if self.verbose: diff --git 
a/benchmarks-website/server.js b/benchmarks-website/server.js index a9af96234fe..1b9a5ce86b3 100644 --- a/benchmarks-website/server.js +++ b/benchmarks-website/server.js @@ -22,6 +22,8 @@ const COMMITS_URL = const REFRESH_INTERVAL = process.env.REFRESH_INTERVAL || 5 * 60 * 1000; const MAX_POINTS = 200; const USE_LOCAL_DATA = process.env.USE_LOCAL_DATA === "true"; +const LOCAL_DATA_FILE = process.env.LOCAL_DATA_FILE || null; +const LOCAL_COMMITS_FILE = process.env.LOCAL_COMMITS_FILE || null; // Benchmark groups: non-query groups + simple suites + fan-out suites const GROUPS = [ @@ -64,8 +66,50 @@ const geoMean = (arr) => ) : null; +function queryLabel(suite, queryNumber) { + return `${suite.queryPrefix} Q${queryNumber}`; +} + +function parseSqlBenchmarkId(id) { + let parts = id.split("/"); + if (parts[0]?.toLowerCase() === "memory") parts = parts.slice(1); + if (parts.length < 4) return null; + + const suitePrefix = parts[0].toLowerCase(); + const suite = QUERY_SUITES.find( + (querySuite) => querySuite.prefix.toLowerCase() === suitePrefix, + ); + if (!suite) return null; + + const querySegment = parts.at(-3); + const runner = parts.at(-2); + const seriesName = parts.at(-1); + const queryMatch = querySegment?.match(/^q(\d+)$/i); + if (!queryMatch || !runner || !seriesName?.includes(":")) return null; + + const queryNumber = parseInt(queryMatch[1], 10); + return { + suite, + queryNumber, + datasetSegments: parts.slice(1, -3), + chartName: queryLabel(suite, queryNumber), + seriesName, + sortPosition: queryNumber, + }; +} + +function scaleFactorFromBenchmarkId(parsedSqlId) { + const sfSegment = parsedSqlId.datasetSegments.find((segment) => + segment.toLowerCase().startsWith("sf_"), + ); + if (!sfSegment) return 1; + + const sf = parseFloat(sfSegment.slice(3).replace(/_/g, ".")); + return Number.isFinite(sf) ? 
Math.round(sf) : 1; +} + // Categorize benchmarks based on name patterns and metadata -function getGroup(benchmark) { +function getGroup(benchmark, parsedSqlId = parseSqlBenchmarkId(benchmark.name)) { const name = benchmark.name; const lower = name.toLowerCase(); @@ -105,26 +149,20 @@ function getGroup(benchmark) { return "Compression"; } - // SQL query suites: match "{prefix}_q..." or "{prefix}/..." - for (const suite of QUERY_SUITES) { - if ( - !lower.startsWith(suite.prefix + "_q") && - !lower.startsWith(suite.prefix + "/") - ) - continue; + if (parsedSqlId) { + const suite = parsedSqlId.suite; if (suite.skip) return null; if (!suite.fanOut) return suite.displayName; // Fan-out suites: expand by storage and scale factor const storage = benchmark.storage?.toUpperCase() === "S3" ? "S3" : "NVMe"; - const rawSf = benchmark.dataset?.[suite.datasetKey]?.scale_factor; - const sf = rawSf ? Math.round(parseFloat(rawSf)) : 1; + const sf = scaleFactorFromBenchmarkId(parsedSqlId); return `${suite.displayName} (${storage}) (SF=${sf})`; } return null; } -// Format query name for display: "{prefix}_q00" -> "{QUERY_PREFIX} Q0" +// Format query name for display: "{prefix}_q00" -> "{QUERY_PREFIX} Q0". function formatQuery(q) { const lower = q.toLowerCase(); for (const suite of QUERY_SUITES) { @@ -232,7 +270,14 @@ function readLocalJsonl(fp) { async function forEachBenchmark(callback) { let stream; if (USE_LOCAL_DATA) { - stream = fs.createReadStream(path.join(__dirname, "sample/data.json")); + const localDataFile = + LOCAL_DATA_FILE || + (fs.existsSync(path.join(__dirname, "sample/data.json")) + ? path.join(__dirname, "sample/data.json") + : path.join(__dirname, "sample/data.json.gz")); + stream = localDataFile.endsWith(".gz") + ? 
fs.createReadStream(localDataFile).pipe(zlib.createGunzip()) + : fs.createReadStream(localDataFile); } else { const res = await fetch(DATA_URL); if (!res.ok) throw new Error(`Fetch failed: ${DATA_URL} ${res.status}`); @@ -259,7 +304,9 @@ async function refresh() { try { // Load commits first (small dataset, must be fully in memory for indexing) const commitsArr = USE_LOCAL_DATA - ? await readLocalJsonl(path.join(__dirname, "sample/commits.json")) + ? await readLocalJsonl( + LOCAL_COMMITS_FILE || path.join(__dirname, "sample/commits.json"), + ) : await fetchJsonl(COMMITS_URL); // Build commit index (O(1) lookup) @@ -283,9 +330,12 @@ async function refresh() { return; } - const group = getGroup(b); + const parsedSqlId = parseSqlBenchmarkId(b.name); + const group = getGroup(b, parsedSqlId); if (!group) { - uncategorized.add(b.name.split("/")[0]); + if (!parsedSqlId?.suite.skip) { + uncategorized.add(b.name.split("/")[0]); + } return; } if (!groups[group]) return; @@ -293,17 +343,26 @@ async function refresh() { // Random access names have the form: random-access/{dataset}/{pattern}/{format} // Historical random access names: random-access/{format} // Other benchmarks use: {query}/{series} - let seriesName, chartName; + let seriesName, chartName, sortPos; const parts = b.name.split("/"); if (group === "Random Access" && parts.length === 4) { chartName = `${parts[1]}/${parts[2]}`.toUpperCase().replace(/[_-]/g, " "); seriesName = rename(parts[3] || "default"); + sortPos = 0; } else if (group === "Random Access" && parts.length === 2) { chartName = "RANDOM ACCESS"; seriesName = rename(parts[1] || "default"); + sortPos = 0; + } else if (parsedSqlId) { + seriesName = rename(parsedSqlId.seriesName); + chartName = parsedSqlId.chartName; + sortPos = parsedSqlId.sortPosition; } else { seriesName = rename(parts[1] || "default"); chartName = formatQuery(parts[0]); + sortPos = parts[0].match(/q(\d+)$/i)?.[1] + ? 
parseInt(RegExp.$1, 10) + : 0; } chartName = normalizeChartName(group, chartName); if (chartName.includes("PARQUET-UNC")) return; @@ -318,9 +377,6 @@ async function refresh() { else unit = "ns"; } - const sortPos = parts[0].match(/q(\d+)$/i)?.[1] - ? parseInt(RegExp.$1, 10) - : 0; const idx = commitIdx.get(commit.id); if (idx === undefined) return; diff --git a/benchmarks-website/src/config.js b/benchmarks-website/src/config.js index c6ce9060428..74bf43ea853 100644 --- a/benchmarks-website/src/config.js +++ b/benchmarks-website/src/config.js @@ -46,6 +46,7 @@ export const QUERY_SUITES = [ fanOut: true, }, { prefix: "fineweb", skip: true }, + { prefix: "gharchive", skip: true }, ]; // Pre-registered fan-out groups (storage x scale factor). diff --git a/benchmarks/datafusion-bench/src/main.rs b/benchmarks/datafusion-bench/src/main.rs index ad0df8ea74a..745d8371303 100644 --- a/benchmarks/datafusion-bench/src/main.rs +++ b/benchmarks/datafusion-bench/src/main.rs @@ -94,6 +94,9 @@ struct Args { #[arg(long, default_value_t = false)] track_memory: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long, default_value_t = false)] explain: bool, @@ -149,6 +152,7 @@ async fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DataFusion, + args.runner.clone(), args.formats.clone(), args.track_memory, args.hide_progress_bar, diff --git a/benchmarks/duckdb-bench/src/main.rs b/benchmarks/duckdb-bench/src/main.rs index 7ab8f1ac7ab..d8a3306b224 100644 --- a/benchmarks/duckdb-bench/src/main.rs +++ b/benchmarks/duckdb-bench/src/main.rs @@ -64,6 +64,9 @@ struct Args { #[arg(long, default_value_t = false)] hide_progress_bar: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long, value_delimiter = ',', value_parser = value_parser!(Format))] formats: Vec, @@ -142,6 +145,7 @@ fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DuckDB, + 
args.runner.clone(), args.formats.clone(), args.track_memory, args.hide_progress_bar, diff --git a/benchmarks/lance-bench/src/main.rs b/benchmarks/lance-bench/src/main.rs index 73fa8426ffe..6cce97d2548 100644 --- a/benchmarks/lance-bench/src/main.rs +++ b/benchmarks/lance-bench/src/main.rs @@ -65,6 +65,9 @@ struct Args { #[arg(long, default_value_t = false)] track_memory: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long = "opt", value_delimiter = ',', value_parser = value_parser!(Opt))] options: Vec, } @@ -93,6 +96,7 @@ async fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DataFusion, + args.runner.clone(), vec![Format::Lance], args.track_memory, args.hide_progress_bar, diff --git a/scripts/compare-benchmark-jsons.py b/scripts/compare-benchmark-jsons.py index 44514053fad..8e6a36eca20 100644 --- a/scripts/compare-benchmark-jsons.py +++ b/scripts/compare-benchmark-jsons.py @@ -10,9 +10,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright the Vortex contributors +import argparse import math -import re -import sys from dataclasses import dataclass from typing import Any @@ -20,7 +19,7 @@ import pandas as pd # Analysis overview: -# - Join base and PR benchmark rows on benchmark identity. +# - Join base and PR benchmark rows on canonical benchmark name. # - Use log-ratios because benchmark slowdowns/speedups are multiplicative. # - Treat parquet rows as controls to estimate systemic drift beta(q). # - Attribute the remaining change to the PR as alpha(q, c). 
@@ -63,21 +62,33 @@ def extract_dataset_key(df: pd.DataFrame) -> pd.DataFrame: return df +def unknown_target_fields() -> pd.Series: + return pd.Series({"engine": "unknown", "file_format": "unknown", "query": pd.NA}) + + def extract_target_fields(name: str) -> pd.Series: - """Parse query, engine, and format from the benchmark name.""" + """Parse query, engine, and format from a canonical SQL benchmark name.""" if not isinstance(name, str): - return pd.Series({"engine": "unknown", "file_format": "unknown", "query": pd.NA}) + return unknown_target_fields() - match = re.search(r"_q(\d+)/([^:]+):(.+)$", name) - if match is None: - return pd.Series({"engine": "unknown", "file_format": "unknown", "query": pd.NA}) + parts = name.split("/") + if parts and parts[0].lower() == "memory": + parts = parts[1:] + if len(parts) < 4: + return unknown_target_fields() + query = parts[-3] + target = parts[-1] + if not query.lower().startswith("q") or not query[1:].isdigit() or ":" not in target: + return unknown_target_fields() + + engine, file_format = target.split(":", 1) return pd.Series( { - "engine": match.group(2), - "file_format": match.group(3), - "query": int(match.group(1)), + "engine": engine.lower(), + "file_format": file_format.lower(), + "query": int(query[1:]), } ) @@ -475,13 +486,24 @@ def group_sort_key(group_key: tuple[str, str]) -> tuple[int, int, str, str]: ) +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Compare two benchmark JSONL files and render a PR comment." 
+ ) + parser.add_argument("base_json", help="Base benchmark JSONL file") + parser.add_argument("pr_json", help="PR benchmark JSONL file") + parser.add_argument("benchmark_name", nargs="?", default="") + return parser.parse_args() + + def main() -> None: """Render the benchmark comparison markdown used in CI PR comments.""" - benchmark_name = sys.argv[3] if len(sys.argv) > 3 else "" + args = parse_args() + benchmark_name = args.benchmark_name - base = pd.read_json(sys.argv[1], lines=True) - pr = pd.read_json(sys.argv[2], lines=True) + base = pd.read_json(args.base_json, lines=True) + pr = pd.read_json(args.pr_json, lines=True) base_commit_id = set(base["commit_id"].unique()) pr_commit_id = set(pr["commit_id"].unique()) @@ -498,7 +520,13 @@ def main() -> None: base = extract_dataset_key(base) pr = extract_dataset_key(pr) - df3 = pd.merge(base, pr, on=["name", "storage", "dataset_key"], how="right", suffixes=("_base", "_pr")) + df3 = pd.merge( + base, + pr, + on=["name", "storage", "dataset_key"], + how="right", + suffixes=("_base", "_pr"), + ) df3["ratio"] = df3["value_pr"] / df3["value_base"] df3[["engine", "file_format", "query"]] = df3["name"].apply(extract_target_fields) diff --git a/vortex-bench/src/datasets/mod.rs b/vortex-bench/src/datasets/mod.rs index e7f53156500..709579a8569 100644 --- a/vortex-bench/src/datasets/mod.rs +++ b/vortex-bench/src/datasets/mod.rs @@ -2,6 +2,7 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use std::fmt::Display; +use std::path::PathBuf; use anyhow::Result; use async_trait::async_trait; @@ -19,7 +20,7 @@ pub mod struct_list_of_ints; pub mod taxi_data; pub mod tpch_l_comment; -use std::path::PathBuf; +pub(crate) const DEFAULT_BENCHMARK_RUNNER_ID: &str = "unknown"; #[async_trait] pub trait Dataset { @@ -67,6 +68,85 @@ impl BenchmarkDataset { BenchmarkDataset::GhArchive => "gharchive", } } + + /// Return the globally unique path prefix used for query benchmark result IDs. 
+ pub fn benchmark_id_path(&self, benchmark_runner: &str, query_idx: usize) -> String { + let runner_id = normalize_benchmark_runner_id(benchmark_runner); + let query_segment = format!("q{query_idx:02}"); + let dataset_path = match self { + BenchmarkDataset::TpcH { scale_factor } => { + format!( + "tpch/sf_{}/{query_segment}", + scale_factor_slug(scale_factor) + ) + } + BenchmarkDataset::TpcDS { scale_factor } => { + format!( + "tpcds/sf_{}/{query_segment}", + scale_factor_slug(scale_factor) + ) + } + BenchmarkDataset::ClickBench { flavor } => { + format!( + "clickbench/flavor_{}/{query_segment}", + slug(&flavor.to_string()) + ) + } + BenchmarkDataset::PublicBi { name } => { + format!("public-bi/{}/{query_segment}", slug(name)) + } + BenchmarkDataset::StatPopGen { n_rows } => { + format!("statpopgen/rows_{n_rows}/{query_segment}") + } + BenchmarkDataset::PolarSignals { n_rows } => { + format!("polarsignals/rows_{n_rows}/{query_segment}") + } + BenchmarkDataset::Fineweb => format!("fineweb/{query_segment}"), + BenchmarkDataset::GhArchive => format!("gharchive/{query_segment}"), + }; + format!("{dataset_path}/{runner_id}") + } + + pub fn benchmark_memory_id_path(&self, benchmark_runner: &str, query_idx: usize) -> String { + format!( + "memory/{}", + self.benchmark_id_path(benchmark_runner, query_idx) + ) + } +} + +pub(crate) fn normalize_benchmark_runner_id(benchmark_runner: &str) -> String { + let benchmark_runner = benchmark_runner.trim().replace('/', "_"); + if benchmark_runner.is_empty() { + DEFAULT_BENCHMARK_RUNNER_ID.to_string() + } else { + benchmark_runner + } +} + +fn scale_factor_slug(scale_factor: &str) -> String { + slug(scale_factor.strip_suffix(".0").unwrap_or(scale_factor)) +} + +fn slug(value: &str) -> String { + let mut slug = String::new(); + let mut last_was_separator = false; + + for ch in value.chars().flat_map(char::to_lowercase) { + if ch.is_ascii_alphanumeric() { + slug.push(ch); + last_was_separator = false; + } else if !last_was_separator && 
!slug.is_empty() { + slug.push('_'); + last_was_separator = true; + } + } + + if slug.ends_with('_') { + slug.pop(); + } + + slug } impl Display for BenchmarkDataset { diff --git a/vortex-bench/src/measurements.rs b/vortex-bench/src/measurements.rs index f49349cd95e..8e62c79753b 100644 --- a/vortex-bench/src/measurements.rs +++ b/vortex-bench/src/measurements.rs @@ -243,6 +243,7 @@ pub struct QueryMeasurement { pub query_idx: usize, pub target: Target, pub benchmark_dataset: BenchmarkDataset, + pub benchmark_runner: String, /// The storage backend against which this test was run. One of: s3, gcs, nvme. pub storage: String, pub runs: Vec<Duration>, @@ -297,11 +298,12 @@ pub struct TripleJson { impl ToJson for QueryMeasurement { fn to_json(&self) -> serde_json::Value { let name = format!( - "{dataset}_q{query_idx:02}/{engine}:{format}", - dataset = self.benchmark_dataset.name(), + "{benchmark_id_path}/{engine}:{format}", + benchmark_id_path = self + .benchmark_dataset + .benchmark_id_path(&self.benchmark_runner, self.query_idx), engine = self.target.engine, format = self.target.format.name(), - query_idx = self.query_idx ); let host = Triple::host(); @@ -430,6 +432,7 @@ pub struct MemoryMeasurement { pub query_idx: usize, pub target: Target, pub benchmark_dataset: BenchmarkDataset, + pub benchmark_runner: String, pub storage: String, pub physical_memory_delta: i64, pub virtual_memory_delta: i64, @@ -442,6 +445,7 @@ impl MemoryMeasurement { query_idx: usize, target: Target, benchmark_dataset: BenchmarkDataset, + benchmark_runner: String, storage: String, memory_result: MemoryMeasurementResult, ) -> Self { @@ -449,6 +453,7 @@ query_idx, target, benchmark_dataset, + benchmark_runner, storage, physical_memory_delta: memory_result.physical_memory_delta, virtual_memory_delta: memory_result.virtual_memory_delta, @@ -461,11 +466,12 @@ impl ToJson for MemoryMeasurement { fn to_json(&self) -> serde_json::Value { let name = format!( - 
"{dataset}_q{query_idx:02}_memory/{engine}:{format}", - dataset = self.benchmark_dataset.name(), + "{benchmark_id_path}/{engine}:{format}", + benchmark_id_path = self + .benchmark_dataset + .benchmark_memory_id_path(&self.benchmark_runner, self.query_idx), engine = self.target.engine, format = self.target.format.name(), - query_idx = self.query_idx ); let host = Triple::host(); @@ -515,3 +521,58 @@ pub struct MemoryMeasurementJson { pub target: Target, pub env_triple: TripleJson, } + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use super::*; + use crate::memory::MemoryMeasurementResult; + + #[test] + fn query_measurement_json_name_uses_runner_prefixed_path() { + let measurement = QueryMeasurement { + query_idx: 1, + target: Target::new(Engine::DataFusion, Format::OnDiskVortex), + benchmark_dataset: BenchmarkDataset::TpcH { + scale_factor: "1.0".to_string(), + }, + benchmark_runner: "ec2_c6id.8xlarge".to_string(), + storage: "nvme".to_string(), + runs: vec![Duration::from_nanos(10)], + }; + + let json = measurement.to_json(); + + assert_eq!( + json.get("name").and_then(serde_json::Value::as_str), + Some("tpch/sf_1/q01/ec2_c6id.8xlarge/datafusion:vortex-file-compressed") + ); + } + + #[test] + fn memory_measurement_json_name_uses_runner_prefixed_path() { + let measurement = MemoryMeasurement::new( + 2, + Target::new(Engine::DuckDB, Format::Parquet), + BenchmarkDataset::TpcH { + scale_factor: "1.0".to_string(), + }, + "ec2_c6id.8xlarge".to_string(), + "nvme".to_string(), + MemoryMeasurementResult { + physical_memory_delta: 1, + virtual_memory_delta: 2, + peak_physical_memory: 3, + peak_virtual_memory: 4, + }, + ); + + let json = measurement.to_json(); + + assert_eq!( + json.get("name").and_then(serde_json::Value::as_str), + Some("memory/tpch/sf_1/q02/ec2_c6id.8xlarge/duckdb:parquet") + ); + } +} diff --git a/vortex-bench/src/runner.rs b/vortex-bench/src/runner.rs index dc7d729232e..3885bace2af 100644 --- a/vortex-bench/src/runner.rs +++ 
b/vortex-bench/src/runner.rs @@ -20,6 +20,8 @@ use crate::BenchmarkDataset; use crate::Engine; use crate::Format; use crate::Target; +use crate::datasets::DEFAULT_BENCHMARK_RUNNER_ID; +use crate::datasets::normalize_benchmark_runner_id; /// Controls whether queries are benchmarked or explained. pub enum BenchmarkMode { @@ -66,6 +68,7 @@ pub struct BenchmarkResults { pub struct SqlBenchmarkRunner { engine: Engine, benchmark_dataset: BenchmarkDataset, + benchmark_runner: String, storage: String, expected_row_counts: Option<Vec<usize>>, /// Deduplicated, preserving insertion order. @@ -81,6 +84,7 @@ impl SqlBenchmarkRunner { pub fn new( benchmark: &B, engine: Engine, + benchmark_runner: String, formats: impl IntoIterator<Item = Format>, track_memory: bool, hide_progress_bar: bool, @@ -88,12 +92,15 @@ let mut seen = HashSet::new(); let formats: Vec<Format> = formats.into_iter().filter(|f| seen.insert(*f)).collect(); let storage = url_scheme_to_storage(benchmark.data_url())?; + let benchmark_runner = normalize_benchmark_runner_id(&benchmark_runner); + validate_benchmark_runner_id(&benchmark_runner, is_ci())?; let memory_tracker = track_memory.then(BenchmarkMemoryTracker::new); Ok(Self { engine, benchmark_dataset: benchmark.dataset(), + benchmark_runner, storage, expected_row_counts: benchmark.expected_row_counts().map(|s| s.to_vec()), formats, @@ -168,6 +175,7 @@ query_idx, target, benchmark_dataset: self.benchmark_dataset.clone(), + benchmark_runner: self.benchmark_runner.clone(), storage: self.storage.clone(), runs, }); @@ -192,6 +200,7 @@ query_idx, target, self.benchmark_dataset.clone(), + self.benchmark_runner.clone(), self.storage.clone(), memory_result, )); @@ -411,6 +420,7 @@ } } +fn is_ci() -> bool { + matches!(std::env::var("CI").as_deref(), Ok("true")) +} + +fn validate_benchmark_runner_id(benchmark_runner: &str, is_ci: bool) -> anyhow::Result<()> { + anyhow::ensure!( + !is_ci || 
benchmark_runner != DEFAULT_BENCHMARK_RUNNER_ID, + "benchmark runner must not be unknown in CI; pass --runner" + ); + Ok(()) +} + pub fn export_results( queries: Vec<QueryMeasurement>, memory: Vec<MemoryMeasurement>, @@ -460,3 +481,23 @@ }) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ci_rejects_unknown_benchmark_runner() { + assert!(validate_benchmark_runner_id("unknown", true).is_err()); + } + + #[test] + fn ci_accepts_explicit_benchmark_runner() { + assert!(validate_benchmark_runner_id("ec2_c6id.8xlarge", true).is_ok()); + } + + #[test] + fn local_accepts_unknown_benchmark_runner() { + assert!(validate_benchmark_runner_id("unknown", false).is_ok()); + } +}