Skip to content

Commit 35d59ae

Browse files
committed
feat(bench): emit v3 JSONL records and dual-write to bench server
Brings the v3 emitter and CI dual-write plumbing from ct/benchmarks-v3 onto develop without the v3 server/website code. CI continues to write v2 results to S3 unchanged; v3 ingest is gated on vars.V3_INGEST_URL and `continue-on-error: true`, so when the variable is unset (or the server is unreachable) the workflow no-ops. vortex-bench: - New `vortex-bench/src/v3.rs` with one record per `kind` (`query_measurement`, `compression_time`, `compression_size`, `random_access_time`, `vector_search_run`) plus a serde-tagged `V3Record` enum, JSONL writer, and snapshot tests. - `Dataset::v3_dataset_dims()` (default `(name(), None)`) lets Public-BI map to `(public-bi, <subset>)`. - `compress`/`runner` capture per-iteration timings and provide `SqlBenchmarkRunner::v3_records()`. Benchmark binaries (`compress-bench`, `datafusion-bench`, `duckdb-bench`, `lance-bench`, `random-access-bench`, `vector-search-bench`) gain `--gh-json-v3 <path>` for JSONL emission alongside the existing `gh-json` flow. bench-orchestrator passes `--gh-json-v3` through `vx-bench run`. `scripts/post-ingest.py` reads JSONL, fills the `commit` envelope from `git show`, wraps in `{run_meta, commit, records}`, and POSTs to `/api/ingest`. Stdlib only. Workflows: - `.github/workflows/bench.yml` and `sql-benchmarks.yml` add `--gh-json-v3 results.v3.jsonl` and a follow-up "Ingest results to v3 server" step. - New `.github/workflows/v3-commit-metadata.yml` POSTs an empty envelope on every push to `develop` so the v3 `commits` dim stays populated. Files intentionally NOT brought over: anything under `benchmarks-website/`, the workspace member additions for the v3 server, and workflows depending on the v3 server crate. The v3 website ships in a follow-up PR off `ct/benchmarks-v3` once dual-write is healthy in production. Signed-off-by: Claude <noreply@anthropic.com>
1 parent 44a6367 commit 35d59ae

29 files changed

Lines changed: 1184 additions & 5 deletions

.github/workflows/bench.yml

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ jobs:
9292
VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1"
9393
FLAT_LAYOUT_INLINE_ARRAY_NODE: "1"
9494
run: |
95-
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
95+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl
9696
9797
- name: Setup AWS CLI
9898
uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6
@@ -105,6 +105,19 @@ jobs:
105105
run: |
106106
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
107107
108+
- name: Ingest results to v3 server
109+
if: vars.V3_INGEST_URL != ''
110+
continue-on-error: true
111+
shell: bash
112+
env:
113+
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
114+
run: |
115+
python3 scripts/post-ingest.py results.v3.jsonl \
116+
--server "${{ vars.V3_INGEST_URL }}" \
117+
--commit-sha "${{ github.sha }}" \
118+
--benchmark-id "${{ matrix.benchmark.id }}" \
119+
--repo-url "${{ github.server_url }}/${{ github.repository }}"
120+
108121
- name: Alert incident.io
109122
if: failure()
110123
uses: ./.github/actions/alert-incident-io

.github/workflows/sql-benchmarks.yml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,7 @@ jobs:
376376
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
377377
--targets-json '${{ steps.targets.outputs.targets_json }}' \
378378
--output results.json \
379+
--gh-json-v3 results.v3.jsonl \
379380
--no-build \
380381
--runner "ec2_${{ inputs.machine_type }}" \
381382
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -395,6 +396,7 @@ jobs:
395396
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
396397
--targets-json '${{ steps.targets.outputs.targets_json }}' \
397398
--output results.json \
399+
--gh-json-v3 results.v3.jsonl \
398400
--no-build \
399401
--runner "ec2_${{ inputs.machine_type }}" \
400402
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
@@ -499,6 +501,19 @@ jobs:
499501
run: |
500502
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
501503
504+
- name: Ingest results to v3 server
505+
if: inputs.mode == 'develop' && vars.V3_INGEST_URL != ''
506+
continue-on-error: true
507+
shell: bash
508+
env:
509+
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
510+
run: |
511+
python3 scripts/post-ingest.py results.v3.jsonl \
512+
--server "${{ vars.V3_INGEST_URL }}" \
513+
--commit-sha "${{ github.sha }}" \
514+
--benchmark-id "${{ matrix.id }}" \
515+
--repo-url "${{ github.server_url }}/${{ github.repository }}"
516+
502517
- name: Upload File Sizes
503518
if: inputs.mode == 'develop' && matrix.remote_storage == null
504519
shell: bash
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Posts a v3 ingest envelope with no records on every push to develop, so the
2+
# `commits` dim stays populated even when no benchmark ran.
3+
4+
name: v3 commit metadata
5+
6+
on:
7+
push:
8+
branches: [develop]
9+
workflow_dispatch: { }
10+
11+
permissions:
12+
contents: read
13+
14+
jobs:
15+
commit-metadata:
16+
runs-on: ubuntu-latest
17+
timeout-minutes: 10
18+
steps:
19+
- uses: actions/checkout@v6
20+
with:
21+
fetch-depth: 2
22+
23+
- name: Ingest commit metadata to v3 server
24+
if: vars.V3_INGEST_URL != ''
25+
continue-on-error: true
26+
shell: bash
27+
env:
28+
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
29+
run: |
30+
echo -n > empty.jsonl
31+
python3 scripts/post-ingest.py empty.jsonl \
32+
--server "${{ vars.V3_INGEST_URL }}" \
33+
--commit-sha "${{ github.sha }}" \
34+
--benchmark-id "commit-metadata" \
35+
--repo-url "${{ github.server_url }}/${{ github.repository }}"

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bench-orchestrator/bench_orchestrator/cli.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ def run(
210210
Path | None,
211211
typer.Option("--output", help="Optional path for compatibility JSONL output"),
212212
] = None,
213+
gh_json_v3: Annotated[
214+
Path | None,
215+
typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"),
216+
] = None,
213217
options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
214218
) -> None:
215219
"""Run benchmarks with specified configuration."""
@@ -294,6 +298,7 @@ def run(
294298
sample_rate=sample_rate,
295299
tracing=tracing,
296300
runner=runner,
301+
gh_json_v3=gh_json_v3,
297302
on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
298303
write_result_line(
299304
line,

bench-orchestrator/bench_orchestrator/runner/executor.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def build_command(
4040
sample_rate: int | None = None,
4141
tracing: bool = False,
4242
runner: str | None = None,
43+
gh_json_v3: Path | None = None,
4344
) -> list[str]:
4445
"""Build the command used to execute a benchmark binary."""
4546
cmd = [
@@ -67,6 +68,8 @@ def build_command(
6768
cmd.append("--tracing")
6869
if runner:
6970
cmd.extend(["--runner", runner])
71+
if gh_json_v3 is not None:
72+
cmd.extend(["--gh-json-v3", str(gh_json_v3)])
7073
if options:
7174
for key, value in options.items():
7275
cmd.extend(["--opt", f"{key}={value}"])
@@ -98,6 +101,7 @@ def run(
98101
sample_rate: int | None = None,
99102
tracing: bool = False,
100103
runner: str | None = None,
104+
gh_json_v3: Path | None = None,
101105
on_result: Callable[[str], None] | None = None,
102106
) -> list[str]:
103107
"""
@@ -128,6 +132,7 @@ def run(
128132
sample_rate=sample_rate,
129133
tracing=tracing,
130134
runner=runner,
135+
gh_json_v3=gh_json_v3,
131136
)
132137

133138
if self.verbose:

bench-orchestrator/tests/test_executor.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
4848
assert "1,3" in cmd
4949

5050

51+
def test_build_command_includes_gh_json_v3_when_set() -> None:
52+
executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)
53+
54+
cmd = executor.build_command(
55+
benchmark=Benchmark.TPCH,
56+
formats=[Format.PARQUET],
57+
gh_json_v3=Path("results.v3.jsonl"),
58+
)
59+
60+
assert "--gh-json-v3" in cmd
61+
flag_idx = cmd.index("--gh-json-v3")
62+
assert cmd[flag_idx + 1] == "results.v3.jsonl"
63+
64+
65+
def test_build_command_omits_gh_json_v3_when_unset() -> None:
66+
executor = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB)
67+
68+
cmd = executor.build_command(
69+
benchmark=Benchmark.TPCH,
70+
formats=[Format.PARQUET],
71+
)
72+
73+
assert "--gh-json-v3" not in cmd
74+
75+
5176
def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
5277
script = tmp_path / "fake-bench.py"
5378
script.write_text(

benchmarks/compress-bench/src/main.rs

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ use vortex_bench::public_bi::PBIDataset::Euro2016;
4141
use vortex_bench::public_bi::PBIDataset::Food;
4242
use vortex_bench::public_bi::PBIDataset::HashTags;
4343
use vortex_bench::setup_logging_and_tracing_with_format;
44+
use vortex_bench::v3;
4445

4546
#[derive(Parser, Debug)]
4647
#[command(version, about, long_about = None)]
@@ -68,6 +69,10 @@ struct Args {
6869
display_format: DisplayFormat,
6970
#[arg(short, long)]
7071
output_path: Option<PathBuf>,
72+
/// Additionally write v3 JSONL records to this path. See
73+
/// `benchmarks-website/planning/02-contracts.md`.
74+
#[arg(long)]
75+
gh_json_v3: Option<PathBuf>,
7176
#[arg(long)]
7277
tracing: bool,
7378
/// Format for the primary stderr log sink. `text` is the default human-readable format;
@@ -89,6 +94,7 @@ async fn main() -> anyhow::Result<()> {
8994
args.ops,
9095
args.display_format,
9196
args.output_path,
97+
args.gh_json_v3,
9298
)
9399
.await
94100
}
@@ -114,6 +120,7 @@ async fn run_compress(
114120
ops: Vec<CompressOp>,
115121
display_format: DisplayFormat,
116122
output_path: Option<PathBuf>,
123+
gh_json_v3: Option<PathBuf>,
117124
) -> anyhow::Result<()> {
118125
let targets = formats
119126
.iter()
@@ -163,17 +170,24 @@ async fn run_compress(
163170
let progress = ProgressBar::new((datasets.len() * formats.len() * ops.len()) as u64);
164171

165172
let mut measurements = vec![];
173+
let mut v3_records: Vec<v3::V3Record> = Vec::new();
166174

167175
for dataset_handle in datasets.into_iter() {
168-
let m = run_benchmark_for_dataset(&progress, &formats, &ops, iterations, dataset_handle)
169-
.await?;
176+
let (m, mut records) =
177+
run_benchmark_for_dataset(&progress, &formats, &ops, iterations, dataset_handle)
178+
.await?;
170179
measurements.push(m);
180+
v3_records.append(&mut records);
171181
}
172182

173183
let measurements = CompressMeasurements::from_iter(measurements);
174184

175185
progress.finish();
176186

187+
if let Some(path) = gh_json_v3 {
188+
v3::write_jsonl_to_path(&path, &v3_records)?;
189+
}
190+
177191
let mut writer = create_output_writer(&display_format, output_path, BENCHMARK_ID)?;
178192

179193
match display_format {
@@ -202,8 +216,9 @@ async fn run_benchmark_for_dataset(
202216
ops: &[CompressOp],
203217
iterations: usize,
204218
dataset_handle: &dyn Dataset,
205-
) -> anyhow::Result<CompressMeasurements> {
219+
) -> anyhow::Result<(CompressMeasurements, Vec<v3::V3Record>)> {
206220
let bench_name = dataset_handle.name();
221+
let (v3_dataset, v3_variant) = dataset_handle.v3_dataset_dims();
207222
tracing::info!("Running {bench_name} benchmark");
208223

209224
// Get the parquet file path for this dataset
@@ -213,6 +228,7 @@ async fn run_benchmark_for_dataset(
213228
let mut timings = Vec::new();
214229
let mut measurements_map: HashMap<(Format, CompressOp), Duration> = HashMap::new();
215230
let mut compressed_sizes: HashMap<Format, u64> = HashMap::new();
231+
let mut v3_records: Vec<v3::V3Record> = Vec::new();
216232

217233
for format in formats {
218234
let compressor = get_compressor(*format);
@@ -228,6 +244,24 @@ async fn run_benchmark_for_dataset(
228244
)
229245
.await?;
230246
compressed_sizes.insert(*format, result.compressed_size);
247+
let all_runs_ns: Vec<u64> = result
248+
.all_runs
249+
.iter()
250+
.map(|d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
251+
.collect();
252+
v3_records.push(v3::compression_time_record(
253+
&result.timing,
254+
v3_dataset,
255+
v3_variant,
256+
CompressOp::Compress,
257+
all_runs_ns,
258+
));
259+
v3_records.push(v3::compression_size_record(
260+
v3_dataset,
261+
v3_variant,
262+
*format,
263+
result.compressed_size,
264+
));
231265
ratios.extend(result.ratios);
232266
timings.push(result.timing);
233267
result.time
@@ -240,6 +274,18 @@ async fn run_benchmark_for_dataset(
240274
bench_name,
241275
)
242276
.await?;
277+
let all_runs_ns: Vec<u64> = result
278+
.all_runs
279+
.iter()
280+
.map(|d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
281+
.collect();
282+
v3_records.push(v3::compression_time_record(
283+
&result.timing,
284+
v3_dataset,
285+
v3_variant,
286+
CompressOp::Decompress,
287+
all_runs_ns,
288+
));
243289
timings.push(result.timing);
244290
result.time
245291
}
@@ -258,5 +304,5 @@ async fn run_benchmark_for_dataset(
258304
&mut ratios,
259305
);
260306

261-
Ok(CompressMeasurements { timings, ratios })
307+
Ok((CompressMeasurements { timings, ratios }, v3_records))
262308
}

benchmarks/datafusion-bench/src/main.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use vortex_bench::runner::BenchmarkQueryResult;
4444
use vortex_bench::runner::SqlBenchmarkRunner;
4545
use vortex_bench::runner::filter_queries;
4646
use vortex_bench::setup_logging_and_tracing;
47+
use vortex_bench::v3;
4748
use vortex_datafusion::metrics::VortexMetricsFinder;
4849

4950
/// Common arguments shared across benchmarks
@@ -82,6 +83,11 @@ struct Args {
8283
#[arg(short)]
8384
output_path: Option<PathBuf>,
8485

86+
/// Additionally write v3 JSONL records to this path. See
87+
/// `benchmarks-website/planning/02-contracts.md`.
88+
#[arg(long)]
89+
gh_json_v3: Option<PathBuf>,
90+
8591
#[arg(long, default_value_t = false)]
8692
show_metrics: bool,
8793

@@ -226,6 +232,10 @@ async fn main() -> anyhow::Result<()> {
226232
print_metrics(plans.as_ref());
227233
}
228234

235+
if let Some(path) = args.gh_json_v3.as_ref() {
236+
v3::write_jsonl_to_path(path, &runner.v3_records())?;
237+
}
238+
229239
let benchmark_id = format!("datafusion-{}", benchmark.dataset_name());
230240
let writer = create_output_writer(&args.display_format, args.output_path, &benchmark_id)?;
231241
runner.export_to(&args.display_format, writer)?;

benchmarks/duckdb-bench/src/main.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use vortex_bench::runner::BenchmarkMode;
2424
use vortex_bench::runner::SqlBenchmarkRunner;
2525
use vortex_bench::runner::filter_queries;
2626
use vortex_bench::setup_logging_and_tracing;
27+
use vortex_bench::v3;
2728

2829
/// Common arguments shared across benchmarks
2930
#[derive(Parser)]
@@ -58,6 +59,11 @@ struct Args {
5859
#[arg(short)]
5960
output_path: Option<PathBuf>,
6061

62+
/// Additionally write v3 JSONL records to this path. See
63+
/// `benchmarks-website/planning/02-contracts.md`.
64+
#[arg(long)]
65+
gh_json_v3: Option<PathBuf>,
66+
6167
#[arg(long, default_value_t = false)]
6268
track_memory: bool,
6369

@@ -190,6 +196,10 @@ fn main() -> anyhow::Result<()> {
190196
)?;
191197

192198
if !args.explain {
199+
if let Some(path) = args.gh_json_v3.as_ref() {
200+
v3::write_jsonl_to_path(path, &runner.v3_records())?;
201+
}
202+
193203
let benchmark_id = format!("duckdb-{}", benchmark.dataset_name());
194204
let writer = create_output_writer(&args.display_format, args.output_path, &benchmark_id)?;
195205
runner.export_to(&args.display_format, writer)?;

0 commit comments

Comments (0)