Skip to content

Commit a4476f1

Browse files
AdamGS and robert3005 authored
Minimize default sql benchmarks suite (#8465)
## Summary

This PR changes the default benchmark suite that runs when the `action/benchmark-sql` label is added, by removing the following benchmarks from the baseline:

1. Appian and TPC-H SF=10 on S3 (the two slowest benchmarks)
2. Any `duckdb:duckdb` and `vortex-compact` runs

The full run is unchanged, and will still run post-merge or when using the `action/benchmark-sql-full` label.

Happy to remove more things, but I think these are the least contentious targets we can start with.

---------

Signed-off-by: Adam Gutglick <adam@spiraldb.com>
Co-authored-by: Robert Kruszewski <github@robertk.io>
1 parent c2d2722 commit a4476f1

4 files changed

Lines changed: 248 additions & 18 deletions

File tree

.github/workflows/bench-dispatch.yml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,3 +47,21 @@ jobs:
4747
needs: remove-sql-label
4848
uses: ./.github/workflows/sql-pr.yml
4949
secrets: inherit
50+
51+
remove-sql-full-label:
52+
runs-on: ubuntu-latest
53+
timeout-minutes: 10
54+
if: github.event.label.name == 'action/benchmark-sql-full'
55+
steps:
56+
- uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0 # v1
57+
if: github.event.pull_request.head.repo.full_name == 'vortex-data/vortex'
58+
with:
59+
labels: action/benchmark-sql-full
60+
fail_on_error: true
61+
62+
sql-full-bench:
63+
needs: remove-sql-full-label
64+
uses: ./.github/workflows/sql-pr.yml
65+
secrets: inherit
66+
with:
67+
benchmark_profile: "full"

.github/workflows/sql-benchmarks.yml

Lines changed: 204 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ on:
1313
benchmark_matrix:
1414
required: false
1515
type: string
16-
description: "JSON string containing the matrix configuration"
16+
description: "JSON string containing the full matrix configuration"
1717
default: |
1818
[
1919
{
@@ -277,6 +277,193 @@ on:
277277
"iterations": "10"
278278
}
279279
]
280+
base_benchmark_matrix:
281+
required: false
282+
type: string
283+
description: "JSON string containing the base matrix configuration"
284+
default: |
285+
[
286+
{
287+
"id": "clickbench-nvme",
288+
"subcommand": "clickbench",
289+
"name": "Clickbench on NVME",
290+
"data_formats": ["parquet", "vortex"],
291+
"pr_targets": [
292+
{"engine": "datafusion", "format": "parquet"},
293+
{"engine": "datafusion", "format": "vortex"},
294+
{"engine": "duckdb", "format": "parquet"},
295+
{"engine": "duckdb", "format": "vortex"}
296+
],
297+
"develop_targets": [
298+
{"engine": "datafusion", "format": "parquet"},
299+
{"engine": "datafusion", "format": "vortex"},
300+
{"engine": "datafusion", "format": "lance"},
301+
{"engine": "duckdb", "format": "parquet"},
302+
{"engine": "duckdb", "format": "vortex"}
303+
]
304+
},
305+
{
306+
"id": "tpch-nvme",
307+
"subcommand": "tpch",
308+
"name": "TPC-H SF=1 on NVME",
309+
"data_formats": ["parquet", "vortex"],
310+
"pr_targets": [
311+
{"engine": "datafusion", "format": "arrow"},
312+
{"engine": "datafusion", "format": "parquet"},
313+
{"engine": "datafusion", "format": "vortex"},
314+
{"engine": "duckdb", "format": "parquet"},
315+
{"engine": "duckdb", "format": "vortex"}
316+
],
317+
"develop_targets": [
318+
{"engine": "datafusion", "format": "arrow"},
319+
{"engine": "datafusion", "format": "parquet"},
320+
{"engine": "datafusion", "format": "vortex"},
321+
{"engine": "datafusion", "format": "lance"},
322+
{"engine": "duckdb", "format": "parquet"},
323+
{"engine": "duckdb", "format": "vortex"}
324+
],
325+
"scale_factor": "1.0",
326+
"iterations": "10"
327+
},
328+
{
329+
"id": "tpch-s3",
330+
"subcommand": "tpch",
331+
"name": "TPC-H SF=1 on S3",
332+
"local_dir": "vortex-bench/data/tpch/1.0",
333+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
334+
"data_formats": ["parquet", "vortex"],
335+
"pr_targets": [
336+
{"engine": "datafusion", "format": "parquet"},
337+
{"engine": "datafusion", "format": "vortex"},
338+
{"engine": "duckdb", "format": "parquet"},
339+
{"engine": "duckdb", "format": "vortex"}
340+
],
341+
"develop_targets": [
342+
{"engine": "datafusion", "format": "parquet"},
343+
{"engine": "datafusion", "format": "vortex"},
344+
{"engine": "duckdb", "format": "parquet"},
345+
{"engine": "duckdb", "format": "vortex"}
346+
],
347+
"scale_factor": "1.0",
348+
"iterations": "10"
349+
},
350+
{
351+
"id": "tpch-nvme-10",
352+
"subcommand": "tpch",
353+
"name": "TPC-H SF=10 on NVME",
354+
"data_formats": ["parquet", "vortex"],
355+
"pr_targets": [
356+
{"engine": "datafusion", "format": "arrow"},
357+
{"engine": "datafusion", "format": "parquet"},
358+
{"engine": "datafusion", "format": "vortex"},
359+
{"engine": "duckdb", "format": "parquet"},
360+
{"engine": "duckdb", "format": "vortex"}
361+
],
362+
"develop_targets": [
363+
{"engine": "datafusion", "format": "arrow"},
364+
{"engine": "datafusion", "format": "parquet"},
365+
{"engine": "datafusion", "format": "vortex"},
366+
{"engine": "datafusion", "format": "lance"},
367+
{"engine": "duckdb", "format": "parquet"},
368+
{"engine": "duckdb", "format": "vortex"}
369+
],
370+
"scale_factor": "10.0",
371+
"iterations": "10"
372+
},
373+
{
374+
"id": "tpcds-nvme",
375+
"subcommand": "tpcds",
376+
"name": "TPC-DS SF=1 on NVME",
377+
"data_formats": ["parquet", "vortex"],
378+
"pr_targets": [
379+
{"engine": "datafusion", "format": "parquet"},
380+
{"engine": "datafusion", "format": "vortex"},
381+
{"engine": "duckdb", "format": "parquet"},
382+
{"engine": "duckdb", "format": "vortex"}
383+
],
384+
"develop_targets": [
385+
{"engine": "datafusion", "format": "parquet"},
386+
{"engine": "datafusion", "format": "vortex"},
387+
{"engine": "duckdb", "format": "parquet"},
388+
{"engine": "duckdb", "format": "vortex"}
389+
],
390+
"scale_factor": "1.0"
391+
},
392+
{
393+
"id": "statpopgen",
394+
"subcommand": "statpopgen",
395+
"name": "Statistical and Population Genetics",
396+
"local_dir": "vortex-bench/data/statpopgen",
397+
"data_formats": ["parquet", "vortex"],
398+
"pr_targets": [
399+
{"engine": "duckdb", "format": "parquet"},
400+
{"engine": "duckdb", "format": "vortex"}
401+
],
402+
"develop_targets": [
403+
{"engine": "duckdb", "format": "parquet"},
404+
{"engine": "duckdb", "format": "vortex"}
405+
],
406+
"scale_factor": "100"
407+
},
408+
{
409+
"id": "fineweb",
410+
"subcommand": "fineweb",
411+
"name": "FineWeb NVMe",
412+
"data_formats": ["parquet", "vortex"],
413+
"pr_targets": [
414+
{"engine": "datafusion", "format": "parquet"},
415+
{"engine": "datafusion", "format": "vortex"},
416+
{"engine": "duckdb", "format": "parquet"},
417+
{"engine": "duckdb", "format": "vortex"}
418+
],
419+
"develop_targets": [
420+
{"engine": "datafusion", "format": "parquet"},
421+
{"engine": "datafusion", "format": "vortex"},
422+
{"engine": "duckdb", "format": "parquet"},
423+
{"engine": "duckdb", "format": "vortex"}
424+
],
425+
"scale_factor": "100"
426+
},
427+
{
428+
"id": "fineweb-s3",
429+
"subcommand": "fineweb",
430+
"name": "FineWeb S3",
431+
"local_dir": "vortex-bench/data/fineweb",
432+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
433+
"data_formats": ["parquet", "vortex"],
434+
"pr_targets": [
435+
{"engine": "datafusion", "format": "parquet"},
436+
{"engine": "datafusion", "format": "vortex"},
437+
{"engine": "duckdb", "format": "parquet"},
438+
{"engine": "duckdb", "format": "vortex"}
439+
],
440+
"develop_targets": [
441+
{"engine": "datafusion", "format": "parquet"},
442+
{"engine": "datafusion", "format": "vortex"},
443+
{"engine": "duckdb", "format": "parquet"},
444+
{"engine": "duckdb", "format": "vortex"}
445+
],
446+
"scale_factor": "100"
447+
},
448+
{
449+
"id": "polarsignals",
450+
"subcommand": "polarsignals",
451+
"name": "PolarSignals Profiling",
452+
"data_formats": ["vortex"],
453+
"pr_targets": [
454+
{"engine": "datafusion", "format": "vortex"}
455+
],
456+
"develop_targets": [
457+
{"engine": "datafusion", "format": "vortex"}
458+
],
459+
"scale_factor": "1"
460+
}
461+
]
462+
benchmark_profile:
463+
required: false
464+
type: string
465+
description: "Benchmark profile to run: full or base"
466+
default: "full"
280467

281468
jobs:
282469
bench:
@@ -289,7 +476,7 @@ jobs:
289476
strategy:
290477
fail-fast: false
291478
matrix:
292-
include: ${{ fromJSON(inputs.benchmark_matrix) }}
479+
include: ${{ fromJSON(inputs.benchmark_profile == 'base' && inputs.base_benchmark_matrix || inputs.benchmark_matrix) }}
293480

294481
runs-on: >-
295482
${{ github.repository == 'vortex-data/vortex'
@@ -321,7 +508,7 @@ jobs:
321508
run: |
322509
wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.5.3/duckdb_cli-linux-amd64.zip | funzip > duckdb
323510
chmod +x duckdb
324-
echo "$PWD" >> $GITHUB_PATH
511+
echo "$PWD" >> "$GITHUB_PATH"
325512
326513
- uses: ./.github/actions/system-info
327514

@@ -345,11 +532,11 @@ jobs:
345532
env:
346533
RUSTFLAGS: "-C target-cpu=native"
347534
run: |
348-
packages="--bin data-gen --bin datafusion-bench --bin duckdb-bench"
535+
packages=(--bin data-gen --bin datafusion-bench --bin duckdb-bench)
349536
if [ "${{ inputs.mode }}" != "pr" ]; then
350-
packages="$packages --bin lance-bench"
537+
packages+=(--bin lance-bench)
351538
fi
352-
cargo build $packages --profile release_debug --features unstable_encodings
539+
cargo build "${packages[@]}" --profile release_debug --features unstable_encodings
353540
354541
- name: Generate data
355542
shell: bash
@@ -446,11 +633,16 @@ jobs:
446633
python3 scripts/s3-download.py s3://vortex-ci-benchmark-results/data.json.gz data.json.gz --no-sign-request
447634
gzip -d -c data.json.gz > base.json
448635
449-
echo '# Benchmarks: ${{ matrix.name }}' > comment.md
636+
benchmark_name="${{ matrix.name }}"
637+
if [ "${{ inputs.benchmark_profile }}" != "full" ]; then
638+
benchmark_name="$benchmark_name (${{ inputs.benchmark_profile }})"
639+
fi
640+
641+
echo "# Benchmarks: $benchmark_name" > comment.md
450642
echo '' >> comment.md
451-
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "${{ matrix.name }}" \
643+
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "$benchmark_name" \
452644
>> comment.md
453-
cat comment.md >> $GITHUB_STEP_SUMMARY
645+
cat comment.md >> "$GITHUB_STEP_SUMMARY"
454646
455647
- name: Comment PR
456648
if: inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -460,7 +652,7 @@ jobs:
460652
# There is exactly one comment per comment-tag. If a comment with this tag already exists,
461653
# this action will *update* the comment instead of posting a new comment. Therefore, each
462654
# unique benchmark configuration must have a unique comment-tag.
463-
comment-tag: bench-pr-comment-${{ matrix.id }}
655+
comment-tag: bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
464656

465657
- name: Comment PR on failure
466658
if: failure() && inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -469,8 +661,8 @@ jobs:
469661
message: |
470662
# 🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨
471663
472-
Benchmark `${{ matrix.name }}` failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
473-
comment-tag: bench-pr-comment-${{ matrix.id }}
664+
Benchmark `${{ matrix.name }}` (${{ inputs.benchmark_profile }}) failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
665+
comment-tag: bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
474666

475667
- name: Upload Benchmark Results
476668
if: inputs.mode == 'develop'

.github/workflows/sql-pr.yml

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,32 @@
11
# Runs SQL benchmarks once for a pull request.
2-
# Called from bench-dispatch.yml when the `action/benchmark-sql` label is added.
2+
# Called from bench-dispatch.yml when SQL benchmark labels are added.
33

44
name: PR SQL Benchmarks
55

66
concurrency:
7-
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
7+
group: >-
8+
${{ github.workflow }}-${{ inputs.benchmark_profile || 'base' }}-${{
9+
github.head_ref || github.run_id
10+
}}
811
cancel-in-progress: true
912

1013
on:
11-
workflow_call: { }
12-
workflow_dispatch: { }
14+
workflow_call:
15+
inputs:
16+
benchmark_profile:
17+
required: false
18+
type: string
19+
default: "base"
20+
workflow_dispatch:
21+
inputs:
22+
benchmark_profile:
23+
description: "SQL benchmark profile to run"
24+
required: false
25+
type: choice
26+
default: "base"
27+
options:
28+
- "base"
29+
- "full"
1330

1431
permissions:
1532
contents: read
@@ -22,3 +39,4 @@ jobs:
2239
secrets: inherit
2340
with:
2441
mode: "pr"
42+
benchmark_profile: ${{ inputs.benchmark_profile || 'base' }}

docs/developer-guide/benchmarking.md

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -212,8 +212,10 @@ Benchmarks run automatically on all commits to `develop` and can be run on-deman
212212
`develop`, with results uploaded for historical tracking.
213213
- **PR benchmarks** -- triggered by the `action/benchmark` label. Results are compared against
214214
the latest `develop` run and posted as a PR comment.
215-
- **SQL benchmarks** -- triggered by the `action/benchmark-sql` label. Runs a parametric matrix
216-
of suites, engines, formats, and storage backends (NVMe, S3).
215+
- **SQL benchmarks** -- triggered by the `action/benchmark-sql` label. Runs the base SQL matrix,
216+
which excludes Appian, TPC-H SF=10 on S3, `vortex-compact`, and `duckdb:duckdb`.
217+
- **Full SQL benchmarks** -- triggered by the `action/benchmark-sql-full` label. Runs the full
218+
SQL matrix of suites, engines, formats, and storage backends (NVMe, S3).
217219

218220
All CI benchmarks run on dedicated instances with the `release_debug` profile and
219221
`-C target-cpu=native` to produce representative numbers.

0 commit comments

Comments
 (0)