Minimize default sql benchmarks suite (#8465)

AdamGS · robert3005 · web-flow · commit a4476f1d772a · 2026-06-18T09:43:17.000+01:00
## Summary

This PR shrinks the default benchmark suite that runs when the
`action/benchmark-sql` label is added, by removing the following
benchmarks from the baseline:
1. Appian and TPC-H SF=10 on S3 (the two slowest benchmarks)
2. Any `duckdb:duckdb` and `vortex-compact` runs

The full run is unchanged, and will still run post-merge or when using
the `action/benchmark-sql-full` label.

Happy to remove more things, but I think these are the least contentious
targets we can start with.

---------

Signed-off-by: Adam Gutglick <adam@spiraldb.com>
Co-authored-by: Robert Kruszewski <github@robertk.io>
diff --git a/.github/workflows/bench-dispatch.yml b/.github/workflows/bench-dispatch.yml
@@ -47,3 +47,21 @@ jobs:
     needs: remove-sql-label
     uses: ./.github/workflows/sql-pr.yml
     secrets: inherit
+
+  remove-sql-full-label:  # Takes the full-profile trigger label back off the PR.
+    if: github.event.label.name == 'action/benchmark-sql-full'
+    timeout-minutes: 10
+    runs-on: ubuntu-latest
+    steps:
+      - if: github.event.pull_request.head.repo.full_name == 'vortex-data/vortex'  # removal step is skipped for any other head repo
+        uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0  # v1
+        with:
+          fail_on_error: true
+          labels: "action/benchmark-sql-full"
+
+  sql-full-bench:  # Calls the PR SQL workflow with the "full" profile.
+    uses: ./.github/workflows/sql-pr.yml
+    needs: remove-sql-full-label  # starts only after the label-removal job
+    with:
+      benchmark_profile: "full"
+    secrets: inherit
diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml
@@ -13,7 +13,7 @@ on:
       benchmark_matrix:
         required: false
         type: string
-        description: "JSON string containing the matrix configuration"
+        description: "JSON string containing the full matrix configuration"
         default: |
           [
             {
@@ -277,6 +277,193 @@ on:
               "iterations": "10"
             }
           ]
+      base_benchmark_matrix:  # Matrix used by the bench job when benchmark_profile is 'base'.
+        required: false
+        type: string
+        description: "JSON string containing the base matrix configuration"
+        default: |
+          [
+            {
+              "id": "clickbench-nvme",
+              "subcommand": "clickbench",
+              "name": "Clickbench on NVME",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "datafusion", "format": "lance"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ]
+            },
+            {
+              "id": "tpch-nvme",
+              "subcommand": "tpch",
+              "name": "TPC-H SF=1 on NVME",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "arrow"},
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "arrow"},
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "datafusion", "format": "lance"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "1.0",
+              "iterations": "10"
+            },
+            {
+              "id": "tpch-s3",
+              "subcommand": "tpch",
+              "name": "TPC-H SF=1 on S3",
+              "local_dir": "vortex-bench/data/tpch/1.0",
+              "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "1.0",
+              "iterations": "10"
+            },
+            {
+              "id": "tpch-nvme-10",
+              "subcommand": "tpch",
+              "name": "TPC-H SF=10 on NVME",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "arrow"},
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "arrow"},
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "datafusion", "format": "lance"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "10.0",
+              "iterations": "10"
+            },
+            {
+              "id": "tpcds-nvme",
+              "subcommand": "tpcds",
+              "name": "TPC-DS SF=1 on NVME",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "1.0"
+            },
+            {
+              "id": "statpopgen",
+              "subcommand": "statpopgen",
+              "name": "Statistical and Population Genetics",
+              "local_dir": "vortex-bench/data/statpopgen",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "100"
+            },
+            {
+              "id": "fineweb",
+              "subcommand": "fineweb",
+              "name": "FineWeb NVMe",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "100"
+            },
+            {
+              "id": "fineweb-s3",
+              "subcommand": "fineweb",
+              "name": "FineWeb S3",
+              "local_dir": "vortex-bench/data/fineweb",
+              "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
+              "data_formats": ["parquet", "vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "parquet"},
+                {"engine": "datafusion", "format": "vortex"},
+                {"engine": "duckdb", "format": "parquet"},
+                {"engine": "duckdb", "format": "vortex"}
+              ],
+              "scale_factor": "100"
+            },
+            {
+              "id": "polarsignals",
+              "subcommand": "polarsignals",
+              "name": "PolarSignals Profiling",
+              "data_formats": ["vortex"],
+              "pr_targets": [
+                {"engine": "datafusion", "format": "vortex"}
+              ],
+              "develop_targets": [
+                {"engine": "datafusion", "format": "vortex"}
+              ],
+              "scale_factor": "1"
+            }
+          ]
+      benchmark_profile:  # Picks which matrix input feeds the bench job's matrix.include.
+        required: false
+        type: string
+        description: "Benchmark profile to run: full or base"
+        default: "full"  # any value other than 'base' selects benchmark_matrix
 
 jobs:
   bench:
@@ -289,7 +476,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        include: ${{ fromJSON(inputs.benchmark_matrix) }}
+        include: ${{ fromJSON(inputs.benchmark_profile == 'base' && inputs.base_benchmark_matrix || inputs.benchmark_matrix) }}
 
     runs-on: >-
       ${{ github.repository == 'vortex-data/vortex'
@@ -321,7 +508,7 @@ jobs:
         run: |
           wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.5.3/duckdb_cli-linux-amd64.zip | funzip > duckdb
           chmod +x duckdb
-          echo "$PWD" >> $GITHUB_PATH
+          echo "$PWD" >> "$GITHUB_PATH"
 
       - uses: ./.github/actions/system-info
 
@@ -345,11 +532,11 @@ jobs:
         env:
           RUSTFLAGS: "-C target-cpu=native"
         run: |
-          packages="--bin data-gen --bin datafusion-bench --bin duckdb-bench"
+          packages=(--bin data-gen --bin datafusion-bench --bin duckdb-bench)
           if [ "${{ inputs.mode }}" != "pr" ]; then
-            packages="$packages --bin lance-bench"
+            packages+=(--bin lance-bench)
           fi
-          cargo build $packages --profile release_debug --features unstable_encodings
+          cargo build "${packages[@]}" --profile release_debug --features unstable_encodings
 
       - name: Generate data
         shell: bash
@@ -446,11 +633,16 @@ jobs:
           python3 scripts/s3-download.py s3://vortex-ci-benchmark-results/data.json.gz data.json.gz --no-sign-request
           gzip -d -c data.json.gz > base.json
 
-          echo '# Benchmarks: ${{ matrix.name }}' > comment.md
+          benchmark_name="${{ matrix.name }}"
+          if [ "${{ inputs.benchmark_profile }}" != "full" ]; then
+            benchmark_name="$benchmark_name (${{ inputs.benchmark_profile }})"
+          fi
+
+          echo "# Benchmarks: $benchmark_name" > comment.md
           echo '' >> comment.md
-          uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "${{ matrix.name }}" \
+          uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json "$benchmark_name" \
             >> comment.md
-          cat comment.md >> $GITHUB_STEP_SUMMARY
+          cat comment.md >> "$GITHUB_STEP_SUMMARY"
 
       - name: Comment PR
         if: inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -460,7 +652,7 @@ jobs:
           # There is exactly one comment per comment-tag. If a comment with this tag already exists,
           # this action will *update* the comment instead of posting a new comment. Therefore, each
           # unique benchmark configuration must have a unique comment-tag.
-          comment-tag: bench-pr-comment-${{ matrix.id }}
+          comment-tag: bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
 
       - name: Comment PR on failure
         if: failure() && inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
@@ -469,8 +661,8 @@ jobs:
           message: |
             # 🚨🚨🚨❌❌❌ SQL BENCHMARK FAILED ❌❌❌🚨🚨🚨
 
-            Benchmark `${{ matrix.name }}` failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
-          comment-tag: bench-pr-comment-${{ matrix.id }}
+            Benchmark `${{ matrix.name }}` (${{ inputs.benchmark_profile }}) failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
+          comment-tag: bench-pr-comment-${{ matrix.id }}${{ inputs.benchmark_profile == 'base' && '-base' || '' }}
 
       - name: Upload Benchmark Results
         if: inputs.mode == 'develop'
diff --git a/.github/workflows/sql-pr.yml b/.github/workflows/sql-pr.yml
@@ -1,15 +1,32 @@
 # Runs SQL benchmarks once for a pull request.
-# Called from bench-dispatch.yml when the `action/benchmark-sql` label is added.
+# Called from bench-dispatch.yml when SQL benchmark labels are added.
 
 name: PR SQL Benchmarks
 
 concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  group: >-  # key = workflow name, profile (falls back to 'base'), then head ref or run id
+    ${{ github.workflow }}-${{ inputs.benchmark_profile || 'base' }}-${{
+      github.head_ref || github.run_id
+    }}
   cancel-in-progress: true
 
 on:
-  workflow_call: { }
-  workflow_dispatch: { }
+  workflow_call:  # reusable-workflow trigger; benchmark_profile is optional and defaults to "base"
+    inputs:
+      benchmark_profile:
+        required: false
+        type: string
+        default: "base"
+  workflow_dispatch:  # manual trigger; benchmark_profile is a choice limited to the options below
+    inputs:
+      benchmark_profile:
+        description: "SQL benchmark profile to run"
+        required: false
+        type: choice
+        default: "base"
+        options:
+          - "base"
+          - "full"
 
 permissions:
   contents: read
@@ -22,3 +39,4 @@ jobs:
     secrets: inherit
     with:
       mode: "pr"
+      benchmark_profile: ${{ inputs.benchmark_profile || 'base' }}
diff --git a/docs/developer-guide/benchmarking.md b/docs/developer-guide/benchmarking.md
@@ -212,8 +212,10 @@ Benchmarks run automatically on all commits to `develop` and can be run on-deman
   `develop`, with results uploaded for historical tracking.
 - **PR benchmarks** -- triggered by the `action/benchmark` label. Results are compared against
   the latest `develop` run and posted as a PR comment.
-- **SQL benchmarks** -- triggered by the `action/benchmark-sql` label. Runs a parametric matrix
-  of suites, engines, formats, and storage backends (NVMe, S3).
+- **SQL benchmarks** -- triggered by the `action/benchmark-sql` label. Runs the base SQL matrix,
+  which excludes Appian, TPC-H SF=10 on S3, `vortex-compact`, and `duckdb:duckdb`.
+- **Full SQL benchmarks** -- triggered by the `action/benchmark-sql-full` label. Runs the full
+  SQL matrix of suites, engines, formats, and storage backends (NVMe, S3).
 
 All CI benchmarks run on dedicated instances with the `release_debug` profile and
 `-C target-cpu=native` to produce representative numbers.