Skip to content

chore(ci): tpch-10 on ci action #19

chore(ci): tpch-10 on ci action

chore(ci): tpch-10 on ci action #19

Workflow file for this run

name: "SQL-related benchmarks"
on:
  workflow_call:
    inputs:
      mode:
        required: true
        type: string
      machine_type:
        required: false
        type: string
        default: c6id.8xlarge
      benchmark_matrix:
        required: false
        type: string
        description: "JSON string containing the matrix configuration"
        # This default is parsed with fromJSON() in jobs.bench.strategy.matrix,
        # which requires *strict* JSON: trailing commas are a parse error, so
        # none may appear after the last key of an object or the last element
        # of the array.
        default: |
          [
            {
              "id": "tpch-nvme",
              "subcommand": "tpch",
              "name": "TPC-H SF=1 on NVME",
              "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "clickbench-nvme",
              "subcommand": "clickbench",
              "name": "Clickbench on NVME",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
            },
            {
              "id": "tpch-s3",
              "subcommand": "tpch",
              "name": "TPC-H SF=1 on S3",
              "local_dir": "bench-vortex/data/tpch/1.0",
              "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/1.0/",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "tpch-nvme-10",
              "subcommand": "tpch",
              "name": "TPC-H SF=10 on NVME",
              "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 10.0"
            },
            {
              "id": "tpch-s3-10",
              "subcommand": "tpch",
              "name": "TPC-H SF=10 on S3",
              "local_dir": "bench-vortex/data/tpch/10.0",
              "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/10.0/",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 10.0"
            },
            {
              "id": "tpcds-nvme",
              "subcommand": "tpcds",
              "name": "TPC-DS SF=1 on NVME",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "statpopgen",
              "subcommand": "statpopgen",
              "name": "Statistical and Population Genetics",
              "local_dir": "bench-vortex/data/statpopgen",
              "targets": "duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 100"
            }
          ]
jobs:
  bench:
    timeout-minutes: 120
    strategy:
      # Let the remaining matrix entries finish even if one benchmark fails.
      fail-fast: false
      matrix:
        # The matrix is supplied by the caller as a JSON string (see the
        # benchmark_matrix input default) and expanded here.
        include: ${{ fromJSON(inputs.benchmark_matrix) }}
    # RunsOn (runs-on.com) dynamic-runner selectors, one key=value per entry.
    runs-on:
      - runs-on=${{ github.run_id }}
      - family=${{ inputs.machine_type }}
      - image=ubuntu24-full-x64
      - spot=false
      - extras=s3-cache
      - tag=${{ matrix.id }}
    steps:
Check failure on line 89 in .github/workflows/sql-benchmarks.yml

View workflow run for this annotation

GitHub Actions / .github/workflows/sql-benchmarks.yml

Invalid workflow file

You have an error in your yaml syntax on line 89
- uses: runs-on/action@v2
with:
sccache: s3
- uses: actions/checkout@v5
if: inputs.mode == 'pr'
with:
ref: ${{ github.event.pull_request.head.sha }}
- uses: actions/checkout@v5
if: inputs.mode != 'pr'
- uses: ./.github/actions/setup-rust
- name: Install DuckDB
run: |
wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.3.2/duckdb_cli-linux-amd64.zip | funzip > duckdb
chmod +x duckdb
echo "$PWD" >> $GITHUB_PATH
- name: Build binary
shell: bash
env:
RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes"
run: |
cargo build --bin query_bench --package bench-vortex --profile release_debug
- name: Generate data
shell: bash
env:
RUST_BACKTRACE: full
run: |
# Generate data, running each query once to make sure they don't panic.
target/release_debug/query_bench \
${{ matrix.subcommand }} \
--targets ${{ matrix.targets }} \
-i1 \
-d gh-json ${{ matrix.scale_factor }}
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload data
if: matrix.remote_storage != null
shell: bash
env:
AWS_REGION: "eu-west-1"
run: |
aws s3 rm --recursive ${{ matrix.remote_storage }}
aws s3 cp --recursive ${{matrix.local_dir}} ${{ matrix.remote_storage }}
- name: Setup Polar Signals
uses: polarsignals/gh-actions-ps-profiling@v0.6.0
with:
polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }}
labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.id }}"
parca_agent_version: "0.39.3"
project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56"
extra_args: "--off-cpu-threshold=1" # Personally tuned by @brancz
- name: Run ${{ matrix.name }} benchmark
if: matrix.remote_storage == null
shell: bash
env:
OTEL_SERVICE_NAME: "vortex-bench"
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}"
OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}"
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
run: |
target/release_debug/query_bench ${{ matrix.subcommand }} \
-d gh-json \
--targets ${{ matrix.targets }} \
--export-spans \
${{ matrix.scale_factor }} \
--delete-duckdb-database \
-o results.json
- name: Run ${{ matrix.name }} benchmark (remote)
if: matrix.remote_storage != null
shell: bash
env:
AWS_REGION: "eu-west-1"
OTEL_SERVICE_NAME: "vortex-bench"
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}"
OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}"
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
run: |
target/release_debug/query_bench ${{ matrix.subcommand }} \
--use-remote-data-dir ${{ matrix.remote_storage }} \
--targets ${{ matrix.targets }} \
--export-spans \
${{ matrix.scale_factor }} \
-d gh-json \
--delete-duckdb-database \
-o results.json
- name: Install uv
if: inputs.mode == 'pr'
uses: spiraldb/actions/.github/actions/setup-uv@0.15.0
with:
sync: false
- name: Compare results
if: inputs.mode == 'pr'
shell: bash
run: |
set -Eeu -o pipefail -x
base_commit_sha=$(\
curl -L \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
https://api.github.com/repos/vortex-data/vortex/actions/workflows/bench.yml/runs\?branch\=develop\&status\=success\&per_page\=1 \
| jq -r '.workflow_runs[].head_sha' \
)
aws s3 cp s3://vortex-benchmark-results-database/data.json.gz - \
| gzip -d \
| grep $base_commit_sha \
> base.json
echo '# Benchmarks: ${{ matrix.name }}' > comment.md
echo '<details>' >> comment.md
echo '<summary>Table of Results</summary>' >> comment.md
echo '' >> comment.md
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json \
>> comment.md
echo '</details>' >> comment.md
- name: Comment PR
if: inputs.mode == 'pr'
uses: thollander/actions-comment-pull-request@v3
with:
file-path: comment.md
# There is exactly one comment per comment-tag. If a comment with this tag already exists,
# this action will *update* the comment instead of posting a new comment. Therefore, each
# unique benchmark configuration must have a unique comment-tag.
comment-tag: bench-pr-comment-${{ matrix.id }}
- name: Upload Benchmark Results
if: inputs.mode == 'develop'
shell: bash
run: |
bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz results.json