Skip to content

chore(ci): tpch-10 on ci action #19

chore(ci): tpch-10 on ci action

chore(ci): tpch-10 on ci action #19

Workflow file for this run

name: "SQL-related benchmarks"
on:
  workflow_call:
    inputs:
      mode:
        required: true
        type: string
      machine_type:
        required: false
        type: string
        default: c6id.8xlarge
      benchmark_matrix:
        required: false
        type: string
        description: "JSON string containing the matrix configuration"
        # This default is parsed with fromJSON() in jobs.bench.strategy.matrix,
        # which requires *strict* JSON: trailing commas are a parse error, so
        # none may appear after the last key of an object or the last element
        # of the array.
        default: |
          [
            {
              "id": "tpch-nvme",
              "subcommand": "tpch",
              "name": "TPC-H SF=1 on NVME",
              "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "clickbench-nvme",
              "subcommand": "clickbench",
              "name": "Clickbench on NVME",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
            },
            {
              "id": "tpch-s3",
              "subcommand": "tpch",
              "name": "TPC-H SF=1 on S3",
              "local_dir": "bench-vortex/data/tpch/1.0",
              "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/1.0/",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "tpch-nvme-10",
              "subcommand": "tpch",
              "name": "TPC-H SF=10 on NVME",
              "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 10.0"
            },
            {
              "id": "tpch-s3-10",
              "subcommand": "tpch",
              "name": "TPC-H SF=10 on S3",
              "local_dir": "bench-vortex/data/tpch/10.0",
              "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/10.0/",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 10.0"
            },
            {
              "id": "tpcds-nvme",
              "subcommand": "tpcds",
              "name": "TPC-DS SF=1 on NVME",
              "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb",
              "scale_factor": "--scale-factor 1.0"
            },
            {
              "id": "statpopgen",
              "subcommand": "statpopgen",
              "name": "Statistical and Population Genetics",
              "local_dir": "bench-vortex/data/statpopgen",
              "targets": "duckdb:parquet,duckdb:vortex",
              "scale_factor": "--scale-factor 100"
            }
          ]
jobs:
  bench:
    timeout-minutes: 120
    strategy:
      # Let the remaining matrix entries finish even if one benchmark fails.
      fail-fast: false
      matrix:
        # The matrix is supplied by the caller as a JSON string (see the
        # benchmark_matrix input default) and expanded here.
        include: ${{ fromJSON(inputs.benchmark_matrix) }}
    # RunsOn (runs-on.com) dynamic-runner selectors, one key=value per entry.
    runs-on:
      - runs-on=${{ github.run_id }}
      - family=${{ inputs.machine_type }}
      - image=ubuntu24-full-x64
      - spot=false
      - extras=s3-cache
      - tag=${{ matrix.id }}
    steps:
Check failure on line 89 in .github/workflows/sql-benchmarks.yml

View workflow run for this annotation

GitHub Actions / .github/workflows/sql-benchmarks.yml

Invalid workflow file

You have an error in your yaml syntax on line 89
- uses: runs-on/action@v2
with:
sccache: s3
- uses: actions/checkout@v5
if: inputs.mode == 'pr'
with:
ref: ${{ github.event.pull_request.head.sha }}
- uses: actions/checkout@v5
if: inputs.mode != 'pr'
- uses: ./.github/actions/setup-rust
- name: Install DuckDB
run: |
wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.3.2/duckdb_cli-linux-amd64.zip | funzip > duckdb
chmod +x duckdb
echo "$PWD" >> $GITHUB_PATH
- name: Build binary
shell: bash
env:
RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes"
run: |
cargo build --bin query_bench --package bench-vortex --profile release_debug
- name: Generate data
shell: bash
env:
RUST_BACKTRACE: full
run: |
# Generate data, running each query once to make sure they don't panic.
target/release_debug/query_bench \
${{ matrix.subcommand }} \
--targets ${{ matrix.targets }} \
-i1 \
-d gh-json ${{ matrix.scale_factor }}
- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@v4
with:
role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole
aws-region: us-east-1
- name: Upload data
if: matrix.remote_storage != null
shell: bash
env:
AWS_REGION: "eu-west-1"
run: |
aws s3 rm --recursive ${{ matrix.remote_storage }}
aws s3 cp --recursive ${{matrix.local_dir}} ${{ matrix.remote_storage }}
- name: Setup Polar Signals
uses: polarsignals/gh-actions-ps-profiling@v0.6.0
with:
polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }}
labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.id }}"
parca_agent_version: "0.39.3"
project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56"
extra_args: "--off-cpu-threshold=1" # Personally tuned by @brancz
- name: Run ${{ matrix.name }} benchmark
if: matrix.remote_storage == null
shell: bash
env:
OTEL_SERVICE_NAME: "vortex-bench"
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}"
OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}"
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
run: |
target/release_debug/query_bench ${{ matrix.subcommand }} \
-d gh-json \
--targets ${{ matrix.targets }} \
--export-spans \
${{ matrix.scale_factor }} \
--delete-duckdb-database \
-o results.json
- name: Run ${{ matrix.name }} benchmark (remote)
if: matrix.remote_storage != null
shell: bash
env:
AWS_REGION: "eu-west-1"
OTEL_SERVICE_NAME: "vortex-bench"
OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}"
OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}"
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
run: |
target/release_debug/query_bench ${{ matrix.subcommand }} \
--use-remote-data-dir ${{ matrix.remote_storage }} \
--targets ${{ matrix.targets }} \
--export-spans \
${{ matrix.scale_factor }} \
-d gh-json \
--delete-duckdb-database \
-o results.json
- name: Install uv
if: inputs.mode == 'pr'
uses: spiraldb/actions/.github/actions/setup-uv@0.15.0
with:
sync: false
- name: Compare results
if: inputs.mode == 'pr'
shell: bash
run: |
set -Eeu -o pipefail -x
base_commit_sha=$(\
curl -L \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \
https://api.github.com/repos/vortex-data/vortex/actions/workflows/bench.yml/runs\?branch\=develop\&status\=success\&per_page\=1 \
| jq -r '.workflow_runs[].head_sha' \
)
aws s3 cp s3://vortex-benchmark-results-database/data.json.gz - \
| gzip -d \
| grep $base_commit_sha \
> base.json
echo '# Benchmarks: ${{ matrix.name }}' > comment.md
echo '<details>' >> comment.md
echo '<summary>Table of Results</summary>' >> comment.md
echo '' >> comment.md
uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json \
>> comment.md
echo '</details>' >> comment.md
- name: Comment PR
if: inputs.mode == 'pr'
uses: thollander/actions-comment-pull-request@v3
with:
file-path: comment.md
# There is exactly one comment per comment-tag. If a comment with this tag already exists,
# this action will *update* the comment instead of posting a new comment. Therefore, each
# unique benchmark configuration must have a unique comment-tag.
comment-tag: bench-pr-comment-${{ matrix.id }}
- name: Upload Benchmark Results
if: inputs.mode == 'develop'
shell: bash
run: |
bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz results.json