chore[ci]: tpch-10 on ci action #19
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: "SQL-related benchmarks" | ||
| on: | ||
| workflow_call: | ||
| inputs: | ||
| mode: | ||
| required: true | ||
| type: string | ||
| machine_type: | ||
| required: false | ||
| type: string | ||
| default: c6id.8xlarge | ||
| benchmark_matrix: | ||
| required: false | ||
| type: string | ||
| description: "JSON string containing the matrix configuration" | ||
| default: | | ||
| [ | ||
| { | ||
| "id": "tpch-nvme", | ||
| "subcommand": "tpch", | ||
| "name": "TPC-H SF=1 on NVME", | ||
| "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb", | ||
| "scale_factor": "--scale-factor 1.0" | ||
| }, | ||
| { | ||
| "id": "clickbench-nvme", | ||
| "subcommand": "clickbench", | ||
| "name": "Clickbench on NVME", | ||
| "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb" | ||
| }, | ||
| { | ||
| "id": "tpch-s3", | ||
| "subcommand": "tpch", | ||
| "name": "TPC-H SF=1 on S3", | ||
| "local_dir": "bench-vortex/data/tpch/1.0", | ||
| "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/1.0/", | ||
| "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex", | ||
| "scale_factor": "--scale-factor 1.0" | ||
| }, | ||
| { | ||
| "id": "tpch-nvme-10", | ||
| "subcommand": "tpch", | ||
| "name": "TPC-H SF=10 on NVME", | ||
| "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb", | ||
| "scale_factor": "--scale-factor 10.0" | ||
| }, | ||
| { | ||
| "id": "tpch-s3-10", | ||
| "subcommand": "tpch", | ||
| "name": "TPC-H SF=10 on S3", | ||
| "local_dir": "bench-vortex/data/tpch/10.0", | ||
| "remote_storage": "s3://vortex-bench-dev-eu/${{github.ref_name}}/tpch/10.0/", | ||
| "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex", | ||
| "scale_factor": "--scale-factor 10.0" | ||
| }, | ||
| { | ||
| "id": "tpcds-nvme", | ||
| "subcommand": "tpcds", | ||
| "name": "TPC-DS SF=1 on NVME", | ||
| "targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb", | ||
| "scale_factor": "--scale-factor 1.0" | ||
| }, | ||
| { | ||
| "id": "statpopgen", | ||
| "subcommand": "statpopgen", | ||
| "name": "Statistical and Population Genetics", | ||
| "local_dir": "bench-vortex/data/statpopgen", | ||
| "targets": "duckdb:parquet,duckdb:vortex", | ||
| "scale_factor": "--scale-factor 100" | ||
| } | ||
| ] | ||
| jobs: | ||
| bench: | ||
| timeout-minutes: 120 | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| include: ${{ fromJSON(inputs.benchmark_matrix) }} | ||
| runs-on: | ||
| - runs-on=${{ github.run_id }} | ||
| - family=${{ inputs.machine_type }} | ||
| - image=ubuntu24-full-x64 | ||
| - spot=false | ||
| - extras=s3-cache | ||
| - tag=${{ matrix.id }} | ||
| steps: | ||
| - uses: runs-on/action@v2 | ||
| with: | ||
| sccache: s3 | ||
| - uses: actions/checkout@v5 | ||
| if: inputs.mode == 'pr' | ||
| with: | ||
| ref: ${{ github.event.pull_request.head.sha }} | ||
| - uses: actions/checkout@v5 | ||
| if: inputs.mode != 'pr' | ||
| - uses: ./.github/actions/setup-rust | ||
| - name: Install DuckDB | ||
| run: | | ||
| wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.3.2/duckdb_cli-linux-amd64.zip | funzip > duckdb | ||
| chmod +x duckdb | ||
| echo "$PWD" >> $GITHUB_PATH | ||
| - name: Build binary | ||
| shell: bash | ||
| env: | ||
| RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes" | ||
| run: | | ||
| cargo build --bin query_bench --package bench-vortex --profile release_debug | ||
| - name: Generate data | ||
| shell: bash | ||
| env: | ||
| RUST_BACKTRACE: full | ||
| run: | | ||
| # Generate data, running each query once to make sure they don't panic. | ||
| target/release_debug/query_bench \ | ||
| ${{ matrix.subcommand }} \ | ||
| --targets ${{ matrix.targets }} \ | ||
| -i1 \ | ||
| -d gh-json ${{ matrix.scale_factor }} | ||
| - name: Setup AWS CLI | ||
| uses: aws-actions/configure-aws-credentials@v4 | ||
| with: | ||
| role-to-assume: arn:aws:iam::375504701696:role/GitHubBenchmarkRole | ||
| aws-region: us-east-1 | ||
| - name: Upload data | ||
| if: matrix.remote_storage != null | ||
| shell: bash | ||
| env: | ||
| AWS_REGION: "eu-west-1" | ||
| # Matrix values are handed to the script through the environment instead of being | ||
| # spliced into it: remote_storage embeds github.ref_name, and a branch name containing | ||
| # shell metacharacters would otherwise be interpreted by bash. | ||
| MATRIX_LOCAL_DIR: "${{ matrix.local_dir }}" | ||
| MATRIX_REMOTE_STORAGE: "${{ matrix.remote_storage }}" | ||
| run: | | ||
| # Clear the previous upload first so stale files from an earlier run are not benchmarked. | ||
| aws s3 rm --recursive "$MATRIX_REMOTE_STORAGE" | ||
| aws s3 cp --recursive "$MATRIX_LOCAL_DIR" "$MATRIX_REMOTE_STORAGE" | ||
| - name: Setup Polar Signals | ||
| uses: polarsignals/gh-actions-ps-profiling@v0.6.0 | ||
| with: | ||
| polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }} | ||
| labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.id }}" | ||
| parca_agent_version: "0.39.3" | ||
| project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56" | ||
| extra_args: "--off-cpu-threshold=1" # Personally tuned by @brancz | ||
| - name: Run ${{ matrix.name }} benchmark | ||
| if: matrix.remote_storage == null | ||
| shell: bash | ||
| env: | ||
| OTEL_SERVICE_NAME: "vortex-bench" | ||
| OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf" | ||
| OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}" | ||
| OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}" | ||
| OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}" | ||
| run: | | ||
| target/release_debug/query_bench ${{ matrix.subcommand }} \ | ||
| -d gh-json \ | ||
| --targets ${{ matrix.targets }} \ | ||
| --export-spans \ | ||
| ${{ matrix.scale_factor }} \ | ||
| --delete-duckdb-database \ | ||
| -o results.json | ||
| - name: Run ${{ matrix.name }} benchmark (remote) | ||
| if: matrix.remote_storage != null | ||
| shell: bash | ||
| env: | ||
| AWS_REGION: "eu-west-1" | ||
| OTEL_SERVICE_NAME: "vortex-bench" | ||
| OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf" | ||
| OTEL_EXPORTER_OTLP_ENDPOINT: "${{ secrets.OTEL_EXPORTER_OTLP_ENDPOINT }}" | ||
| OTEL_EXPORTER_OTLP_HEADERS: "${{ secrets.OTEL_EXPORTER_OTLP_HEADERS }}" | ||
| OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}" | ||
| # remote_storage embeds github.ref_name, so it reaches the script via the environment | ||
| # and is quoted there rather than being expanded directly into the bash source. | ||
| MATRIX_REMOTE_STORAGE: "${{ matrix.remote_storage }}" | ||
| run: | | ||
| # scale_factor stays inline and unquoted on purpose: it carries a flag plus its value | ||
| # ("--scale-factor N") and relies on word splitting, and may be absent entirely. | ||
| target/release_debug/query_bench ${{ matrix.subcommand }} \ | ||
| --use-remote-data-dir "$MATRIX_REMOTE_STORAGE" \ | ||
| --targets ${{ matrix.targets }} \ | ||
| --export-spans \ | ||
| ${{ matrix.scale_factor }} \ | ||
| -d gh-json \ | ||
| --delete-duckdb-database \ | ||
| -o results.json | ||
| - name: Install uv | ||
| if: inputs.mode == 'pr' | ||
| uses: spiraldb/actions/.github/actions/setup-uv@0.15.0 | ||
| with: | ||
| sync: false | ||
| - name: Compare results | ||
| if: inputs.mode == 'pr' | ||
| shell: bash | ||
| run: | | ||
| set -Eeu -o pipefail -x | ||
| base_commit_sha=$(\ | ||
| curl -L \ | ||
| -H "Accept: application/vnd.github+json" \ | ||
| -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ | ||
| https://api.github.com/repos/vortex-data/vortex/actions/workflows/bench.yml/runs\?branch\=develop\&status\=success\&per_page\=1 \ | ||
| | jq -r '.workflow_runs[].head_sha' \ | ||
| ) | ||
| aws s3 cp s3://vortex-benchmark-results-database/data.json.gz - \ | ||
| | gzip -d \ | ||
| | grep $base_commit_sha \ | ||
| > base.json | ||
| echo '# Benchmarks: ${{ matrix.name }}' > comment.md | ||
| echo '<details>' >> comment.md | ||
| echo '<summary>Table of Results</summary>' >> comment.md | ||
| echo '' >> comment.md | ||
| uv run --no-project scripts/compare-benchmark-jsons.py base.json results.json \ | ||
| >> comment.md | ||
| echo '</details>' >> comment.md | ||
| - name: Comment PR | ||
| if: inputs.mode == 'pr' | ||
| uses: thollander/actions-comment-pull-request@v3 | ||
| with: | ||
| file-path: comment.md | ||
| # There is exactly one comment per comment-tag. If a comment with this tag already exists, | ||
| # this action will *update* the comment instead of posting a new comment. Therefore, each | ||
| # unique benchmark configuration must have a unique comment-tag. | ||
| comment-tag: bench-pr-comment-${{ matrix.id }} | ||
| - name: Upload Benchmark Results | ||
| if: inputs.mode == 'develop' | ||
| shell: bash | ||
| run: | | ||
| bash scripts/cat-s3.sh vortex-benchmark-results-database data.json.gz results.json | ||