Skip to content

Bring back file-size comments (#7296) #5180

Bring back file-size comments (#7296)

Bring back file-size comments (#7296) #5180

Workflow file for this run

# Runs after every commit to `develop` (in other words, _after_ every pull request merges).
name: Benchmarks

# Trigger: pushes to the `develop` branch only.
on:
  push:
    branches: [develop]

# Token permissions granted to the jobs in this workflow.
permissions:
  id-token: write # enables AWS-GitHub OIDC
  actions: read
  contents: write
jobs:
  # Builds a JSON record for the pushed commit and hands it to
  # scripts/cat-s3.sh together with the results bucket and `commits.json`.
  # NOTE(review): presumably cat-s3.sh appends the file to that S3 object;
  # confirm against the script.
  commit-metadata:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    steps:
      - uses: actions/checkout@v6
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Upload Commit Metadata
        shell: bash
        run: |
          set -Eeu -o pipefail -x
          sudo apt-get update && sudo apt-get install -y jq
          bash scripts/commit-json.sh > new-commit.json
          bash scripts/cat-s3.sh vortex-ci-benchmark-results commits.json new-commit.json

  # Builds and runs one benchmark binary per matrix entry, then passes
  # `results.json` to scripts/cat-s3.sh for upload.
  bench:
    timeout-minutes: 120
    # In the upstream repository, run on the dedicated benchmark runner
    # (labelled with the run id and the benchmark id); anywhere else, such as
    # forks, fall back to a stock hosted runner.
    runs-on: >-
      ${{ github.repository == 'vortex-data/vortex'
      && format('runs-on={0}/runner=bench-dedicated/extras=s3-cache/tag={1}', github.run_id, matrix.benchmark.id)
      || 'ubuntu-latest' }}
    strategy:
      matrix:
        # `id` doubles as the cargo binary name; `build_args` is appended to
        # `cargo build`; `formats` is passed to the binary via `--formats`.
        benchmark:
          - id: random-access-bench
            name: Random Access
            build_args: "--features lance"
            formats: "parquet,lance,vortex"
          - id: compress-bench
            name: Compression
            build_args: "--features lance"
            formats: "parquet,lance,vortex"
    steps:
      - uses: runs-on/action@v2
        if: github.repository == 'vortex-data/vortex'
        with:
          sccache: s3
      - uses: actions/checkout@v6
      - name: Setup benchmark environment
        run: sudo bash scripts/setup-benchmark.sh
      - uses: ./.github/actions/setup-rust
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}
      - name: Install DuckDB
        # An explicit `shell: bash` makes GitHub invoke bash with
        # `-eo pipefail`, so a failed download fails this step instead of
        # being hidden behind the exit status of `funzip`.
        shell: bash
        run: |
          wget -qO- https://github.com/duckdb/duckdb/releases/download/v1.4.2/duckdb_cli-linux-amd64.zip | funzip > duckdb
          chmod +x duckdb
          echo "$PWD" >> "$GITHUB_PATH"
      - uses: ./.github/actions/system-info
      - name: Build binary
        shell: bash
        env:
          # NOTE(review): frame pointers are forced on, presumably so the
          # profiler configured in the next step can unwind stacks; confirm.
          RUSTFLAGS: "-C target-cpu=native -C force-frame-pointers=yes"
        run: |
          cargo build --bin ${{ matrix.benchmark.id }} --profile release_debug ${{ matrix.benchmark.build_args }}
      - name: Setup Polar Signals
        uses: polarsignals/gh-actions-ps-profiling@v0.8.1
        with:
          polarsignals_cloud_token: ${{ secrets.POLAR_SIGNALS_API_KEY }}
          labels: "branch=${{ github.ref_name }};gh_run_id=${{ github.run_id }};benchmark=${{ matrix.benchmark.id }}"
          project_uuid: "e5d846e1-b54c-46e7-9174-8bf055a3af56"
          profiling_frequency: 199
          extra_args: "--off-cpu-threshold=0.03" # Personally tuned by @brancz
      - name: Run ${{ matrix.benchmark.name }} benchmark
        shell: bash
        env:
          RUST_BACKTRACE: full
        run: |
          bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
      - name: Setup AWS CLI
        uses: aws-actions/configure-aws-credentials@v6
        with:
          role-to-assume: arn:aws:iam::245040174862:role/GitHubBenchmarkRole
          aws-region: us-east-1
      - name: Upload Benchmark Results
        shell: bash
        run: |
          bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json
      # Runs only when an earlier step of this job has failed.
      - name: Alert incident.io
        if: failure()
        uses: ./.github/actions/alert-incident-io
        with:
          api-key: ${{ secrets.INCIDENT_IO_ALERT_TOKEN }}
          alert-title: "${{ matrix.benchmark.name }} benchmark failed on develop"
          deduplication-key: ci-bench-${{ matrix.benchmark.id }}-failure

  # Delegates the SQL benchmarks to the reusable workflow, forwarding the
  # caller's secrets.
  sql:
    uses: ./.github/workflows/sql-benchmarks.yml
    secrets: inherit
    with:
      mode: "develop"
      # JSON array of benchmark definitions, passed to the called workflow as
      # a string. Keep it strict JSON (no trailing commas, no comments inside
      # the scalar) so that any JSON parser accepts it.
      benchmark_matrix: |
        [
          {
            "id": "clickbench-nvme",
            "subcommand": "clickbench",
            "name": "Clickbench on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "build_lance": true
          },
          {
            "id": "tpch-nvme",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "1.0",
            "build_lance": true
          },
          {
            "id": "tpch-s3",
            "subcommand": "tpch",
            "name": "TPC-H SF=1 on S3",
            "local_dir": "vortex-bench/data/tpch/1.0",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "1.0"
          },
          {
            "id": "tpch-nvme-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on NVME",
            "targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,datafusion:lance,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "10.0",
            "build_lance": true
          },
          {
            "id": "tpch-s3-10",
            "subcommand": "tpch",
            "name": "TPC-H SF=10 on S3",
            "local_dir": "vortex-bench/data/tpch/10.0",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "10.0"
          },
          {
            "id": "tpcds-nvme",
            "subcommand": "tpcds",
            "name": "TPC-DS SF=1 on NVME",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
            "scale_factor": "1.0"
          },
          {
            "id": "statpopgen",
            "subcommand": "statpopgen",
            "name": "Statistical and Population Genetics",
            "local_dir": "vortex-bench/data/statpopgen",
            "targets": "duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
            "scale_factor": "100"
          },
          {
            "id": "fineweb",
            "subcommand": "fineweb",
            "name": "FineWeb NVMe",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact"
          },
          {
            "id": "fineweb-s3",
            "subcommand": "fineweb",
            "name": "FineWeb S3",
            "local_dir": "vortex-bench/data/fineweb",
            "remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
            "targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact"
          },
          {
            "id": "polarsignals",
            "subcommand": "polarsignals",
            "name": "PolarSignals Profiling",
            "targets": "datafusion:vortex",
            "scale_factor": "1"
          }
        ]