Skip to content

Commit 70c2e75

Browse files
committed
Merge remote-tracking branch 'origin/develop' into ji/validate-sql-bench
2 parents 71bcb30 + 2e9470f commit 70c2e75

543 files changed

Lines changed: 12219 additions & 8721 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.
Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
name: "Setup Prebuild"
2+
description: "Minimal setup for runners with pre-built Rust toolchain, nextest, and sccache"
3+
4+
runs:
5+
using: "composite"
6+
steps:
7+
- name: Configure sccache timeout
8+
shell: bash
9+
run: |
10+
mkdir -p ~/.config/sccache
11+
echo 'server_startup_timeout_ms = 15000' > ~/.config/sccache/config
12+
13+
- name: Pre-start sccache server
14+
shell: bash
15+
run: sccache --start-server &

.github/runs-on.yml

Lines changed: 1 addition & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1 @@
1-
images:
2-
vortex-ci-amd64:
3-
platform: "linux"
4-
arch: "x64"
5-
name: "vortex-ci-*"
6-
owner: "245040174862"
7-
vortex-ci-arm64:
8-
platform: "linux"
9-
arch: "arm64"
10-
name: "vortex-ci-*"
11-
owner: "245040174862"
1+
_extends: .github-private

.github/scripts/run-sql-bench.sh

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#
1616
# Options:
1717
# --scale-factor <sf> Scale factor for the benchmark (e.g., 1.0, 10.0)
18+
# --iterations <n> Number of iterations to pass to each benchmark binary
1819
# --remote-storage <url> Remote storage URL (e.g., s3://bucket/path/)
1920
# If provided, runs in remote mode (no lance support).
2021
# --benchmark-id <id> Benchmark ID for error messages (e.g., tpch-s3)
@@ -26,6 +27,7 @@ targets="$2"
2627
shift 2
2728

2829
scale_factor=""
30+
iterations=""
2931
remote_storage=""
3032
benchmark_id=""
3133

@@ -35,6 +37,10 @@ while [[ $# -gt 0 ]]; do
3537
scale_factor="$2"
3638
shift 2
3739
;;
40+
--iterations)
41+
iterations="$2"
42+
shift 2
43+
;;
3844
--remote-storage)
3945
remote_storage="$2"
4046
shift 2
@@ -91,6 +97,9 @@ if [[ -n "$scale_factor" ]]; then
9197
opts="--opt scale-factor=$scale_factor"
9298
fi
9399
fi
100+
if [[ -n "$iterations" ]]; then
101+
opts="-i $iterations $opts"
102+
fi
94103

95104
touch results.json
96105

.github/workflows/bench-pr.yml

Lines changed: 79 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,7 @@ jobs:
9393
env:
9494
RUST_BACKTRACE: full
9595
run: |
96-
target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o results.json
96+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o results.json
9797
9898
- name: Setup AWS CLI
9999
if: github.event.pull_request.head.repo.fork == false
@@ -152,3 +152,81 @@ jobs:
152152
secrets: inherit
153153
with:
154154
mode: "pr"
155+
benchmark_matrix: |
156+
[
157+
{
158+
"id": "clickbench-nvme",
159+
"subcommand": "clickbench",
160+
"name": "Clickbench on NVME",
161+
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
162+
},
163+
{
164+
"id": "tpch-nvme",
165+
"subcommand": "tpch",
166+
"name": "TPC-H SF=1 on NVME",
167+
"targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
168+
"scale_factor": "1.0"
169+
},
170+
{
171+
"id": "tpch-s3",
172+
"subcommand": "tpch",
173+
"name": "TPC-H SF=1 on S3",
174+
"local_dir": "vortex-bench/data/tpch/1.0",
175+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
176+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
177+
"scale_factor": "1.0"
178+
},
179+
{
180+
"id": "tpch-nvme-10",
181+
"subcommand": "tpch",
182+
"name": "TPC-H SF=10 on NVME",
183+
"targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
184+
"scale_factor": "10.0"
185+
},
186+
{
187+
"id": "tpch-s3-10",
188+
"subcommand": "tpch",
189+
"name": "TPC-H SF=10 on S3",
190+
"local_dir": "vortex-bench/data/tpch/10.0",
191+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
192+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
193+
"scale_factor": "10.0"
194+
},
195+
{
196+
"id": "tpcds-nvme",
197+
"subcommand": "tpcds",
198+
"name": "TPC-DS SF=1 on NVME",
199+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
200+
"scale_factor": "1.0"
201+
},
202+
{
203+
"id": "statpopgen",
204+
"subcommand": "statpopgen",
205+
"name": "Statistical and Population Genetics",
206+
"targets": "duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
207+
"scale_factor": "100"
208+
},
209+
{
210+
"id": "fineweb",
211+
"subcommand": "fineweb",
212+
"name": "FineWeb NVMe",
213+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
214+
"scale_factor": "100"
215+
},
216+
{
217+
"id": "fineweb-s3",
218+
"subcommand": "fineweb",
219+
"name": "FineWeb S3",
220+
"local_dir": "vortex-bench/data/fineweb",
221+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
222+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
223+
"scale_factor": "100"
224+
},
225+
{
226+
"id": "polarsignals",
227+
"subcommand": "polarsignals",
228+
"name": "PolarSignals Profiling",
229+
"targets": "datafusion:vortex",
230+
"scale_factor": "1"
231+
},
232+
]

.github/workflows/bench.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@ jobs:
8989
env:
9090
RUST_BACKTRACE: full
9191
run: |
92-
target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
92+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
9393
9494
- name: Setup AWS CLI
9595
uses: aws-actions/configure-aws-credentials@v5

.github/workflows/ci.yml

Lines changed: 14 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -110,9 +110,12 @@ jobs:
110110
uv run --all-packages pytest --benchmark-disable -n auto test/
111111
working-directory: vortex-python/
112112

113+
- name: Setup benchmark environment
114+
run: sudo bash scripts/setup-benchmark.sh
115+
113116
- name: Pytest Benchmarks - Vortex
114117
run: |
115-
uv run --all-packages pytest --benchmark-only benchmark/
118+
bash ../scripts/bench-taskset.sh uv run --all-packages pytest --benchmark-only benchmark/
116119
working-directory: vortex-python/
117120

118121
- name: Doctest - PyVortex
@@ -265,18 +268,17 @@ jobs:
265268
timeout-minutes: 120
266269
runs-on: >-
267270
${{ github.repository == 'vortex-data/vortex'
268-
&& format('runs-on={0}/runner=amd64-large/tag=rust-lint', github.run_id)
271+
&& format('runs-on={0}/runner=amd64-large/image=ubuntu24-full-x64-pre/tag=rust-lint', github.run_id)
269272
|| 'ubuntu-latest' }}
270273
steps:
271274
- uses: runs-on/action@v2
272275
if: github.repository == 'vortex-data/vortex'
273276
with:
274277
sccache: s3
275278
- uses: actions/checkout@v6
276-
- id: setup-rust
277-
uses: ./.github/actions/setup-rust
278-
with:
279-
repo-token: ${{ secrets.GITHUB_TOKEN }}
279+
- uses: ./.github/actions/setup-prebuild
280+
- name: Install protoc
281+
uses: ./.github/actions/setup-protoc
280282
- name: Install nightly for fmt
281283
run: rustup toolchain install $NIGHTLY_TOOLCHAIN --component rustfmt
282284
- name: Rust Lint - Format
@@ -621,11 +623,10 @@ jobs:
621623
matrix:
622624
include:
623625
- os: windows-x64
624-
runner: runs-on=${{ github.run_id }}/pool=windows-x64
626+
runner: runs-on=${{ github.run_id }}/pool=windows-x64-pre
625627
fallback_runner: windows-latest
626628
- os: linux-arm64
627-
runner: runs-on=${{ github.run_id }}/runner=arm64-medium/tag=rust-test-linux-arm64
628-
fallback_runner: ubuntu-24.04-arm
629+
runner: runs-on=${{ github.run_id }}/runner=arm64-medium/image=ubuntu24-full-arm64-pre/tag=rust-test-linux-arm64
629630
runs-on: >-
630631
${{ github.repository == 'vortex-data/vortex'
631632
&& matrix.runner
@@ -636,26 +637,13 @@ jobs:
636637
with:
637638
sccache: s3
638639
- uses: actions/checkout@v5
639-
- name: Install Visual Studio Build Tools (Windows)
640+
- name: Setup (Windows)
640641
if: matrix.os == 'windows-x64'
641642
run: |
642643
$flags = '-C debuginfo=0'
643644
echo "RUSTFLAGS=$flags" >> $env:GITHUB_ENV
644-
choco install visualstudio2022buildtools --package-parameters `
645-
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 --add Microsoft.VisualStudio.Component.Windows11SDK.26100 --passive" -y
646-
- name: Setup Python (Windows)
647-
if: matrix.os == 'windows-x64'
648-
uses: actions/setup-python@v5
649-
with:
650-
python-version: "3.11"
651-
- id: setup-rust
652-
uses: ./.github/actions/setup-rust
653-
with:
654-
repo-token: ${{ secrets.GITHUB_TOKEN }}
655-
- name: Install nextest
656-
uses: taiki-e/install-action@v2
657-
with:
658-
tool: nextest
645+
echo "C:\rust\cargo\bin" >> $env:GITHUB_PATH
646+
- uses: ./.github/actions/setup-prebuild
659647
- name: Rust Tests (Windows)
660648
if: matrix.os == 'windows-x64'
661649
run: |
@@ -743,7 +731,7 @@ jobs:
743731
- name: Run benchmarks
744732
uses: CodSpeedHQ/action@281164b0f014a4e7badd2c02cecad9b595b70537
745733
with:
746-
run: cargo codspeed run
734+
run: bash scripts/bench-taskset.sh cargo codspeed run
747735
token: ${{ secrets.CODSPEED_TOKEN }}
748736
mode: "simulation"
749737

.github/workflows/release-drafter.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ jobs:
2626
contents: write
2727
runs-on: ubuntu-latest
2828
steps:
29-
- uses: release-drafter/release-drafter@v6.1.0
29+
- uses: release-drafter/release-drafter@v6.4.0
3030
env:
3131
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3232
with:

.github/workflows/sql-benchmarks.yml

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@ on:
2121
"id": "clickbench-nvme",
2222
"subcommand": "clickbench",
2323
"name": "Clickbench on NVME",
24-
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb"
24+
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
2525
},
2626
{
2727
"id": "tpch-nvme",
@@ -37,7 +37,8 @@ on:
3737
"local_dir": "vortex-bench/data/tpch/1.0",
3838
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
3939
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
40-
"scale_factor": "1.0"
40+
"scale_factor": "1.0",
41+
"iterations": "10"
4142
},
4243
{
4344
"id": "tpch-nvme-10",
@@ -53,7 +54,8 @@ on:
5354
"local_dir": "vortex-bench/data/tpch/10.0",
5455
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
5556
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
56-
"scale_factor": "10.0"
57+
"scale_factor": "10.0",
58+
"iterations": "10"
5759
},
5860
{
5961
"id": "tpcds-nvme",
@@ -83,7 +85,8 @@ on:
8385
"local_dir": "vortex-bench/data/fineweb",
8486
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
8587
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
86-
"scale_factor": "100"
88+
"scale_factor": "100",
89+
"iterations": "10"
8790
},
8891
{
8992
"id": "polarsignals",
@@ -198,7 +201,8 @@ jobs:
198201
OTEL_EXPORTER_OTLP_HEADERS: "${{ (inputs.mode != 'pr' || github.event.pull_request.head.repo.fork == false) && secrets.OTEL_EXPORTER_OTLP_HEADERS || '' }}"
199202
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
200203
run: |
201-
.github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
204+
bash scripts/bench-taskset.sh .github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
205+
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
202206
${{ matrix.scale_factor && format('--scale-factor {0}', matrix.scale_factor) || '' }}
203207
204208
- name: Run ${{ matrix.name }} benchmark (remote)
@@ -212,7 +216,8 @@ jobs:
212216
OTEL_EXPORTER_OTLP_HEADERS: "${{ (inputs.mode != 'pr' || github.event.pull_request.head.repo.fork == false) && secrets.OTEL_EXPORTER_OTLP_HEADERS || '' }}"
213217
OTEL_RESOURCE_ATTRIBUTES: "bench-name=${{ matrix.id }}"
214218
run: |
215-
.github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
219+
bash scripts/bench-taskset.sh .github/scripts/run-sql-bench.sh "${{ matrix.subcommand }}" "${{ matrix.targets }}" \
220+
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
216221
--remote-storage "${{ matrix.remote_storage }}" \
217222
--benchmark-id "${{ matrix.id }}" \
218223
${{ matrix.scale_factor && format('--scale-factor {0}', matrix.scale_factor) || '' }}

0 commit comments

Comments
 (0)