Skip to content

Commit 178e43f

Browse files
authored
Merge branch 'develop' into integration-clickhouse-benchmark-baseline
Signed-off-by: Peng Jian <pengjian.uestc@gmail.com>
2 parents 9a1eac8 + 01669a0 commit 178e43f

682 files changed

Lines changed: 18547 additions & 9820 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Composite action: runs the given cargo command once, then runs it a second
# time and fails if the second run reports any compiler artifact as not fresh.
name: "Build and verify rebuild is a no-op"
2+
description: "Run a cargo command, then re-run it to verify all artifacts are cached"
3+
inputs:
4+
command:
5+
description: "The cargo build command to run and verify (--message-format json is appended on the verification run)"
6+
required: true
7+
runs:
8+
using: "composite"
9+
steps:
10+
# First run: executes the caller-supplied command exactly as given.
- name: "Build"
11+
shell: bash
12+
run: ${{ inputs.command }}
13+
# Second run: the same command with `--message-format json` appended and
# stderr discarded. jq collects the target name of every `compiler-artifact`
# message whose `fresh` field is false; if that list is non-empty it is
# printed and the step exits with status 1.
- name: "Verify rebuild is a no-op"
14+
shell: bash
15+
run: |
16+
stale=$(${{ inputs.command }} \
17+
--message-format json 2>/dev/null \
18+
| jq -r 'select(.reason == "compiler-artifact" and .fresh == false) | .target.name')
19+
if [ -n "$stale" ]; then
20+
echo "ERROR: Rebuild recompiled crates that should have been cached:"
21+
echo "$stale"
22+
exit 1
23+
fi
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Composite action with two steps: write an sccache config file, then start
# the sccache server in the background.
name: "Setup Prebuild"
2+
description: "Minimal setup for runners with pre-built Rust toolchain, nextest, and sccache"
3+
4+
runs:
5+
using: "composite"
6+
steps:
7+
# Creates ~/.config/sccache if needed and writes a one-line config setting
# server_startup_timeout_ms to 15000. The `>` redirection replaces any
# existing content of that file.
- name: Configure sccache timeout
8+
shell: bash
9+
run: |
10+
mkdir -p ~/.config/sccache
11+
echo 'server_startup_timeout_ms = 15000' > ~/.config/sccache/config
12+
13+
# Launches `sccache --start-server` as a background job (`&`), so the step
# does not wait for the command to return.
- name: Pre-start sccache server
14+
shell: bash
15+
run: sccache --start-server &

.github/actions/setup-rust/action.yml

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,51 @@ runs:
3838
if: runner.os == 'Linux'
3939
uses: rui314/setup-mold@v1
4040

41+
# Sets the step output `exists` to 'true' when `rustup` is found on PATH and
# to 'false' otherwise. The output file path is quoted so the redirection is
# not subject to word splitting.
- name: Check for rustup
42+
id: check-rustup
43+
shell: bash
44+
run: echo "exists=$(command -v rustup &> /dev/null && echo 'true' || echo 'false')" >> "$GITHUB_OUTPUT"
45+
4146
- name: Rust Toolchain
4247
id: rust-toolchain
4348
uses: dtolnay/rust-toolchain@stable
44-
if: steps.rustup-cache.outputs.cache-hit != 'true'
49+
if: steps.check-rustup.outputs.exists != 'true'
4550
with:
4651
toolchain: "${{ steps.toolchain-config.outputs.toolchain }}"
4752
targets: "${{ inputs.targets }}"
4853
components: "${{ inputs.components }}"
4954

55+
# Runs only when the check-rustup step reported an existing rustup. Installs
# the configured toolchain, then adds any requested targets and components.
# TARGETS and COMPONENTS are comma-separated; `${VAR//,/ }` turns the commas
# into spaces and is left unquoted on purpose so each entry becomes its own
# argument.
- name: Install additional targets and components
56+
if: steps.check-rustup.outputs.exists == 'true'
57+
shell: bash
58+
run: |
59+
rustup toolchain install "$TOOLCHAIN"
60+
# if/fi instead of `[[ ... ]] && cmd`: a false test as the script's last command would make the step exit non-zero.
if [[ -n "$TARGETS" ]]; then rustup target add --toolchain "$TOOLCHAIN" ${TARGETS//,/ }; fi
61+
if [[ -n "$COMPONENTS" ]]; then rustup component add --toolchain "$TOOLCHAIN" ${COMPONENTS//,/ }; fi
62+
env:
63+
TOOLCHAIN: ${{ steps.toolchain-config.outputs.toolchain }}
64+
TARGETS: ${{ inputs.targets }}
65+
COMPONENTS: ${{ inputs.components }}
66+
67+
# When the enable-sccache input is 'true', creates ~/.config/sccache if needed
# and writes a one-line config setting server_startup_timeout_ms to 15000.
# The `>` redirection replaces any existing content of that file.
- name: Configure sccache timeout
68+
if: inputs.enable-sccache == 'true'
69+
shell: bash
70+
run: |
71+
mkdir -p ~/.config/sccache
72+
echo 'server_startup_timeout_ms = 15000' > ~/.config/sccache/config
73+
5074
- name: Rust Compile Cache
5175
if: inputs.enable-sccache == 'true'
5276
uses: mozilla-actions/sccache-action@v0.0.9
5377

78+
# When the enable-sccache input is 'true', launches `sccache --start-server`
# as a background job (`&`), so the step does not wait for it to return.
- name: Pre-start sccache server
79+
if: inputs.enable-sccache == 'true'
80+
shell: bash
81+
run: sccache --start-server &
82+
5483
- name: Install Protoc (for lance-encoding build step)
5584
if: runner.os != 'Windows'
56-
uses: arduino/setup-protoc@v3
57-
with:
58-
version: "29.3"
59-
repo-token: ${{ inputs.repo-token }}
85+
uses: ./.github/actions/setup-protoc
6086

6187
- name: Install Ninja (for DuckDB build system)
6288
uses: seanmiddleditch/gha-setup-ninja@master

.github/runs-on.yml

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1 @@
1-
images:
2-
vortex-ci-amd64:
3-
platform: "linux"
4-
arch: "x64"
5-
name: "vortex-ci-*"
6-
owner: "245040174862"
7-
vortex-ci-arm64:
8-
platform: "linux"
9-
arch: "arm64"
10-
name: "vortex-ci-*"
11-
owner: "245040174862"
1+
_extends: .github-private

.github/scripts/fuzz_report/templates/new_issue.md

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
**Crash Location**: `{{CRASH_LOCATION}}`
66

77
**Error Message**:
8+
89
```
910
{{PANIC_MESSAGE}}
1011
```
@@ -15,6 +16,7 @@
1516
```
1617
{{STACK_TRACE_RAW}}
1718
```
19+
1820
</details>
1921
{% if CLAUDE_ANALYSIS %}
2022

@@ -31,23 +33,45 @@
3133
- **Commit**: {{COMMIT}}
3234
- **Crash Artifact**: {{ARTIFACT_URL}}
3335

34-
### Reproduction
36+
### Reproduce
37+
38+
```bash
39+
cargo +nightly fuzz run -D --sanitizer=none {{FUZZ_TARGET}} ./fuzz/artifacts/{{FUZZ_TARGET}}/{{CRASH_FILE}} -- -rss_limit_mb=0
40+
```
41+
42+
<details>
43+
<summary>First-time setup: download and extract the crash artifact</summary>
3544

3645
1. Download the crash artifact:
3746
- **Direct download**: {{ARTIFACT_URL}}
38-
- Extract the zip file
47+
- Extract the zip file (`unzip`)
48+
- The path should look like `/path/to/{{FUZZ_TARGET}}/{{CRASH_FILE}}`
49+
- You can create a `./fuzz/artifacts` directory that will be git-ignored in the `vortex` repo
50+
- Full path would be `./fuzz/artifacts/{{FUZZ_TARGET}}/{{CRASH_FILE}}`
51+
52+
2. Assuming you downloaded the zip file to `~/Downloads` and your working directory is the repository root:
3953

40-
2. Reproduce locally:
4154
```bash
42-
cargo +nightly fuzz run -D --sanitizer=none {{FUZZ_TARGET}} {{FUZZ_TARGET}}/{{CRASH_FILE}} -- -rss_limit_mb=0
55+
mkdir -p ./fuzz/artifacts
56+
mv ~/Downloads/{{FUZZ_TARGET}}-crash-artifacts.zip ./fuzz/artifacts/
57+
unzip ./fuzz/artifacts/{{FUZZ_TARGET}}-crash-artifacts.zip -d ./fuzz/artifacts/
58+
rm ./fuzz/artifacts/{{FUZZ_TARGET}}-crash-artifacts.zip
59+
```
60+
61+
3. Get a backtrace (the second command sets `RUST_BACKTRACE=full` for the complete trace):
62+
63+
```bash
64+
RUST_BACKTRACE=1 cargo +nightly fuzz run -D --sanitizer=none {{FUZZ_TARGET}} ./fuzz/artifacts/{{FUZZ_TARGET}}/{{CRASH_FILE}} -- -rss_limit_mb=0
4365
```
4466

45-
3. Get full backtrace:
4667
```bash
47-
RUST_BACKTRACE=full cargo +nightly fuzz run -D --sanitizer=none {{FUZZ_TARGET}} {{FUZZ_TARGET}}/{{CRASH_FILE}} -- -rss_limit_mb=0
68+
RUST_BACKTRACE=full cargo +nightly fuzz run -D --sanitizer=none {{FUZZ_TARGET}} ./fuzz/artifacts/{{FUZZ_TARGET}}/{{CRASH_FILE}} -- -rss_limit_mb=0
4869
```
4970

71+
</details>
72+
5073
<!-- seed_hash:{{SEED_HASH}} stack_hash:{{STACK_TRACE_HASH}} message_hash:{{MESSAGE_HASH}} -->
5174

5275
---
53-
*Auto-created by fuzzing workflow*
76+
77+
_Auto-created by fuzzing workflow_

.github/scripts/run-sql-bench.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#
1616
# Options:
1717
# --scale-factor <sf> Scale factor for the benchmark (e.g., 1.0, 10.0)
18+
# --iterations <n> Number of iterations to pass to each benchmark binary
1819
# --remote-storage <url> Remote storage URL (e.g., s3://bucket/path/)
1920
# If provided, runs in remote mode (no lance/clickhouse support).
2021
# --benchmark-id <id> Benchmark ID for error messages (e.g., tpch-s3)
@@ -26,6 +27,7 @@ targets="$2"
2627
shift 2
2728

2829
scale_factor=""
30+
iterations=""
2931
remote_storage=""
3032
benchmark_id=""
3133

@@ -35,6 +37,10 @@ while [[ $# -gt 0 ]]; do
3537
scale_factor="$2"
3638
shift 2
3739
;;
40+
--iterations)
41+
iterations="$2"
42+
shift 2
43+
;;
3844
--remote-storage)
3945
remote_storage="$2"
4046
shift 2
@@ -92,6 +98,9 @@ if [[ -n "$scale_factor" ]]; then
9298
opts="--opt scale-factor=$scale_factor"
9399
fi
94100
fi
101+
if [[ -n "$iterations" ]]; then
102+
opts="-i $iterations $opts"
103+
fi
95104

96105
touch results.json
97106

.github/workflows/bench-pr.yml

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ jobs:
5757
- uses: actions/checkout@v6
5858
with:
5959
ref: ${{ github.event.pull_request.head.sha }}
60+
- name: Setup benchmark environment
61+
run: sudo bash scripts/setup-benchmark.sh
6062
- uses: ./.github/actions/setup-rust
6163
with:
6264
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -91,7 +93,7 @@ jobs:
9193
env:
9294
RUST_BACKTRACE: full
9395
run: |
94-
target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o results.json
96+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} -d gh-json -o results.json
9597
9698
- name: Setup AWS CLI
9799
if: github.event.pull_request.head.repo.fork == false
@@ -150,3 +152,81 @@ jobs:
150152
secrets: inherit
151153
with:
152154
mode: "pr"
155+
benchmark_matrix: |
156+
[
157+
{
158+
"id": "clickbench-nvme",
159+
"subcommand": "clickbench",
160+
"name": "Clickbench on NVME",
161+
"targets": "datafusion:parquet,datafusion:vortex,duckdb:parquet,duckdb:vortex,duckdb:duckdb"
162+
},
163+
{
164+
"id": "tpch-nvme",
165+
"subcommand": "tpch",
166+
"name": "TPC-H SF=1 on NVME",
167+
"targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
168+
"scale_factor": "1.0"
169+
},
170+
{
171+
"id": "tpch-s3",
172+
"subcommand": "tpch",
173+
"name": "TPC-H SF=1 on S3",
174+
"local_dir": "vortex-bench/data/tpch/1.0",
175+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/1.0/",
176+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
177+
"scale_factor": "1.0"
178+
},
179+
{
180+
"id": "tpch-nvme-10",
181+
"subcommand": "tpch",
182+
"name": "TPC-H SF=10 on NVME",
183+
"targets": "datafusion:arrow,datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
184+
"scale_factor": "10.0"
185+
},
186+
{
187+
"id": "tpch-s3-10",
188+
"subcommand": "tpch",
189+
"name": "TPC-H SF=10 on S3",
190+
"local_dir": "vortex-bench/data/tpch/10.0",
191+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/tpch/10.0/",
192+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
193+
"scale_factor": "10.0"
194+
},
195+
{
196+
"id": "tpcds-nvme",
197+
"subcommand": "tpcds",
198+
"name": "TPC-DS SF=1 on NVME",
199+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact,duckdb:duckdb",
200+
"scale_factor": "1.0"
201+
},
202+
{
203+
"id": "statpopgen",
204+
"subcommand": "statpopgen",
205+
"name": "Statistical and Population Genetics",
206+
"targets": "duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
207+
"scale_factor": "100"
208+
},
209+
{
210+
"id": "fineweb",
211+
"subcommand": "fineweb",
212+
"name": "FineWeb NVMe",
213+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
214+
"scale_factor": "100"
215+
},
216+
{
217+
"id": "fineweb-s3",
218+
"subcommand": "fineweb",
219+
"name": "FineWeb S3",
220+
"local_dir": "vortex-bench/data/fineweb",
221+
"remote_storage": "s3://vortex-ci-benchmark-datasets/${{github.ref_name}}/${{github.run_id}}/fineweb/",
222+
"targets": "datafusion:parquet,datafusion:vortex,datafusion:vortex-compact,duckdb:parquet,duckdb:vortex,duckdb:vortex-compact",
223+
"scale_factor": "100"
224+
},
225+
{
226+
"id": "polarsignals",
227+
"subcommand": "polarsignals",
228+
"name": "PolarSignals Profiling",
229+
"targets": "datafusion:vortex",
230+
"scale_factor": "1"
231+
}
232+
]

.github/workflows/bench.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ jobs:
5454
with:
5555
sccache: s3
5656
- uses: actions/checkout@v6
57+
- name: Setup benchmark environment
58+
run: sudo bash scripts/setup-benchmark.sh
5759
- uses: ./.github/actions/setup-rust
5860
with:
5961
repo-token: ${{ secrets.GITHUB_TOKEN }}
@@ -87,7 +89,7 @@ jobs:
8789
env:
8890
RUST_BACKTRACE: full
8991
run: |
90-
target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
92+
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
9193
9294
- name: Setup AWS CLI
9395
uses: aws-actions/configure-aws-credentials@v5

0 commit comments

Comments
 (0)