Skip to content

Commit 5079069

Browse files
committed
Merge remote-tracking branch 'origin/develop' into ji/slots-for-arrays
Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk> # Conflicts: # encodings/alp/public-api.lock # encodings/alp/src/alp/array.rs # encodings/alp/src/alp_rd/array.rs # encodings/bytebool/public-api.lock # encodings/datetime-parts/public-api.lock # encodings/decimal-byte-parts/public-api.lock # encodings/fastlanes/public-api.lock # encodings/fsst/public-api.lock # encodings/pco/public-api.lock # encodings/runend/public-api.lock # encodings/sequence/public-api.lock # encodings/sparse/public-api.lock # encodings/zigzag/public-api.lock # encodings/zstd/public-api.lock # encodings/zstd/src/zstd_buffers.rs # vortex-array/public-api.lock # vortex-array/src/arrays/chunked/vtable/mod.rs # vortex-array/src/arrays/dict/vtable/mod.rs # vortex-array/src/arrays/filter/vtable.rs # vortex-array/src/arrays/shared/vtable.rs # vortex-array/src/arrays/slice/vtable.rs # vortex-array/src/arrays/variant/mod.rs # vortex-array/src/arrays/variant/vtable/mod.rs # vortex-array/src/executor.rs # vortex-array/src/vtable/mod.rs
2 parents d70ea13 + 2258dd3 commit 5079069

336 files changed

Lines changed: 22442 additions & 4256 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/fuzz_report/cli.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -363,8 +363,9 @@ def cmd_report(args: argparse.Namespace) -> int:
363363
print(f"Commented on #{existing_issue}", file=sys.stderr)
364364
_write_github_output("issue_number", str(existing_issue))
365365
else:
366-
fuzz_target = variables.get("FUZZ_TARGET", "unknown")
367-
title = f"Fuzzing Crash: {crash_info.error_variant} in {fuzz_target}"
366+
# Use FUZZ_NAME for the title (descriptive name), fall back to FUZZ_TARGET
367+
fuzz_name = variables.get("FUZZ_NAME") or variables.get("FUZZ_TARGET", "unknown")
368+
title = f"Fuzzing Crash: {crash_info.error_variant} in {fuzz_name}"
368369

369370
body = render_template(str(TEMPLATES_DIR / "new_issue.md"), variables, use_env=False)
370371
body_file = Path("issue_body.md")
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Dispatches benchmark workflows when benchmark labels are added to pull requests.
2+
# This is a separate workflow so that non-benchmark label events don't create
3+
# phantom check suites that obscure in-progress benchmark runs on the PR.
4+
5+
name: Benchmark Dispatch
6+
7+
on:
8+
pull_request:
9+
types: [labeled]
10+
branches: ["develop"]
11+
12+
permissions:
13+
actions: write
14+
contents: read
15+
pull-requests: write # for label removal and PR comments
16+
id-token: write # enables AWS-GitHub OIDC
17+
18+
jobs:
19+
remove-bench-label:
20+
runs-on: ubuntu-latest
21+
timeout-minutes: 2
22+
if: github.event.label.name == 'action/benchmark'
23+
steps:
24+
- uses: actions-ecosystem/action-remove-labels@v1
25+
if: github.event.pull_request.head.repo.full_name == 'vortex-data/vortex'
26+
with:
27+
labels: action/benchmark
28+
fail_on_error: true
29+
30+
bench:
31+
needs: remove-bench-label
32+
uses: ./.github/workflows/bench-pr.yml
33+
secrets: inherit
34+
35+
remove-sql-label:
36+
runs-on: ubuntu-latest
37+
timeout-minutes: 2
38+
if: github.event.label.name == 'action/benchmark-sql'
39+
steps:
40+
- uses: actions-ecosystem/action-remove-labels@v1
41+
if: github.event.pull_request.head.repo.full_name == 'vortex-data/vortex'
42+
with:
43+
labels: action/benchmark-sql
44+
fail_on_error: true
45+
46+
sql-bench:
47+
needs: remove-sql-label
48+
uses: ./.github/workflows/sql-pr.yml
49+
secrets: inherit

.github/workflows/bench-pr.yml

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Runs all benchmarks once when we add the `action/benchmark` tag to a pull request.
1+
# Runs all benchmarks once for a pull request.
2+
# Called from bench-dispatch.yml when the `action/benchmark` label is added.
23

34
name: PR Benchmarks
45

@@ -9,32 +10,16 @@ concurrency:
910
cancel-in-progress: false
1011

1112
on:
12-
pull_request:
13-
types: [labeled, synchronize]
14-
branches: ["develop"]
13+
workflow_call: { }
1514
workflow_dispatch: { }
1615

1716
permissions:
18-
actions: write # for removing labels
1917
contents: read
2018
pull-requests: write # for commenting on PRs
2119
id-token: write # enables AWS-GitHub OIDC
2220

2321
jobs:
24-
label_trigger:
25-
runs-on: ubuntu-latest
26-
timeout-minutes: 120
27-
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'action/benchmark' && github.event_name == 'pull_request' }}
28-
steps:
29-
# We remove the benchmark label first so that the workflow can be re-triggered.
30-
- uses: actions-ecosystem/action-remove-labels@v1
31-
if: ${{ github.event.pull_request.head.repo.full_name == 'vortex-data/vortex' }}
32-
with:
33-
labels: action/benchmark
34-
fail_on_error: true
35-
3622
bench:
37-
needs: label_trigger
3823
timeout-minutes: 120
3924
runs-on: >-
4025
${{ github.repository == 'vortex-data/vortex'
@@ -48,7 +33,6 @@ jobs:
4833
build_args: "--features lance"
4934
- id: compress-bench
5035
name: Compression
51-
if: ${{ contains(github.event.head_commit.message, '[benchmark]') || github.event.label.name == 'action/benchmark' && github.event_name == 'pull_request' }}
5236
steps:
5337
- uses: runs-on/action@v2
5438
if: github.event.pull_request.head.repo.fork == false
@@ -137,17 +121,16 @@ jobs:
137121
comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}
138122

139123
- name: Comment PR on failure
140-
if: failure() && inputs.mode == 'pr' && github.event.pull_request.head.repo.fork == false
124+
if: failure() && github.event.pull_request.head.repo.fork == false
141125
uses: thollander/actions-comment-pull-request@v3
142126
with:
143127
message: |
144-
# 🚨🚨🚨❌❌❌ BENCHMARK FAILED ❌❌❌🚨🚨🚨
128+
# BENCHMARK FAILED
145129
146130
Benchmark `${{ matrix.benchmark.name }}` failed! Check the [workflow run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) for details.
147131
comment-tag: bench-pr-comment-${{ matrix.benchmark.id }}
148132

149133
sql:
150-
needs: label_trigger
151134
uses: ./.github/workflows/sql-benchmarks.yml
152135
secrets: inherit
153136
with:

.github/workflows/ci.yml

Lines changed: 137 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,9 @@ jobs:
6969
- name: Python Lint - PyRight
7070
env:
7171
MATURIN_PEP517_ARGS: "--profile dev"
72-
run: uv run basedpyright vortex-python
72+
run: |
73+
uv sync --all-packages
74+
uv run basedpyright vortex-python
7375
7476
python-test:
7577
name: "Python (test)"
@@ -379,51 +381,132 @@ jobs:
379381
flags: ${{ matrix.suite }}
380382
use_oidc: true
381383

382-
rust-test:
383-
name: "Rust tests (sanitizer)"
384-
timeout-minutes: 40
384+
rust-test-sanitizer:
385+
strategy:
386+
fail-fast: false
387+
matrix:
388+
include:
389+
- sanitizer: asan
390+
sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak"
391+
- sanitizer: msan
392+
sanitizer_flags: "-Zsanitizer=memory"
393+
- sanitizer: tsan
394+
sanitizer_flags: "-Zsanitizer=thread"
395+
name: "Rust tests (${{ matrix.sanitizer }})"
385396
runs-on: >-
386397
${{ github.repository == 'vortex-data/vortex'
387398
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=rust-test-sanitizer', github.run_id)
388399
|| 'ubuntu-latest' }}
400+
timeout-minutes: 40
389401
env:
390-
# Add debug symbols and enable ASAN/LSAN with better output
391-
ASAN_OPTIONS: "symbolize=1:print_stats=1:check_initialization_order=1:detect_leaks=1:halt_on_error=0:verbosity=1:leak_check_at_exit=1"
392-
LSAN_OPTIONS: "verbosity=1:report_objects=1"
402+
ASAN_OPTIONS: "symbolize=1:check_initialization_order=1:detect_leaks=1:leak_check_at_exit=1"
403+
LSAN_OPTIONS: "report_objects=1"
393404
ASAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
394-
# Link against DuckDB debug build
395-
VX_DUCKDB_DEBUG: "1"
396-
# Keep frame pointers for better stack traces
397-
CARGO_PROFILE_DEV_DEBUG: "true"
398-
CARGO_PROFILE_TEST_DEBUG: "true"
399-
# Skip slow tests that are too expensive under sanitizer
405+
MSAN_OPTIONS: "symbolize=1"
406+
MSAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
407+
TSAN_OPTIONS: "symbolize=1"
408+
TSAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
400409
VORTEX_SKIP_SLOW_TESTS: "1"
410+
# -Cunsafe-allow-abi-mismatch=sanitizer: libraries like compiler_builtins
411+
# unset the -Zsanitizer flag, and we should allow that.
412+
RUSTFLAGS: "-A warnings -Cunsafe-allow-abi-mismatch=sanitizer --cfg disable_loom --cfg vortex_nightly -C debuginfo=2 -C opt-level=0 -C strip=none"
401413
steps:
402414
- uses: runs-on/action@v2
403415
if: github.repository == 'vortex-data/vortex'
404416
with:
405417
sccache: s3
406418
- uses: actions/checkout@v6
407419
- uses: ./.github/actions/setup-prebuild
408-
- name: Install nightly for sanitizer
420+
- name: Install Rust nightly toolchain
409421
run: |
410422
rustup toolchain install $NIGHTLY_TOOLCHAIN
411423
rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview
412-
- name: Rust Tests
413-
env:
414-
RUSTFLAGS: "-A warnings -Zsanitizer=address -Zsanitizer=leak --cfg disable_loom --cfg vortex_nightly -C debuginfo=2 -C opt-level=0 -C strip=none"
424+
export RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}"
425+
- name: Build tests with sanitizer
415426
run: |
416-
# Build with full debug info first (helps with caching)
417-
cargo +$NIGHTLY_TOOLCHAIN build --locked --all-features \
418-
--target x86_64-unknown-linux-gnu \
419-
-p vortex-buffer -p vortex-ffi -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
420-
# Run tests with sanitizers and debug output
421-
cargo +$NIGHTLY_TOOLCHAIN nextest run \
422-
--locked \
423-
--all-features \
424-
--no-fail-fast \
425-
--target x86_64-unknown-linux-gnu \
426-
-p vortex-buffer -p vortex-ffi -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
427+
cargo +$NIGHTLY_TOOLCHAIN build --locked --all-features \
428+
--target x86_64-unknown-linux-gnu -Zbuild-std \
429+
-p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
430+
431+
- name: Run tests with sanitizer
432+
run: |
433+
cargo +$NIGHTLY_TOOLCHAIN nextest run --locked --all-features \
434+
--target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \
435+
-p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
436+
437+
# vortex-ffi requires --no-default-features as otherwise we pull in
438+
# Mimalloc which interferes with sanitizers
439+
# cargo nextest reports fewer sanitizer issues than cargo test
440+
# TODO(myrrc): remove --no-default-features once we make Mimalloc opt-in
441+
- name: Run vortex-ffi tests with sanitizer
442+
run: |
443+
cargo +$NIGHTLY_TOOLCHAIN test --locked --no-default-features \
444+
--target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \
445+
-p vortex-ffi -- --no-capture
446+
447+
rust-ffi-test-sanitizer:
448+
strategy:
449+
fail-fast: false
450+
matrix:
451+
include:
452+
# We don't run memory sanitizer as it's clang-only and provides many
453+
# false positives for Catch2
454+
- sanitizer: asan
455+
sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak"
456+
- sanitizer: tsan
457+
sanitizer_flags: "-Zsanitizer=thread"
458+
name: "Rust/C++ FFI tests (${{ matrix.sanitizer }})"
459+
timeout-minutes: 40
460+
env:
461+
ASAN_OPTIONS: "symbolize=1:check_initialization_order=1:detect_leaks=1:leak_check_at_exit=1"
462+
LSAN_OPTIONS: "report_objects=1"
463+
ASAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
464+
MSAN_OPTIONS: "symbolize=1"
465+
MSAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
466+
TSAN_OPTIONS: "symbolize=1"
467+
TSAN_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer"
468+
VORTEX_SKIP_SLOW_TESTS: "1"
469+
# -Cunsafe-allow-abi-mismatch=sanitizer: libraries like compiler_builtins
470+
# unset the -Zsanitizer flag, and we should allow that.
471+
runs-on: >-
472+
${{ github.repository == 'vortex-data/vortex'
473+
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=rust-ffi-test-sanitizer', github.run_id)
474+
|| 'ubuntu-latest' }}
475+
steps:
476+
- uses: runs-on/action@v2
477+
if: github.repository == 'vortex-data/vortex'
478+
with:
479+
sccache: s3
480+
- uses: actions/checkout@v6
481+
- uses: ./.github/actions/setup-prebuild
482+
- name: Install rustfilt
483+
run: |
484+
cargo install rustfilt
485+
- name: Install Rust nightly toolchain
486+
run: |
487+
rustup toolchain install $NIGHTLY_TOOLCHAIN
488+
rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview
489+
490+
# Export flags here so that rustfilt won't be built with sanitizers
491+
export RUSTFLAGS="-A warnings -Cunsafe-allow-abi-mismatch=sanitizer \
492+
--cfg disable_loom --cfg vortex_nightly -C debuginfo=2 \
493+
-C opt-level=0 -C strip=none -Zexternal-clangrt \
494+
${{ matrix.sanitizer_flags }}"
495+
- name: Build FFI library
496+
run: |
497+
# TODO(myrrc): remove --no-default-features
498+
cargo +$NIGHTLY_TOOLCHAIN build --locked --no-default-features \
499+
--target x86_64-unknown-linux-gnu -Zbuild-std \
500+
-p vortex-ffi
501+
- name: Build FFI library tests
502+
run: |
503+
cd vortex-ffi
504+
cmake -Bbuild -DBUILD_TESTS=1 -DSANITIZER=${{ matrix.sanitizer }} -DTARGET_TRIPLE="x86_64-unknown-linux-gnu"
505+
cmake --build build -j
506+
- name: Run tests
507+
run: |
508+
set -o pipefail
509+
./vortex-ffi/build/test/vortex_ffi_test 2>&1 | rustfilt -i-
427510
428511
cuda-build-lint:
429512
if: github.repository == 'vortex-data/vortex'
@@ -708,7 +791,7 @@ jobs:
708791
mkdir -p vortex-cxx/build
709792
cmake -S vortex-cxx -B vortex-cxx/build -DVORTEX_ENABLE_TESTING=ON -DVORTEX_ENABLE_ASAN=ON
710793
cmake --build vortex-cxx/build --parallel $(nproc)
711-
ctest --test-dir vortex-cxx/build -V
794+
ctest --test-dir vortex-cxx/build -j $(nproc) -V
712795
- name: Build and run the example in release mode
713796
run: |
714797
cmake -S vortex-cxx/examples -B vortex-cxx/examples/build -DCMAKE_BUILD_TYPE=Release
@@ -821,6 +904,31 @@ jobs:
821904
run: |
822905
find flatbuffers/ -type f -name "*.fbs" | sed 's/^flatbuffers\///' | xargs -I{} -n1 flatc -I flatbuffers.HEAD --conform-includes flatbuffers --conform flatbuffers/{} flatbuffers.HEAD/{}
823906
907+
ffi-c-test:
908+
name: "C API test build"
909+
timeout-minutes: 10
910+
runs-on: >-
911+
${{ github.repository == 'vortex-data/vortex'
912+
&& format('runs-on={0}/runner=amd64-medium/image=ubuntu24-full-x64-pre-v2/tag=cxx-build', github.run_id)
913+
|| 'ubuntu-latest' }}
914+
steps:
915+
- uses: runs-on/action@v2
916+
if: github.repository == 'vortex-data/vortex'
917+
with:
918+
sccache: s3
919+
- uses: actions/checkout@v6
920+
- uses: ./.github/actions/setup-prebuild
921+
- name: "regenerate FFI header file"
922+
run: |
923+
cargo +$NIGHTLY_TOOLCHAIN build -p vortex-ffi
924+
- name: Build and run C++ unit tests
925+
run: |
926+
cd vortex-ffi
927+
mkdir build
928+
cmake -Bbuild
929+
cmake --build build -j $(nproc)
930+
ctest --test-dir build -j $(nproc)
931+
824932
check-java-publish-build:
825933
runs-on: ${{ matrix.target.runs-on }}
826934
container:

.github/workflows/compat-gen-upload.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
name: Compat Fixture Upload
22

33
on:
4+
workflow_call:
5+
inputs:
6+
git_ref:
7+
description: "Git ref for version detection (e.g. v0.62.0). Defaults to HEAD."
8+
required: false
9+
type: string
10+
confirm_upload:
11+
description: "Set to 'yes' to confirm upload."
12+
required: true
13+
type: string
414
workflow_dispatch:
515
inputs:
616
git_ref:

.github/workflows/docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
mkdir -p docs/_static/vortex-jni
5050
mkdir -p docs/_static/vortex-spark
5151
cp -r java/vortex-jni/build/docs/javadoc/* docs/_static/vortex-jni/
52-
cp -r java/vortex-spark/build/docs/javadoc/* docs/_static/vortex-spark/
52+
cp -r java/vortex-spark/build/vortex-spark_2.13/docs/javadoc/* docs/_static/vortex-spark/
5353
- name: build Python and Rust docs
5454
run: |
5555
uv run --all-packages make -C docs html

0 commit comments

Comments
 (0)