Skip to content

Commit f1e3866

Browse files
author
Kazantsev Maksim
committed
Merge remote-tracking branch 'origin/main' into to_csv
2 parents 78c2e23 + 408152e commit f1e3866

39 files changed

Lines changed: 1339 additions & 131 deletions

.github/actions/java-test/action.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,16 @@ inputs:
3737
description: 'Whether to upload test results including coverage to GitHub'
3838
required: false
3939
default: 'false'
40+
skip-native-build:
41+
description: 'Skip native build (when using pre-built artifact)'
42+
required: false
43+
default: 'false'
4044

4145
runs:
4246
using: "composite"
4347
steps:
4448
- name: Run Cargo release build
49+
if: ${{ inputs.skip-native-build != 'true' }}
4550
shell: bash
4651
# it is important that we run the Scala tests against a release build rather than a debug build
4752
# to make sure that no tests are relying on overflow checks that are present only in debug builds

.github/actions/setup-spark-builder/action.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ inputs:
2424
spark-version:
2525
description: 'The Apache Spark version (e.g., 3.5.7) to build'
2626
required: true
27+
skip-native-build:
28+
description: 'Skip native build (when using pre-built artifact)'
29+
required: false
30+
default: 'false'
2731
runs:
2832
using: "composite"
2933
steps:
@@ -51,7 +55,15 @@ runs:
5155
restore-keys: |
5256
${{ runner.os }}-spark-sql-
5357
54-
- name: Build Comet
58+
- name: Build Comet (with native)
59+
if: ${{ inputs.skip-native-build != 'true' }}
5560
shell: bash
5661
run: |
5762
PROFILES="-Pspark-${{inputs.spark-short-version}}" make release
63+
64+
- name: Build Comet (Maven only, skip native)
65+
if: ${{ inputs.skip-native-build == 'true' }}
66+
shell: bash
67+
run: |
68+
# The pre-built native library must already be in native/target/release/ (the calling workflow downloads it there before this step runs)
69+
./mvnw install -Prelease -DskipTests -Pspark-${{inputs.spark-short-version}}

.github/workflows/pr_build_linux.yml

Lines changed: 83 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,48 @@ env:
4646
RUST_VERSION: stable
4747

4848
jobs:
49-
50-
# Run Rust tests once per JDK version
49+
50+
# Build native library once and share with all test jobs
51+
build-native:
52+
name: Build Native Library
53+
runs-on: ubuntu-latest
54+
container:
55+
image: amd64/rust
56+
steps:
57+
- uses: actions/checkout@v6
58+
59+
- name: Setup Rust toolchain
60+
uses: ./.github/actions/setup-builder
61+
with:
62+
rust-version: ${{ env.RUST_VERSION }}
63+
jdk-version: 17 # JDK only needed for common module proto generation
64+
65+
- name: Cache Cargo
66+
uses: actions/cache@v4
67+
with:
68+
path: |
69+
~/.cargo/registry
70+
~/.cargo/git
71+
native/target
72+
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
73+
restore-keys: |
74+
${{ runner.os }}-cargo-ci-
75+
76+
- name: Build native library (CI profile)
77+
run: |
78+
cd native
79+
# CI profile: same overflow behavior as release, but faster compilation
80+
# (no LTO, parallel codegen)
81+
cargo build --profile ci
82+
83+
- name: Upload native library
84+
uses: actions/upload-artifact@v4
85+
with:
86+
name: native-lib-linux
87+
path: native/target/ci/libcomet.so
88+
retention-days: 1
89+
90+
# Run Rust tests (runs in parallel with build-native, uses debug builds)
5191
linux-test-rust:
5292
strategy:
5393
matrix:
@@ -60,15 +100,29 @@ jobs:
60100
image: amd64/rust
61101
steps:
62102
- uses: actions/checkout@v6
103+
63104
- name: Setup Rust & Java toolchain
64105
uses: ./.github/actions/setup-builder
65106
with:
66-
rust-version: ${{env.RUST_VERSION}}
107+
rust-version: ${{ env.RUST_VERSION }}
67108
jdk-version: ${{ matrix.java_version }}
109+
110+
- name: Cache Cargo
111+
uses: actions/cache@v4
112+
with:
113+
path: |
114+
~/.cargo/registry
115+
~/.cargo/git
116+
native/target
117+
key: ${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
118+
restore-keys: |
119+
${{ runner.os }}-cargo-debug-java${{ matrix.java_version }}-
120+
68121
- name: Rust test steps
69122
uses: ./.github/actions/rust-test
70-
123+
71124
linux-test:
125+
needs: build-native
72126
strategy:
73127
matrix:
74128
os: [ubuntu-latest]
@@ -134,6 +188,9 @@ jobs:
134188
org.apache.spark.sql.comet.ParquetEncryptionITCase
135189
org.apache.comet.exec.CometNativeReaderSuite
136190
org.apache.comet.CometIcebergNativeSuite
191+
- name: "csv"
192+
value: |
193+
org.apache.comet.csv.CometCsvNativeReadSuite
137194
- name: "exec"
138195
value: |
139196
org.apache.comet.exec.CometAggregateSuite
@@ -187,11 +244,31 @@ jobs:
187244

188245
steps:
189246
- uses: actions/checkout@v6
247+
190248
- name: Setup Rust & Java toolchain
191249
uses: ./.github/actions/setup-builder
192250
with:
193-
rust-version: ${{env.RUST_VERSION}}
251+
rust-version: ${{ env.RUST_VERSION }}
194252
jdk-version: ${{ matrix.profile.java_version }}
253+
254+
- name: Download native library
255+
uses: actions/download-artifact@v4
256+
with:
257+
name: native-lib-linux
258+
# Download to release/ since Maven's -Prelease expects libcomet.so there
259+
path: native/target/release/
260+
261+
# Restore only the Cargo registry/git caches (native/target is not cached here because the native build is skipped), in case a later step still runs cargo
262+
- name: Cache Cargo registry
263+
uses: actions/cache@v4
264+
with:
265+
path: |
266+
~/.cargo/registry
267+
~/.cargo/git
268+
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }}
269+
restore-keys: |
270+
${{ runner.os }}-cargo-registry-
271+
195272
- name: Java test steps
196273
uses: ./.github/actions/java-test
197274
with:
@@ -200,3 +277,4 @@ jobs:
200277
maven_opts: ${{ matrix.profile.maven_opts }}
201278
scan_impl: ${{ matrix.profile.scan_impl }}
202279
upload-test-reports: true
280+
skip-native-build: true

.github/workflows/pr_build_macos.yml

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,48 @@ env:
4747

4848
jobs:
4949

50+
# Build native library once and share with all test jobs
51+
build-native:
52+
name: Build Native Library (macOS)
53+
runs-on: macos-14
54+
steps:
55+
- uses: actions/checkout@v6
56+
57+
- name: Setup Rust & Java toolchain
58+
uses: ./.github/actions/setup-macos-builder
59+
with:
60+
rust-version: ${{ env.RUST_VERSION }}
61+
jdk-version: 17
62+
jdk-architecture: aarch64
63+
protoc-architecture: aarch_64
64+
65+
- name: Cache Cargo
66+
uses: actions/cache@v4
67+
with:
68+
path: |
69+
~/.cargo/registry
70+
~/.cargo/git
71+
native/target
72+
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
73+
restore-keys: |
74+
${{ runner.os }}-cargo-ci-
75+
76+
- name: Build native library (CI profile)
77+
run: |
78+
cd native
79+
# CI profile: same overflow behavior as release, but faster compilation
80+
# (no LTO, parallel codegen)
81+
cargo build --profile ci
82+
83+
- name: Upload native library
84+
uses: actions/upload-artifact@v4
85+
with:
86+
name: native-lib-macos
87+
path: native/target/ci/libcomet.dylib
88+
retention-days: 1
89+
5090
macos-aarch64-test:
91+
needs: build-native
5192
strategy:
5293
matrix:
5394
os: [macos-14]
@@ -97,6 +138,9 @@ jobs:
97138
org.apache.spark.sql.comet.ParquetEncryptionITCase
98139
org.apache.comet.exec.CometNativeReaderSuite
99140
org.apache.comet.CometIcebergNativeSuite
141+
- name: "csv"
142+
value: |
143+
org.apache.comet.csv.CometCsvNativeReadSuite
100144
- name: "exec"
101145
value: |
102146
org.apache.comet.exec.CometAggregateSuite
@@ -146,13 +190,33 @@ jobs:
146190
runs-on: ${{ matrix.os }}
147191
steps:
148192
- uses: actions/checkout@v6
193+
149194
- name: Setup Rust & Java toolchain
150195
uses: ./.github/actions/setup-macos-builder
151196
with:
152-
rust-version: ${{env.RUST_VERSION}}
197+
rust-version: ${{ env.RUST_VERSION }}
153198
jdk-version: ${{ matrix.profile.java_version }}
154199
jdk-architecture: aarch64
155200
protoc-architecture: aarch_64
201+
202+
- name: Download native library
203+
uses: actions/download-artifact@v4
204+
with:
205+
name: native-lib-macos
206+
# Download to release/ since Maven's -Prelease expects libcomet.dylib there
207+
path: native/target/release/
208+
209+
# Restore only the Cargo registry/git caches (native/target is not cached here because the native build is skipped), in case a later step still runs cargo
210+
- name: Cache Cargo registry
211+
uses: actions/cache@v4
212+
with:
213+
path: |
214+
~/.cargo/registry
215+
~/.cargo/git
216+
key: ${{ runner.os }}-cargo-registry-${{ hashFiles('native/**/Cargo.lock') }}
217+
restore-keys: |
218+
${{ runner.os }}-cargo-registry-
219+
156220
- name: Set thread thresholds envs for spark test on macOS
157221
# see: https://github.com/apache/datafusion-comet/issues/2965
158222
shell: bash
@@ -161,9 +225,11 @@ jobs:
161225
echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
162226
echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
163227
echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
228+
164229
- name: Java test steps
165230
uses: ./.github/actions/java-test
166231
with:
167232
artifact_name: ${{ matrix.os }}-${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
168233
suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
169234
maven_opts: ${{ matrix.profile.maven_opts }}
235+
skip-native-build: true

.github/workflows/spark_sql_test.yml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,47 @@ env:
5252
RUST_VERSION: stable
5353

5454
jobs:
55+
56+
# Build native library once and share with all test jobs
57+
build-native:
58+
name: Build Native Library
59+
runs-on: ubuntu-24.04
60+
container:
61+
image: amd64/rust
62+
steps:
63+
- uses: actions/checkout@v6
64+
65+
- name: Setup Rust toolchain
66+
uses: ./.github/actions/setup-builder
67+
with:
68+
rust-version: ${{ env.RUST_VERSION }}
69+
jdk-version: 17
70+
71+
- name: Cache Cargo
72+
uses: actions/cache@v4
73+
with:
74+
path: |
75+
~/.cargo/registry
76+
~/.cargo/git
77+
native/target
78+
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
79+
restore-keys: |
80+
${{ runner.os }}-cargo-ci-
81+
82+
- name: Build native library (CI profile)
83+
run: |
84+
cd native
85+
cargo build --profile ci
86+
87+
- name: Upload native library
88+
uses: actions/upload-artifact@v4
89+
with:
90+
name: native-lib-linux
91+
path: native/target/ci/libcomet.so
92+
retention-days: 1
93+
5594
spark-sql-auto-scan:
95+
needs: build-native
5696
strategy:
5797
matrix:
5898
os: [ubuntu-24.04]
@@ -81,11 +121,17 @@ jobs:
81121
with:
82122
rust-version: ${{env.RUST_VERSION}}
83123
jdk-version: ${{ matrix.spark-version.java }}
124+
- name: Download native library
125+
uses: actions/download-artifact@v4
126+
with:
127+
name: native-lib-linux
128+
path: native/target/release/
84129
- name: Setup Spark
85130
uses: ./.github/actions/setup-spark-builder
86131
with:
87132
spark-version: ${{ matrix.spark-version.full }}
88133
spark-short-version: ${{ matrix.spark-version.short }}
134+
skip-native-build: true
89135
- name: Run Spark tests
90136
run: |
91137
cd apache-spark
@@ -105,6 +151,7 @@ jobs:
105151
path: "**/fallback.log"
106152

107153
spark-sql-native-native-comet:
154+
needs: build-native
108155
strategy:
109156
matrix:
110157
os: [ ubuntu-24.04 ]
@@ -130,11 +177,17 @@ jobs:
130177
with:
131178
rust-version: ${{env.RUST_VERSION}}
132179
jdk-version: ${{ matrix.java-version }}
180+
- name: Download native library
181+
uses: actions/download-artifact@v4
182+
with:
183+
name: native-lib-linux
184+
path: native/target/release/
133185
- name: Setup Spark
134186
uses: ./.github/actions/setup-spark-builder
135187
with:
136188
spark-version: ${{ matrix.spark-version.full }}
137189
spark-short-version: ${{ matrix.spark-version.short }}
190+
skip-native-build: true
138191
- name: Run Spark tests
139192
run: |
140193
cd apache-spark
@@ -154,6 +207,7 @@ jobs:
154207
path: "**/fallback.log"
155208

156209
spark-sql-native-iceberg-compat:
210+
needs: build-native
157211
strategy:
158212
matrix:
159213
os: [ubuntu-24.04]
@@ -179,11 +233,17 @@ jobs:
179233
with:
180234
rust-version: ${{env.RUST_VERSION}}
181235
jdk-version: ${{ matrix.java-version }}
236+
- name: Download native library
237+
uses: actions/download-artifact@v4
238+
with:
239+
name: native-lib-linux
240+
path: native/target/release/
182241
- name: Setup Spark
183242
uses: ./.github/actions/setup-spark-builder
184243
with:
185244
spark-version: ${{ matrix.spark-version.full }}
186245
spark-short-version: ${{ matrix.spark-version.short }}
246+
skip-native-build: true
187247
- name: Run Spark tests
188248
run: |
189249
cd apache-spark

0 commit comments

Comments
 (0)