Skip to content

Commit 97e9b2d

Browse files
andygrove and claude committed
Add CometPartitionColumnBenchmark for partition column scan performance
Adds a standalone benchmark that writes partitioned parquet tables and measures scan performance with 1 and 5 partition columns. Tests both reading data columns alongside partitions and reading partition columns themselves. This exercises the CometConstantVector path where constant columns are exported as 1-element Arrow arrays and expanded on the native side.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2 parents 19b12c1 + 48ebd28 commit 97e9b2d

227 files changed

Lines changed: 8581 additions & 1805 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

.github/actions/java-test/action.yaml

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ inputs:
3232
scan_impl:
3333
description: 'The default Parquet scan implementation'
3434
required: false
35-
default: 'native_comet'
35+
default: 'auto'
3636
upload-test-reports:
3737
description: 'Whether to upload test results including coverage to GitHub'
3838
required: false
@@ -146,7 +146,3 @@ runs:
146146
path: "**/target/surefire-reports/*.txt"
147147
retention-days: 7 # 1 week for test reports
148148
overwrite: true
149-
150-
- name: Upload coverage results
151-
if: ${{ inputs.upload-test-reports == 'true' }}
152-
uses: codecov/codecov-action@v5

.github/actions/setup-spark-builder/action.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@ inputs:
2222
description: 'The Apache Spark short version (e.g., 3.5) to build'
2323
required: true
2424
spark-version:
25-
description: 'The Apache Spark version (e.g., 3.5.7) to build'
25+
description: 'The Apache Spark version (e.g., 3.5.8) to build'
2626
required: true
2727
skip-native-build:
2828
description: 'Skip native build (when using pre-built artifact)'

.github/workflows/docs.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ jobs:
4747
python-version: "3.10"
4848

4949
- name: Setup Java
50-
uses: actions/setup-java@v4
50+
uses: actions/setup-java@v5
5151
with:
5252
distribution: 'temurin'
5353
java-version: '17'

.github/workflows/iceberg_spark_test.yml

Lines changed: 97 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -46,14 +46,64 @@ env:
4646
RUST_VERSION: stable
4747

4848
jobs:
49+
# Build native library once and share with all test jobs
50+
build-native:
51+
if: contains(github.event.pull_request.title, '[iceberg]')
52+
name: Build Native Library
53+
runs-on: ubuntu-24.04
54+
container:
55+
image: amd64/rust
56+
steps:
57+
- uses: actions/checkout@v6
58+
59+
- name: Setup Rust & Java toolchain
60+
uses: ./.github/actions/setup-builder
61+
with:
62+
rust-version: ${{ env.RUST_VERSION }}
63+
jdk-version: 17
64+
65+
- name: Restore Cargo cache
66+
uses: actions/cache/restore@v5
67+
with:
68+
path: |
69+
~/.cargo/registry
70+
~/.cargo/git
71+
native/target
72+
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
73+
restore-keys: |
74+
${{ runner.os }}-cargo-ci-
75+
76+
- name: Build native library
77+
# Use CI profile for faster builds (no LTO) and to share cache with pr_build_linux.yml.
78+
run: |
79+
cd native && cargo build --profile ci
80+
81+
- name: Save Cargo cache
82+
uses: actions/cache/save@v5
83+
if: github.ref == 'refs/heads/main'
84+
with:
85+
path: |
86+
~/.cargo/registry
87+
~/.cargo/git
88+
native/target
89+
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
90+
91+
- name: Upload native library
92+
uses: actions/upload-artifact@v6
93+
with:
94+
name: native-lib-iceberg
95+
path: native/target/ci/libcomet.so
96+
retention-days: 1
97+
4998
iceberg-spark:
99+
needs: build-native
50100
if: contains(github.event.pull_request.title, '[iceberg]')
51101
strategy:
52102
matrix:
53103
os: [ubuntu-24.04]
54104
java-version: [11, 17]
55105
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
56-
spark-version: [{short: '3.5', full: '3.5.7'}]
106+
spark-version: [{short: '3.5', full: '3.5.8'}]
57107
scala-version: ['2.13']
58108
fail-fast: false
59109
name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -69,10 +119,14 @@ jobs:
69119
with:
70120
rust-version: ${{env.RUST_VERSION}}
71121
jdk-version: ${{ matrix.java-version }}
122+
- name: Download native library
123+
uses: actions/download-artifact@v7
124+
with:
125+
name: native-lib-iceberg
126+
path: native/target/release/
72127
- name: Build Comet
73-
shell: bash
74128
run: |
75-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
129+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
76130
- name: Setup Iceberg
77131
uses: ./.github/actions/setup-iceberg-builder
78132
with:
@@ -86,13 +140,14 @@ jobs:
86140
-Pquick=true -x javadoc
87141
88142
iceberg-spark-extensions:
143+
needs: build-native
89144
if: contains(github.event.pull_request.title, '[iceberg]')
90145
strategy:
91146
matrix:
92147
os: [ubuntu-24.04]
93148
java-version: [11, 17]
94149
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
95-
spark-version: [{short: '3.5', full: '3.5.7'}]
150+
spark-version: [{short: '3.5', full: '3.5.8'}]
96151
scala-version: ['2.13']
97152
fail-fast: false
98153
name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -108,10 +163,14 @@ jobs:
108163
with:
109164
rust-version: ${{env.RUST_VERSION}}
110165
jdk-version: ${{ matrix.java-version }}
166+
- name: Download native library
167+
uses: actions/download-artifact@v7
168+
with:
169+
name: native-lib-iceberg
170+
path: native/target/release/
111171
- name: Build Comet
112-
shell: bash
113172
run: |
114-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
173+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
115174
- name: Setup Iceberg
116175
uses: ./.github/actions/setup-iceberg-builder
117176
with:
@@ -125,13 +184,14 @@ jobs:
125184
-Pquick=true -x javadoc
126185
127186
iceberg-spark-runtime:
187+
needs: build-native
128188
if: contains(github.event.pull_request.title, '[iceberg]')
129189
strategy:
130190
matrix:
131191
os: [ubuntu-24.04]
132192
java-version: [11, 17]
133193
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
134-
spark-version: [{short: '3.5', full: '3.5.7'}]
194+
spark-version: [{short: '3.5', full: '3.5.8'}]
135195
scala-version: ['2.13']
136196
fail-fast: false
137197
name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -147,10 +207,14 @@ jobs:
147207
with:
148208
rust-version: ${{env.RUST_VERSION}}
149209
jdk-version: ${{ matrix.java-version }}
210+
- name: Download native library
211+
uses: actions/download-artifact@v7
212+
with:
213+
name: native-lib-iceberg
214+
path: native/target/release/
150215
- name: Build Comet
151-
shell: bash
152216
run: |
153-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
217+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
154218
- name: Setup Iceberg
155219
uses: ./.github/actions/setup-iceberg-builder
156220
with:
@@ -164,13 +228,14 @@ jobs:
164228
-Pquick=true -x javadoc
165229
166230
iceberg-spark-rust:
231+
needs: build-native
167232
if: contains(github.event.pull_request.title, '[iceberg]')
168233
strategy:
169234
matrix:
170235
os: [ubuntu-24.04]
171236
java-version: [11, 17]
172237
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
173-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
238+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
174239
scala-version: ['2.13']
175240
fail-fast: false
176241
name: iceberg-spark-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -186,10 +251,14 @@ jobs:
186251
with:
187252
rust-version: ${{env.RUST_VERSION}}
188253
jdk-version: ${{ matrix.java-version }}
254+
- name: Download native library
255+
uses: actions/download-artifact@v7
256+
with:
257+
name: native-lib-iceberg
258+
path: native/target/release/
189259
- name: Build Comet
190-
shell: bash
191260
run: |
192-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
261+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
193262
- name: Setup Iceberg
194263
uses: ./.github/actions/setup-iceberg-rust-builder
195264
with:
@@ -203,13 +272,14 @@ jobs:
203272
-Pquick=true -x javadoc
204273
205274
iceberg-spark-extensions-rust:
275+
needs: build-native
206276
if: contains(github.event.pull_request.title, '[iceberg]')
207277
strategy:
208278
matrix:
209279
os: [ubuntu-24.04]
210280
java-version: [11, 17]
211281
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
212-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
282+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
213283
scala-version: ['2.13']
214284
fail-fast: false
215285
name: iceberg-spark-extensions-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -225,10 +295,14 @@ jobs:
225295
with:
226296
rust-version: ${{env.RUST_VERSION}}
227297
jdk-version: ${{ matrix.java-version }}
298+
- name: Download native library
299+
uses: actions/download-artifact@v7
300+
with:
301+
name: native-lib-iceberg
302+
path: native/target/release/
228303
- name: Build Comet
229-
shell: bash
230304
run: |
231-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
305+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
232306
- name: Setup Iceberg
233307
uses: ./.github/actions/setup-iceberg-rust-builder
234308
with:
@@ -242,13 +316,14 @@ jobs:
242316
-Pquick=true -x javadoc
243317
244318
iceberg-spark-runtime-rust:
319+
needs: build-native
245320
if: contains(github.event.pull_request.title, '[iceberg]')
246321
strategy:
247322
matrix:
248323
os: [ubuntu-24.04]
249324
java-version: [11, 17]
250325
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}, {short: '1.10', full: '1.10.0'}]
251-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
326+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
252327
scala-version: ['2.13']
253328
fail-fast: false
254329
name: iceberg-spark-runtime-rust/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -264,10 +339,14 @@ jobs:
264339
with:
265340
rust-version: ${{env.RUST_VERSION}}
266341
jdk-version: ${{ matrix.java-version }}
342+
- name: Download native library
343+
uses: actions/download-artifact@v7
344+
with:
345+
name: native-lib-iceberg
346+
path: native/target/release/
267347
- name: Build Comet
268-
shell: bash
269348
run: |
270-
PROFILES="-Pspark-${{matrix.spark-version.short}} -Pscala-${{matrix.scala-version}}" make release
349+
./mvnw install -Prelease -DskipTests -Pspark-${{ matrix.spark-version.short }} -Pscala-${{ matrix.scala-version }}
271350
- name: Setup Iceberg
272351
uses: ./.github/actions/setup-iceberg-rust-builder
273352
with:

.github/workflows/pr_build_linux.yml

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ jobs:
164164
- name: "Spark 3.4, JDK 11, Scala 2.12"
165165
java_version: "11"
166166
maven_opts: "-Pspark-3.4 -Pscala-2.12"
167-
scan_impl: "native_comet"
167+
scan_impl: "auto"
168168

169169
- name: "Spark 3.5.5, JDK 17, Scala 2.13"
170170
java_version: "17"
@@ -174,7 +174,7 @@ jobs:
174174
- name: "Spark 3.5.6, JDK 17, Scala 2.13"
175175
java_version: "17"
176176
maven_opts: "-Pspark-3.5 -Dspark.version=3.5.6 -Pscala-2.13"
177-
scan_impl: "native_comet"
177+
scan_impl: "auto"
178178

179179
- name: "Spark 3.5, JDK 17, Scala 2.12"
180180
java_version: "17"
@@ -250,6 +250,7 @@ jobs:
250250
- name: "expressions"
251251
value: |
252252
org.apache.comet.CometExpressionSuite
253+
org.apache.comet.CometSqlFileTestSuite
253254
org.apache.comet.CometExpressionCoverageSuite
254255
org.apache.comet.CometHashExpressionSuite
255256
org.apache.comet.CometTemporalExpressionSuite

.github/workflows/pr_build_macos.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -193,6 +193,7 @@ jobs:
193193
- name: "expressions"
194194
value: |
195195
org.apache.comet.CometExpressionSuite
196+
org.apache.comet.CometSqlFileTestSuite
196197
org.apache.comet.CometExpressionCoverageSuite
197198
org.apache.comet.CometHashExpressionSuite
198199
org.apache.comet.CometTemporalExpressionSuite

.github/workflows/spark_sql_test.yml

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -116,15 +116,12 @@ jobs:
116116
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
117117
# Test combinations:
118118
# - auto scan: all Spark versions (3.4, 3.5, 4.0)
119-
# - native_comet: Spark 3.4, 3.5
120119
# - native_iceberg_compat: Spark 3.5 only
121120
config:
122121
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''}
123-
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'auto', scan-env: ''}
122+
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'auto', scan-env: ''}
123+
- {spark-short: '3.5', spark-full: '3.5.8', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
124124
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
125-
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
126-
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
127-
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
128125
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
129126
exclude:
130127
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}

.github/workflows/spark_sql_test_native_datafusion.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ jobs:
3535
matrix:
3636
os: [ubuntu-24.04]
3737
java-version: [11]
38-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
38+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
3939
module:
4040
- {name: "catalyst", args1: "catalyst/test", args2: ""}
4141
- {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}

.github/workflows/spark_sql_test_native_iceberg_compat.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ jobs:
3535
matrix:
3636
os: [ubuntu-24.04]
3737
java-version: [11]
38-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
38+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.8'}]
3939
module:
4040
- {name: "catalyst", args1: "catalyst/test", args2: ""}
4141
- {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}

0 commit comments

Comments
 (0)