@@ -101,12 +101,11 @@ jobs:
101101 native/target
102102 key : ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
103103
104- spark-sql-auto-scan :
104+ spark-sql-test :
105105 needs : build-native
106106 strategy :
107107 matrix :
108108 os : [ubuntu-24.04]
109- spark-version : [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.0', full: '4.0.1', java: 17}]
110109 module :
111110 - {name: "catalyst", args1: "catalyst/test", args2: ""}
112111 - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -115,12 +114,23 @@ jobs:
115114 - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
116115 - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
117116 - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
117+ # Test combinations:
118+ # - auto scan: all Spark versions (3.4, 3.5, 4.0)
119+ # - native_comet: Spark 3.4, 3.5
120+ # - native_iceberg_compat: Spark 3.5 only
121+ config :
122+ - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''}
123+ - {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'auto', scan-env: ''}
124+ - {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
125+ - {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
126+ - {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
127+ - {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
118128 # Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
119129 exclude :
120- - spark-version : {short: '4.0', full: '4.0.1', java: 17}
130+ - config : {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
121131 module : {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
122132 fail-fast : false
123- name : spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark -${{ matrix.spark-version.full }}/java -${{ matrix.spark-version.java }}
133+ name : spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
124134 runs-on : ${{ matrix.os }}
125135 container :
126136 image : amd64/rust
@@ -130,7 +140,7 @@ jobs:
130140 uses : ./.github/actions/setup-builder
131141 with :
132142 rust-version : ${{env.RUST_VERSION}}
133- jdk-version : ${{ matrix.spark-version .java }}
143+ jdk-version : ${{ matrix.config.java }}
134144 - name : Download native library
135145 uses : actions/download-artifact@v7
136146 with :
@@ -139,14 +149,14 @@ jobs:
139149 - name : Setup Spark
140150 uses : ./.github/actions/setup-spark-builder
141151 with :
142- spark-version : ${{ matrix.spark-version. full }}
143- spark-short-version : ${{ matrix.spark-version. short }}
152+ spark-version : ${{ matrix.config.spark-full }}
153+ spark-short-version : ${{ matrix.config.spark-short }}
144154 skip-native-build : true
145155 - name : Run Spark tests
146156 run : |
147157 cd apache-spark
148158 rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
149- ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
159+ ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ${{ matrix.config.scan-env }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
150160 build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
151161 if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
152162 find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
@@ -157,125 +167,13 @@ jobs:
157167 if : ${{ github.event.inputs.collect-fallback-logs == 'true' }}
158168 uses : actions/upload-artifact@v6
159169 with :
160- name : fallback-log-spark-sql-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.spark-version.java }}
161- path : " **/fallback.log"
162-
163- spark-sql-native-native-comet :
164- needs : build-native
165- strategy :
166- matrix :
167- os : [ ubuntu-24.04 ]
168- java-version : [ 11 ]
169- spark-version : [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.7' } ]
170- module :
171- - { name: "catalyst", args1: "catalyst/test", args2: "" }
172- - { name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest }
173- - { name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest" }
174- - { name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest" }
175- - { name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest" }
176- - { name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest" }
177- - { name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest" }
178- fail-fast : false
179- name : spark-sql-native-comet-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
180- runs-on : ${{ matrix.os }}
181- container :
182- image : amd64/rust
183- steps :
184- - uses : actions/checkout@v6
185- - name : Setup Rust & Java toolchain
186- uses : ./.github/actions/setup-builder
187- with :
188- rust-version : ${{env.RUST_VERSION}}
189- jdk-version : ${{ matrix.java-version }}
190- - name : Download native library
191- uses : actions/download-artifact@v7
192- with :
193- name : native-lib-linux
194- path : native/target/release/
195- - name : Setup Spark
196- uses : ./.github/actions/setup-spark-builder
197- with :
198- spark-version : ${{ matrix.spark-version.full }}
199- spark-short-version : ${{ matrix.spark-version.short }}
200- skip-native-build : true
201- - name : Run Spark tests
202- run : |
203- cd apache-spark
204- rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
205- ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=native_comet ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
206- build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
207- if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
208- find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
209- fi
210- env :
211- LC_ALL : " C.UTF-8"
212- - name : Upload fallback log
213- if : ${{ github.event.inputs.collect-fallback-logs == 'true' }}
214- uses : actions/upload-artifact@v6
215- with :
216- name : fallback-log-spark-sql-native-comet-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
217- path : " **/fallback.log"
218-
219- spark-sql-native-iceberg-compat :
220- needs : build-native
221- strategy :
222- matrix :
223- os : [ubuntu-24.04]
224- java-version : [11]
225- spark-version : [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
226- module :
227- - {name: "catalyst", args1: "catalyst/test", args2: ""}
228- - {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
229- - {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
230- - {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
231- - {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
232- - {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
233- - {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
234- fail-fast : false
235- name : spark-sql-iceberg-compat-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
236- runs-on : ${{ matrix.os }}
237- container :
238- image : amd64/rust
239- steps :
240- - uses : actions/checkout@v6
241- - name : Setup Rust & Java toolchain
242- uses : ./.github/actions/setup-builder
243- with :
244- rust-version : ${{env.RUST_VERSION}}
245- jdk-version : ${{ matrix.java-version }}
246- - name : Download native library
247- uses : actions/download-artifact@v7
248- with :
249- name : native-lib-linux
250- path : native/target/release/
251- - name : Setup Spark
252- uses : ./.github/actions/setup-spark-builder
253- with :
254- spark-version : ${{ matrix.spark-version.full }}
255- spark-short-version : ${{ matrix.spark-version.short }}
256- skip-native-build : true
257- - name : Run Spark tests
258- run : |
259- cd apache-spark
260- rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
261- ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=native_iceberg_compat ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
262- build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
263- if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
264- find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
265- fi
266- env :
267- LC_ALL : " C.UTF-8"
268- - name : Upload fallback log
269- if : ${{ github.event.inputs.collect-fallback-logs == 'true' }}
270- uses : actions/upload-artifact@v6
271- with :
272- name : fallback-log-spark-sql-iceberg-compat-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
170+ name : fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}
273171 path : " **/fallback.log"
274172
275173 merge-fallback-logs :
276174 if : ${{ github.event.inputs.collect-fallback-logs == 'true' }}
277175 name : merge-fallback-logs
278- needs : [ spark-sql-auto-scan, spark-sql-native-native-comet, spark-sql-native-iceberg-compat ]
176+ needs : [spark-sql-test ]
279177 runs-on : ubuntu-24.04
280178 steps :
281179 - name : Download fallback log artifacts
0 commit comments