Skip to content

Commit 72496da

Browse files
authored
ci: Consolidate Spark SQL test jobs to reduce CI time (#3271)
1 parent bbcca1f commit 72496da

1 file changed

Lines changed: 20 additions & 122 deletions

File tree

.github/workflows/spark_sql_test.yml

Lines changed: 20 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -101,12 +101,11 @@ jobs:
101101
native/target
102102
key: ${{ runner.os }}-cargo-ci-${{ hashFiles('native/**/Cargo.lock', 'native/**/Cargo.toml') }}
103103

104-
spark-sql-auto-scan:
104+
spark-sql-test:
105105
needs: build-native
106106
strategy:
107107
matrix:
108108
os: [ubuntu-24.04]
109-
spark-version: [{short: '3.4', full: '3.4.3', java: 11}, {short: '3.5', full: '3.5.7', java: 11}, {short: '4.0', full: '4.0.1', java: 17}]
110109
module:
111110
- {name: "catalyst", args1: "catalyst/test", args2: ""}
112111
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -115,12 +114,23 @@ jobs:
115114
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
116115
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
117116
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
117+
# Test combinations:
118+
# - auto scan: all Spark versions (3.4, 3.5, 4.0)
119+
# - native_comet: Spark 3.4, 3.5
120+
# - native_iceberg_compat: Spark 3.5 only
121+
config:
122+
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'auto', scan-env: ''}
123+
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'auto', scan-env: ''}
124+
- {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
125+
- {spark-short: '3.4', spark-full: '3.4.3', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
126+
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_comet', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_comet'}
127+
- {spark-short: '3.5', spark-full: '3.5.7', java: 11, scan-impl: 'native_iceberg_compat', scan-env: 'COMET_PARQUET_SCAN_IMPL=native_iceberg_compat'}
118128
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
119129
exclude:
120-
- spark-version: {short: '4.0', full: '4.0.1', java: 17}
130+
- config: {spark-short: '4.0', spark-full: '4.0.1', java: 17, scan-impl: 'auto', scan-env: ''}
121131
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
122132
fail-fast: false
123-
name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.spark-version.java }}
133+
name: spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}/spark-${{ matrix.config.spark-full }}
124134
runs-on: ${{ matrix.os }}
125135
container:
126136
image: amd64/rust
@@ -130,7 +140,7 @@ jobs:
130140
uses: ./.github/actions/setup-builder
131141
with:
132142
rust-version: ${{env.RUST_VERSION}}
133-
jdk-version: ${{ matrix.spark-version.java }}
143+
jdk-version: ${{ matrix.config.java }}
134144
- name: Download native library
135145
uses: actions/download-artifact@v7
136146
with:
@@ -139,14 +149,14 @@ jobs:
139149
- name: Setup Spark
140150
uses: ./.github/actions/setup-spark-builder
141151
with:
142-
spark-version: ${{ matrix.spark-version.full }}
143-
spark-short-version: ${{ matrix.spark-version.short }}
152+
spark-version: ${{ matrix.config.spark-full }}
153+
spark-short-version: ${{ matrix.config.spark-short }}
144154
skip-native-build: true
145155
- name: Run Spark tests
146156
run: |
147157
cd apache-spark
148158
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
149-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
159+
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ${{ matrix.config.scan-env }} ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
150160
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
151161
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
152162
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
@@ -157,125 +167,13 @@ jobs:
157167
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
158168
uses: actions/upload-artifact@v6
159169
with:
160-
name: fallback-log-spark-sql-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.spark-version.java }}
161-
path: "**/fallback.log"
162-
163-
spark-sql-native-native-comet:
164-
needs: build-native
165-
strategy:
166-
matrix:
167-
os: [ ubuntu-24.04 ]
168-
java-version: [ 11 ]
169-
spark-version: [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.7' } ]
170-
module:
171-
- { name: "catalyst", args1: "catalyst/test", args2: "" }
172-
- { name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest }
173-
- { name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest" }
174-
- { name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest" }
175-
- { name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest" }
176-
- { name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest" }
177-
- { name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest" }
178-
fail-fast: false
179-
name: spark-sql-native-comet-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
180-
runs-on: ${{ matrix.os }}
181-
container:
182-
image: amd64/rust
183-
steps:
184-
- uses: actions/checkout@v6
185-
- name: Setup Rust & Java toolchain
186-
uses: ./.github/actions/setup-builder
187-
with:
188-
rust-version: ${{env.RUST_VERSION}}
189-
jdk-version: ${{ matrix.java-version }}
190-
- name: Download native library
191-
uses: actions/download-artifact@v7
192-
with:
193-
name: native-lib-linux
194-
path: native/target/release/
195-
- name: Setup Spark
196-
uses: ./.github/actions/setup-spark-builder
197-
with:
198-
spark-version: ${{ matrix.spark-version.full }}
199-
spark-short-version: ${{ matrix.spark-version.short }}
200-
skip-native-build: true
201-
- name: Run Spark tests
202-
run: |
203-
cd apache-spark
204-
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
205-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=native_comet ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
206-
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
207-
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
208-
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
209-
fi
210-
env:
211-
LC_ALL: "C.UTF-8"
212-
- name: Upload fallback log
213-
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
214-
uses: actions/upload-artifact@v6
215-
with:
216-
name: fallback-log-spark-sql-native-comet-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
217-
path: "**/fallback.log"
218-
219-
spark-sql-native-iceberg-compat:
220-
needs: build-native
221-
strategy:
222-
matrix:
223-
os: [ubuntu-24.04]
224-
java-version: [11]
225-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
226-
module:
227-
- {name: "catalyst", args1: "catalyst/test", args2: ""}
228-
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
229-
- {name: "sql_core-2", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.ExtendedSQLTest"}
230-
- {name: "sql_core-3", args1: "", args2: "sql/testOnly * -- -n org.apache.spark.tags.SlowSQLTest"}
231-
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
232-
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
233-
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
234-
fail-fast: false
235-
name: spark-sql-iceberg-compat-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.java-version }}
236-
runs-on: ${{ matrix.os }}
237-
container:
238-
image: amd64/rust
239-
steps:
240-
- uses: actions/checkout@v6
241-
- name: Setup Rust & Java toolchain
242-
uses: ./.github/actions/setup-builder
243-
with:
244-
rust-version: ${{env.RUST_VERSION}}
245-
jdk-version: ${{ matrix.java-version }}
246-
- name: Download native library
247-
uses: actions/download-artifact@v7
248-
with:
249-
name: native-lib-linux
250-
path: native/target/release/
251-
- name: Setup Spark
252-
uses: ./.github/actions/setup-spark-builder
253-
with:
254-
spark-version: ${{ matrix.spark-version.full }}
255-
spark-short-version: ${{ matrix.spark-version.short }}
256-
skip-native-build: true
257-
- name: Run Spark tests
258-
run: |
259-
cd apache-spark
260-
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
261-
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true COMET_PARQUET_SCAN_IMPL=native_iceberg_compat ENABLE_COMET_LOG_FALLBACK_REASONS=${{ github.event.inputs.collect-fallback-logs || 'false' }} \
262-
build/sbt -Dsbt.log.noformat=true ${{ matrix.module.args1 }} "${{ matrix.module.args2 }}"
263-
if [ "${{ github.event.inputs.collect-fallback-logs }}" = "true" ]; then
264-
find . -type f -name "unit-tests.log" -print0 | xargs -0 grep -h "Comet cannot accelerate" | sed 's/.*Comet cannot accelerate/Comet cannot accelerate/' | sort -u > fallback.log
265-
fi
266-
env:
267-
LC_ALL: "C.UTF-8"
268-
- name: Upload fallback log
269-
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
270-
uses: actions/upload-artifact@v6
271-
with:
272-
name: fallback-log-spark-sql-iceberg-compat-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
170+
name: fallback-log-spark-sql-${{ matrix.config.scan-impl }}-${{ matrix.module.name }}-spark-${{ matrix.config.spark-full }}
273171
path: "**/fallback.log"
274172

275173
merge-fallback-logs:
276174
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
277175
name: merge-fallback-logs
278-
needs: [ spark-sql-auto-scan, spark-sql-native-native-comet, spark-sql-native-iceberg-compat ]
176+
needs: [spark-sql-test]
279177
runs-on: ubuntu-24.04
280178
steps:
281179
- name: Download fallback log artifacts

0 commit comments

Comments
 (0)