Skip to content

Commit 48f7b03

Browse files
authored
ci: consolidate pr_build test matrix and switch triggers to allow-list (#4410)
1 parent ccb9175 commit 48f7b03

2 files changed

Lines changed: 141 additions & 99 deletions

File tree

.github/workflows/pr_build_linux.yml

Lines changed: 72 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,55 @@ concurrency:
2222
cancel-in-progress: true
2323

2424
on:
25+
# Allow-list of paths that affect this workflow. A change must match a positive
26+
# pattern (and not a trailing "!" exclusion) for the build to run. Editing
27+
# pr_build_macos.yml does not trigger this workflow, and vice versa.
2528
push:
2629
branches:
2730
- main
28-
paths-ignore:
29-
- "benchmarks/**"
30-
- "doc/**"
31-
- "docs/**"
32-
- "**.md"
33-
- "native/core/benches/**"
34-
- "native/spark-expr/benches/**"
35-
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
36-
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
31+
paths:
32+
- "native/**"
33+
- "common/**"
34+
- "spark/**"
35+
- "spark-integration/**"
36+
- "pom.xml"
37+
- "**/pom.xml"
38+
- ".mvn/**"
39+
- "mvnw"
40+
- "Makefile"
41+
- "rust-toolchain.toml"
42+
- "dev/ci/**"
43+
- ".github/workflows/pr_build_linux.yml"
44+
- ".github/actions/setup-builder/**"
45+
- ".github/actions/java-test/**"
46+
- ".github/actions/rust-test/**"
47+
- "!**.md"
48+
- "!native/core/benches/**"
49+
- "!native/spark-expr/benches/**"
50+
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
51+
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
3752
pull_request:
38-
paths-ignore:
39-
- "benchmarks/**"
40-
- "doc/**"
41-
- "docs/**"
42-
- "**.md"
43-
- "native/core/benches/**"
44-
- "native/spark-expr/benches/**"
45-
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
46-
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
53+
paths:
54+
- "native/**"
55+
- "common/**"
56+
- "spark/**"
57+
- "spark-integration/**"
58+
- "pom.xml"
59+
- "**/pom.xml"
60+
- ".mvn/**"
61+
- "mvnw"
62+
- "Makefile"
63+
- "rust-toolchain.toml"
64+
- "dev/ci/**"
65+
- ".github/workflows/pr_build_linux.yml"
66+
- ".github/actions/setup-builder/**"
67+
- ".github/actions/java-test/**"
68+
- ".github/actions/rust-test/**"
69+
- "!**.md"
70+
- "!native/core/benches/**"
71+
- "!native/spark-expr/benches/**"
72+
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
73+
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
4774
# manual trigger
4875
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
4976
workflow_dispatch:
@@ -295,14 +322,28 @@ jobs:
295322
- name: "Spark 4.2, JDK 17"
296323
java_version: "17"
297324
maven_opts: "-Pspark-4.2"
325+
# Suites are grouped by functional area into balanced buckets so that no test
326+
# job runs much longer than ~23 min. See
327+
# docs/superpowers/specs/2026-05-22-pr-build-consolidation-design.md for the
328+
# per-suite timing analysis behind this grouping.
298329
suite:
299-
- name: "fuzz"
330+
- name: "scans"
300331
value: |
332+
org.apache.comet.parquet.CometParquetWriterSuite
333+
org.apache.comet.parquet.ParquetReadV1Suite
334+
org.apache.comet.parquet.ParquetReadV2Suite
335+
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
336+
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
337+
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
338+
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
339+
org.apache.spark.sql.comet.ParquetEncryptionITCase
340+
org.apache.comet.exec.CometNativeReaderSuite
341+
org.apache.comet.CometIcebergNativeSuite
342+
org.apache.comet.CometIcebergRewriteActionSuite
343+
org.apache.comet.iceberg.IcebergReflectionSuite
344+
org.apache.comet.csv.CometCsvNativeReadSuite
301345
org.apache.comet.CometFuzzTestSuite
302-
org.apache.comet.CometFuzzAggregateSuite
303346
org.apache.comet.CometFuzzIcebergSuite
304-
org.apache.comet.CometFuzzMathSuite
305-
org.apache.comet.CometCodegenFuzzSuite
306347
org.apache.comet.DataGeneratorSuite
307348
- name: "shuffle"
308349
value: |
@@ -316,23 +357,6 @@ jobs:
316357
org.apache.comet.exec.DisableAQECometShuffleSuite
317358
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
318359
org.apache.spark.shuffle.sort.SpillSorterSuite
319-
- name: "parquet"
320-
value: |
321-
org.apache.comet.parquet.CometParquetWriterSuite
322-
org.apache.comet.parquet.ParquetReadV1Suite
323-
org.apache.comet.parquet.ParquetReadV2Suite
324-
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
325-
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
326-
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
327-
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
328-
org.apache.spark.sql.comet.ParquetEncryptionITCase
329-
org.apache.comet.exec.CometNativeReaderSuite
330-
org.apache.comet.CometIcebergNativeSuite
331-
org.apache.comet.CometIcebergRewriteActionSuite
332-
org.apache.comet.iceberg.IcebergReflectionSuite
333-
- name: "csv"
334-
value: |
335-
org.apache.comet.csv.CometCsvNativeReadSuite
336360
- name: "exec"
337361
value: |
338362
org.apache.comet.exec.CometAggregateSuite
@@ -360,6 +384,9 @@ jobs:
360384
org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
361385
org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
362386
org.apache.comet.objectstore.NativeConfigSuite
387+
org.apache.spark.sql.CometToPrettyStringSuite
388+
org.apache.spark.sql.CometCollationSuite
389+
org.apache.comet.CometFuzzAggregateSuite
363390
- name: "expressions"
364391
value: |
365392
org.apache.comet.CometExpressionSuite
@@ -376,18 +403,15 @@ jobs:
376403
org.apache.comet.CometMapExpressionSuite
377404
org.apache.comet.CometCsvExpressionSuite
378405
org.apache.comet.CometJsonExpressionSuite
379-
org.apache.comet.CometDateTimeUtilsSuite
380406
org.apache.comet.SparkErrorConverterSuite
381407
org.apache.comet.expressions.conditional.CometIfSuite
382408
org.apache.comet.expressions.conditional.CometCoalesceSuite
383409
org.apache.comet.expressions.conditional.CometCaseWhenSuite
384410
org.apache.comet.CometCodegenSuite
385411
org.apache.comet.CometCodegenSourceSuite
386412
org.apache.comet.CometCodegenHOFSuite
387-
- name: "sql"
388-
value: |
389-
org.apache.spark.sql.CometToPrettyStringSuite
390-
org.apache.spark.sql.CometCollationSuite
413+
org.apache.comet.CometFuzzMathSuite
414+
org.apache.comet.CometCodegenFuzzSuite
391415
fail-fast: false
392416
name: ${{ matrix.profile.name }} [${{ matrix.suite.name }}]
393417
runs-on: ubuntu-24.04
@@ -427,7 +451,7 @@ jobs:
427451
uses: ./.github/actions/java-test
428452
with:
429453
artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
430-
suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
454+
suites: ${{ matrix.suite.value }}
431455
maven_opts: ${{ matrix.profile.maven_opts }}
432456
upload-test-reports: true
433457
skip-native-build: true
@@ -486,19 +510,16 @@ jobs:
486510
run: |
487511
SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test
488512
489-
# TPC-DS correctness tests - verifies benchmark queries produce correct results
513+
# TPC-DS correctness tests - verifies benchmark queries produce correct results.
514+
# The three join strategies run sequentially in one job so the project is built once.
490515
verify-benchmark-results-tpcds:
491516
needs: build-native
492-
name: Verify TPC-DS Results (${{ matrix.join }})
517+
name: Verify TPC-DS Results
493518
runs-on: ubuntu-24.04
494519
container:
495520
image: amd64/rust
496521
env:
497522
JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED
498-
strategy:
499-
matrix:
500-
join: [sort_merge, broadcast, hash]
501-
fail-fast: false
502523
steps:
503524
- uses: actions/checkout@v6
504525

@@ -555,7 +576,6 @@ jobs:
555576
cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw -B -Prelease exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCDSData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--dsdgenDir `pwd`/../tpcds-kit/tools --location `pwd`/../tpcds-sf-1 --scaleFactor 1 --numPartitions 1"
556577
557578
- name: Run TPC-DS queries (Sort merge join)
558-
if: matrix.join == 'sort_merge'
559579
run: |
560580
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
561581
env:
@@ -564,15 +584,13 @@ jobs:
564584
spark.sql.join.preferSortMergeJoin=true
565585
566586
- name: Run TPC-DS queries (Broadcast hash join)
567-
if: matrix.join == 'broadcast'
568587
run: |
569588
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
570589
env:
571590
SPARK_TPCDS_JOIN_CONF: |
572591
spark.sql.autoBroadcastJoinThreshold=10485760
573592
574593
- name: Run TPC-DS queries (Shuffled hash join)
575-
if: matrix.join == 'hash'
576594
run: |
577595
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
578596
env:

.github/workflows/pr_build_macos.yml

Lines changed: 69 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,28 +22,53 @@ concurrency:
2222
cancel-in-progress: true
2323

2424
on:
25+
# Allow-list of paths that affect this workflow. A change must match a positive
26+
# pattern (and not a trailing "!" exclusion) for the build to run. Editing
27+
# pr_build_linux.yml does not trigger this workflow, and vice versa.
2528
push:
2629
branches:
2730
- main
28-
paths-ignore:
29-
- "benchmarks/**"
30-
- "doc/**"
31-
- "docs/**"
32-
- "**.md"
33-
- "native/core/benches/**"
34-
- "native/spark-expr/benches/**"
35-
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
36-
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
31+
paths:
32+
- "native/**"
33+
- "common/**"
34+
- "spark/**"
35+
- "spark-integration/**"
36+
- "pom.xml"
37+
- "**/pom.xml"
38+
- ".mvn/**"
39+
- "mvnw"
40+
- "Makefile"
41+
- "rust-toolchain.toml"
42+
- "dev/ci/**"
43+
- ".github/workflows/pr_build_macos.yml"
44+
- ".github/actions/setup-macos-builder/**"
45+
- ".github/actions/java-test/**"
46+
- "!**.md"
47+
- "!native/core/benches/**"
48+
- "!native/spark-expr/benches/**"
49+
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
50+
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
3751
pull_request:
38-
paths-ignore:
39-
- "benchmarks/**"
40-
- "doc/**"
41-
- "docs/**"
42-
- "**.md"
43-
- "native/core/benches/**"
44-
- "native/spark-expr/benches/**"
45-
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
46-
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
52+
paths:
53+
- "native/**"
54+
- "common/**"
55+
- "spark/**"
56+
- "spark-integration/**"
57+
- "pom.xml"
58+
- "**/pom.xml"
59+
- ".mvn/**"
60+
- "mvnw"
61+
- "Makefile"
62+
- "rust-toolchain.toml"
63+
- "dev/ci/**"
64+
- ".github/workflows/pr_build_macos.yml"
65+
- ".github/actions/setup-macos-builder/**"
66+
- ".github/actions/java-test/**"
67+
- "!**.md"
68+
- "!native/core/benches/**"
69+
- "!native/spark-expr/benches/**"
70+
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
71+
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
4772
# manual trigger
4873
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
4974
workflow_dispatch:
@@ -136,14 +161,29 @@ jobs:
136161
java_version: "17"
137162
maven_opts: "-Pspark-4.0 -Pscala-2.13"
138163

164+
# Suites are grouped by functional area into balanced buckets so that no test
165+
# job runs much longer than ~23 min. See
166+
# docs/superpowers/specs/2026-05-22-pr-build-consolidation-design.md for the
167+
# per-suite timing analysis behind this grouping. Keep this list in sync with
168+
# pr_build_linux.yml; dev/ci/check-suites.py requires every suite in both files.
139169
suite:
140-
- name: "fuzz"
170+
- name: "scans"
141171
value: |
172+
org.apache.comet.parquet.CometParquetWriterSuite
173+
org.apache.comet.parquet.ParquetReadV1Suite
174+
org.apache.comet.parquet.ParquetReadV2Suite
175+
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
176+
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
177+
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
178+
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
179+
org.apache.spark.sql.comet.ParquetEncryptionITCase
180+
org.apache.comet.exec.CometNativeReaderSuite
181+
org.apache.comet.CometIcebergNativeSuite
182+
org.apache.comet.CometIcebergRewriteActionSuite
183+
org.apache.comet.iceberg.IcebergReflectionSuite
184+
org.apache.comet.csv.CometCsvNativeReadSuite
142185
org.apache.comet.CometFuzzTestSuite
143-
org.apache.comet.CometFuzzAggregateSuite
144186
org.apache.comet.CometFuzzIcebergSuite
145-
org.apache.comet.CometFuzzMathSuite
146-
org.apache.comet.CometCodegenFuzzSuite
147187
org.apache.comet.DataGeneratorSuite
148188
- name: "shuffle"
149189
value: |
@@ -157,23 +197,6 @@ jobs:
157197
org.apache.comet.exec.DisableAQECometShuffleSuite
158198
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
159199
org.apache.spark.shuffle.sort.SpillSorterSuite
160-
- name: "parquet"
161-
value: |
162-
org.apache.comet.parquet.CometParquetWriterSuite
163-
org.apache.comet.parquet.ParquetReadV1Suite
164-
org.apache.comet.parquet.ParquetReadV2Suite
165-
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
166-
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
167-
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
168-
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
169-
org.apache.spark.sql.comet.ParquetEncryptionITCase
170-
org.apache.comet.exec.CometNativeReaderSuite
171-
org.apache.comet.CometIcebergNativeSuite
172-
org.apache.comet.CometIcebergRewriteActionSuite
173-
org.apache.comet.iceberg.IcebergReflectionSuite
174-
- name: "csv"
175-
value: |
176-
org.apache.comet.csv.CometCsvNativeReadSuite
177200
- name: "exec"
178201
value: |
179202
org.apache.comet.exec.CometAggregateSuite
@@ -201,6 +224,9 @@ jobs:
201224
org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
202225
org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
203226
org.apache.comet.objectstore.NativeConfigSuite
227+
org.apache.spark.sql.CometToPrettyStringSuite
228+
org.apache.spark.sql.CometCollationSuite
229+
org.apache.comet.CometFuzzAggregateSuite
204230
- name: "expressions"
205231
value: |
206232
org.apache.comet.CometExpressionSuite
@@ -210,24 +236,22 @@ jobs:
210236
org.apache.comet.CometTemporalExpressionSuite
211237
org.apache.comet.CometArrayExpressionSuite
212238
org.apache.comet.CometCastSuite
239+
org.apache.comet.CometDateTimeUtilsSuite
213240
org.apache.comet.CometMathExpressionSuite
214241
org.apache.comet.CometStringExpressionSuite
215242
org.apache.comet.CometBitwiseExpressionSuite
216243
org.apache.comet.CometMapExpressionSuite
217-
org.apache.comet.CometJsonExpressionSuite
218244
org.apache.comet.CometCsvExpressionSuite
219-
org.apache.comet.CometDateTimeUtilsSuite
245+
org.apache.comet.CometJsonExpressionSuite
220246
org.apache.comet.SparkErrorConverterSuite
221247
org.apache.comet.expressions.conditional.CometIfSuite
222248
org.apache.comet.expressions.conditional.CometCoalesceSuite
223249
org.apache.comet.expressions.conditional.CometCaseWhenSuite
224250
org.apache.comet.CometCodegenSuite
225251
org.apache.comet.CometCodegenSourceSuite
226252
org.apache.comet.CometCodegenHOFSuite
227-
- name: "sql"
228-
value: |
229-
org.apache.spark.sql.CometToPrettyStringSuite
230-
org.apache.spark.sql.CometCollationSuite
253+
org.apache.comet.CometFuzzMathSuite
254+
org.apache.comet.CometCodegenFuzzSuite
231255
232256
fail-fast: false
233257
name: ${{ matrix.os }}/${{ matrix.profile.name }} [${{ matrix.suite.name }}]

0 commit comments

Comments
 (0)