Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 72 additions & 54 deletions .github/workflows/pr_build_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,55 @@ concurrency:
cancel-in-progress: true

on:
# Allow-list of paths that affect this workflow. The build runs only when at
# least one changed file matches a positive pattern and is not excluded by a
# later "!" pattern. Editing pr_build_macos.yml does not trigger this workflow,
# and vice versa.
push:
branches:
- main
paths-ignore:
- "benchmarks/**"
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
paths:
- "native/**"
- "common/**"
- "spark/**"
- "spark-integration/**"
- "pom.xml"
- "**/pom.xml"
- ".mvn/**"
- "mvnw"
- "Makefile"
- "rust-toolchain.toml"
- "dev/ci/**"
- ".github/workflows/pr_build_linux.yml"
- ".github/actions/setup-builder/**"
- ".github/actions/java-test/**"
- ".github/actions/rust-test/**"
- "!**.md"
- "!native/core/benches/**"
- "!native/spark-expr/benches/**"
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
pull_request:
paths-ignore:
- "benchmarks/**"
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
paths:
- "native/**"
- "common/**"
- "spark/**"
- "spark-integration/**"
- "pom.xml"
- "**/pom.xml"
- ".mvn/**"
- "mvnw"
- "Makefile"
- "rust-toolchain.toml"
- "dev/ci/**"
- ".github/workflows/pr_build_linux.yml"
- ".github/actions/setup-builder/**"
- ".github/actions/java-test/**"
- ".github/actions/rust-test/**"
- "!**.md"
- "!native/core/benches/**"
- "!native/spark-expr/benches/**"
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
Expand Down Expand Up @@ -295,14 +322,28 @@ jobs:
- name: "Spark 4.2, JDK 17"
java_version: "17"
maven_opts: "-Pspark-4.2"
# Suites are grouped by functional area into balanced buckets so that no test
# job runs much longer than ~23 min. See
# docs/superpowers/specs/2026-05-22-pr-build-consolidation-design.md for the
# per-suite timing analysis behind this grouping.
suite:
- name: "fuzz"
- name: "scans"
value: |
org.apache.comet.parquet.CometParquetWriterSuite
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
org.apache.comet.exec.CometNativeReaderSuite
org.apache.comet.CometIcebergNativeSuite
org.apache.comet.CometIcebergRewriteActionSuite
org.apache.comet.iceberg.IcebergReflectionSuite
org.apache.comet.csv.CometCsvNativeReadSuite
org.apache.comet.CometFuzzTestSuite
org.apache.comet.CometFuzzAggregateSuite
org.apache.comet.CometFuzzIcebergSuite
org.apache.comet.CometFuzzMathSuite
org.apache.comet.CometCodegenFuzzSuite
org.apache.comet.DataGeneratorSuite
- name: "shuffle"
value: |
Expand All @@ -316,23 +357,6 @@ jobs:
org.apache.comet.exec.DisableAQECometShuffleSuite
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
org.apache.spark.shuffle.sort.SpillSorterSuite
- name: "parquet"
value: |
org.apache.comet.parquet.CometParquetWriterSuite
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
org.apache.comet.exec.CometNativeReaderSuite
org.apache.comet.CometIcebergNativeSuite
org.apache.comet.CometIcebergRewriteActionSuite
org.apache.comet.iceberg.IcebergReflectionSuite
- name: "csv"
value: |
org.apache.comet.csv.CometCsvNativeReadSuite
- name: "exec"
value: |
org.apache.comet.exec.CometAggregateSuite
Expand Down Expand Up @@ -360,6 +384,9 @@ jobs:
org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
org.apache.comet.objectstore.NativeConfigSuite
org.apache.spark.sql.CometToPrettyStringSuite
org.apache.spark.sql.CometCollationSuite
org.apache.comet.CometFuzzAggregateSuite
- name: "expressions"
value: |
org.apache.comet.CometExpressionSuite
Expand All @@ -376,18 +403,15 @@ jobs:
org.apache.comet.CometMapExpressionSuite
org.apache.comet.CometCsvExpressionSuite
org.apache.comet.CometJsonExpressionSuite
org.apache.comet.CometDateTimeUtilsSuite
org.apache.comet.SparkErrorConverterSuite
org.apache.comet.expressions.conditional.CometIfSuite
org.apache.comet.expressions.conditional.CometCoalesceSuite
org.apache.comet.expressions.conditional.CometCaseWhenSuite
org.apache.comet.CometCodegenSuite
org.apache.comet.CometCodegenSourceSuite
org.apache.comet.CometCodegenHOFSuite
- name: "sql"
value: |
org.apache.spark.sql.CometToPrettyStringSuite
org.apache.spark.sql.CometCollationSuite
org.apache.comet.CometFuzzMathSuite
org.apache.comet.CometCodegenFuzzSuite
fail-fast: false
name: ${{ matrix.profile.name }} [${{ matrix.suite.name }}]
runs-on: ubuntu-24.04
Expand Down Expand Up @@ -427,7 +451,7 @@ jobs:
uses: ./.github/actions/java-test
with:
artifact_name: ${{ matrix.profile.name }}-${{ matrix.suite.name }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}
suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

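Remove the dead Spark 3.4 + `sql` special case. Its `&& '' || matrix.suite.value` expression always evaluated to `matrix.suite.value`: the empty string is falsy, so the `||` fallback was taken even when the condition matched.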

suites: ${{ matrix.suite.value }}
maven_opts: ${{ matrix.profile.maven_opts }}
upload-test-reports: true
skip-native-build: true
Expand Down Expand Up @@ -486,19 +510,16 @@ jobs:
run: |
SPARK_HOME=`pwd` SPARK_TPCH_DATA=`pwd`/tpch/sf1_parquet ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCHQuerySuite test

# TPC-DS correctness tests - verifies benchmark queries produce correct results
# TPC-DS correctness tests - verifies benchmark queries produce correct results.
# The three join strategies run sequentially in one job so the project is built once.
verify-benchmark-results-tpcds:
needs: build-native
name: Verify TPC-DS Results (${{ matrix.join }})
name: Verify TPC-DS Results
runs-on: ubuntu-24.04
container:
image: amd64/rust
env:
JAVA_TOOL_OPTIONS: --add-exports=java.base/sun.nio.ch=ALL-UNNAMED --add-exports=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED
strategy:
matrix:
join: [sort_merge, broadcast, hash]
fail-fast: false
steps:
- uses: actions/checkout@v6

Expand Down Expand Up @@ -555,7 +576,6 @@ jobs:
cd spark && MAVEN_OPTS='-Xmx20g' ../mvnw -B -Prelease exec:java -Dexec.mainClass="org.apache.spark.sql.GenTPCDSData" -Dexec.classpathScope="test" -Dexec.cleanupDaemonThreads="false" -Dexec.args="--dsdgenDir `pwd`/../tpcds-kit/tools --location `pwd`/../tpcds-sf-1 --scaleFactor 1 --numPartitions 1"

- name: Run TPC-DS queries (Sort merge join)
if: matrix.join == 'sort_merge'
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
Expand All @@ -564,15 +584,13 @@ jobs:
spark.sql.join.preferSortMergeJoin=true

- name: Run TPC-DS queries (Broadcast hash join)
if: matrix.join == 'broadcast'
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=10485760

- name: Run TPC-DS queries (Shuffled hash join)
if: matrix.join == 'hash'
run: |
SPARK_HOME=`pwd` SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 ./mvnw -B -Prelease -Dsuites=org.apache.spark.sql.CometTPCDSQuerySuite test
env:
Expand Down
114 changes: 69 additions & 45 deletions .github/workflows/pr_build_macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,53 @@ concurrency:
cancel-in-progress: true

on:
# Allow-list of paths that affect this workflow. The build runs only when at
# least one changed file matches a positive pattern and is not excluded by a
# later "!" pattern. Editing pr_build_linux.yml does not trigger this workflow,
# and vice versa.
push:
branches:
- main
paths-ignore:
- "benchmarks/**"
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
paths:
- "native/**"
- "common/**"
- "spark/**"
- "spark-integration/**"
- "pom.xml"
- "**/pom.xml"
- ".mvn/**"
- "mvnw"
- "Makefile"
- "rust-toolchain.toml"
- "dev/ci/**"
- ".github/workflows/pr_build_macos.yml"
- ".github/actions/setup-macos-builder/**"
- ".github/actions/java-test/**"
- "!**.md"
- "!native/core/benches/**"
- "!native/spark-expr/benches/**"
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
pull_request:
paths-ignore:
- "benchmarks/**"
- "doc/**"
- "docs/**"
- "**.md"
- "native/core/benches/**"
- "native/spark-expr/benches/**"
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
paths:
- "native/**"
- "common/**"
- "spark/**"
- "spark-integration/**"
- "pom.xml"
- "**/pom.xml"
- ".mvn/**"
- "mvnw"
- "Makefile"
- "rust-toolchain.toml"
- "dev/ci/**"
- ".github/workflows/pr_build_macos.yml"
- ".github/actions/setup-macos-builder/**"
- ".github/actions/java-test/**"
- "!**.md"
- "!native/core/benches/**"
- "!native/spark-expr/benches/**"
- "!spark/src/test/scala/org/apache/spark/sql/benchmark/**"
- "!spark/src/main/scala/org/apache/comet/GenerateDocs.scala"
# manual trigger
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
workflow_dispatch:
Expand Down Expand Up @@ -136,14 +161,29 @@ jobs:
java_version: "17"
maven_opts: "-Pspark-4.0 -Pscala-2.13"

# Suites are grouped by functional area into balanced buckets so that no test
# job runs much longer than ~23 min. See
# docs/superpowers/specs/2026-05-22-pr-build-consolidation-design.md for the
# per-suite timing analysis behind this grouping. Keep this list in sync with
# pr_build_linux.yml; dev/ci/check-suites.py requires every suite to be listed
# in both files.
suite:
- name: "fuzz"
- name: "scans"
value: |
org.apache.comet.parquet.CometParquetWriterSuite
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
org.apache.comet.exec.CometNativeReaderSuite
org.apache.comet.CometIcebergNativeSuite
org.apache.comet.CometIcebergRewriteActionSuite
org.apache.comet.iceberg.IcebergReflectionSuite
org.apache.comet.csv.CometCsvNativeReadSuite
org.apache.comet.CometFuzzTestSuite
org.apache.comet.CometFuzzAggregateSuite
org.apache.comet.CometFuzzIcebergSuite
org.apache.comet.CometFuzzMathSuite
org.apache.comet.CometCodegenFuzzSuite
org.apache.comet.DataGeneratorSuite
- name: "shuffle"
value: |
Expand All @@ -157,23 +197,6 @@ jobs:
org.apache.comet.exec.DisableAQECometShuffleSuite
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
org.apache.spark.shuffle.sort.SpillSorterSuite
- name: "parquet"
value: |
org.apache.comet.parquet.CometParquetWriterSuite
org.apache.comet.parquet.ParquetReadV1Suite
org.apache.comet.parquet.ParquetReadV2Suite
org.apache.comet.parquet.ParquetReadFromFakeHadoopFsSuite
org.apache.comet.parquet.ParquetTimestampLtzAsNtzSuite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV1Suite
org.apache.spark.sql.comet.ParquetDatetimeRebaseV2Suite
org.apache.spark.sql.comet.ParquetEncryptionITCase
org.apache.comet.exec.CometNativeReaderSuite
org.apache.comet.CometIcebergNativeSuite
org.apache.comet.CometIcebergRewriteActionSuite
org.apache.comet.iceberg.IcebergReflectionSuite
- name: "csv"
value: |
org.apache.comet.csv.CometCsvNativeReadSuite
- name: "exec"
value: |
org.apache.comet.exec.CometAggregateSuite
Expand Down Expand Up @@ -201,6 +224,9 @@ jobs:
org.apache.spark.sql.comet.CometShuffleFallbackStickinessSuite
org.apache.spark.sql.comet.CometDecimalArithmeticViewSuite
org.apache.comet.objectstore.NativeConfigSuite
org.apache.spark.sql.CometToPrettyStringSuite
org.apache.spark.sql.CometCollationSuite
org.apache.comet.CometFuzzAggregateSuite
- name: "expressions"
value: |
org.apache.comet.CometExpressionSuite
Expand All @@ -210,24 +236,22 @@ jobs:
org.apache.comet.CometTemporalExpressionSuite
org.apache.comet.CometArrayExpressionSuite
org.apache.comet.CometCastSuite
org.apache.comet.CometDateTimeUtilsSuite
org.apache.comet.CometMathExpressionSuite
org.apache.comet.CometStringExpressionSuite
org.apache.comet.CometBitwiseExpressionSuite
org.apache.comet.CometMapExpressionSuite
org.apache.comet.CometJsonExpressionSuite
org.apache.comet.CometCsvExpressionSuite
org.apache.comet.CometDateTimeUtilsSuite
org.apache.comet.CometJsonExpressionSuite
org.apache.comet.SparkErrorConverterSuite
org.apache.comet.expressions.conditional.CometIfSuite
org.apache.comet.expressions.conditional.CometCoalesceSuite
org.apache.comet.expressions.conditional.CometCaseWhenSuite
org.apache.comet.CometCodegenSuite
org.apache.comet.CometCodegenSourceSuite
org.apache.comet.CometCodegenHOFSuite
- name: "sql"
value: |
org.apache.spark.sql.CometToPrettyStringSuite
org.apache.spark.sql.CometCollationSuite
org.apache.comet.CometFuzzMathSuite
org.apache.comet.CometCodegenFuzzSuite

fail-fast: false
name: ${{ matrix.os }}/${{ matrix.profile.name }} [${{ matrix.suite.name }}]
Expand Down
Loading