Skip to content

Commit 6cd6cf3

Browse files
authored
ci: Enable Comet PR test matrix and TPCDS plan-stability for Spark 4.2 (#4126)
1 parent 4d01ce1 commit 6cd6cf3

278 files changed

Lines changed: 27593 additions & 15 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/pr_build_linux.yml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,10 @@ jobs:
9797
- name: "Spark 4.0, JDK 21"
9898
java_version: "21"
9999
maven_opts: "-Pspark-4.0"
100-
# Spark 4.1 is intentionally absent: the lint job invokes -Psemanticdb,
101-
# but semanticdb-scalac_2.13.17 is not yet published, so we cannot
102-
# currently run scalafix against the spark-4.1 profile.
100+
# Spark 4.1 and 4.2 are intentionally absent: the lint job invokes -Psemanticdb,
101+
# but semanticdb-scalac for those Scala patch versions (2.13.17 / 2.13.18) is not
102+
# yet published, so we cannot currently run scalafix against the spark-4.1 or
103+
# spark-4.2 profiles.
103104
fail-fast: false
104105
steps:
105106
- uses: runs-on/action@742bf56072eb4845a0f94b3394673e4903c90ff0 # v2.1.0
@@ -305,6 +306,11 @@ jobs:
305306
java_version: "17"
306307
maven_opts: "-Pspark-4.1"
307308
scan_impl: "auto"
309+
310+
- name: "Spark 4.2, JDK 17"
311+
java_version: "17"
312+
maven_opts: "-Pspark-4.2"
313+
scan_impl: "auto"
308314
suite:
309315
- name: "fuzz"
310316
value: |

.github/workflows/pr_build_macos.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,12 @@ jobs:
144144
# runtime; the scala-2.13 profile would override it back to 2.13.16 and break.
145145
maven_opts: "-Pspark-4.1"
146146

147+
- name: "Spark 4.2, JDK 17, Scala 2.13"
148+
java_version: "17"
149+
# The spark-4.2 profile pins Scala to 2.13.18 to match Spark 4.2.0-preview4's
150+
# runtime; the scala-2.13 profile would override it back to 2.13.16 and break.
151+
maven_opts: "-Pspark-4.2"
152+
147153
suite:
148154
- name: "fuzz"
149155
value: |

spark/pom.xml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -300,7 +300,28 @@ under the License.
300300
</profile>
301301
<profile>
302302
<id>spark-4.2</id>
303-
<!-- 4.2 preview profile is build-only; no Iceberg or Jetty test dependencies are wired up. -->
303+
<dependencies>
304+
<!-- iceberg-spark-runtime-4.2 is not yet published; reuse the 4.0 runtime -->
305+
<dependency>
306+
<groupId>org.apache.iceberg</groupId>
307+
<artifactId>iceberg-spark-runtime-4.0_${scala.binary.version}</artifactId>
308+
<version>1.10.0</version>
309+
<scope>test</scope>
310+
</dependency>
311+
<!-- Jetty 11.x for Spark 4.2 (jakarta.servlet); matches Spark 4.2.0-preview4's jetty.version -->
312+
<dependency>
313+
<groupId>org.eclipse.jetty</groupId>
314+
<artifactId>jetty-server</artifactId>
315+
<version>11.0.26</version>
316+
<scope>test</scope>
317+
</dependency>
318+
<dependency>
319+
<groupId>org.eclipse.jetty</groupId>
320+
<artifactId>jetty-servlet</artifactId>
321+
<version>11.0.26</version>
322+
<scope>test</scope>
323+
</dependency>
324+
</dependencies>
304325
</profile>
305326
<profile>
306327
<id>generate-docs</id>

spark/src/main/scala/org/apache/comet/CometSparkSessionExtensions.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,10 @@ object CometSparkSessionExtensions extends Logging {
169169
org.apache.spark.SPARK_VERSION >= "4.1"
170170
}
171171

172+
/**
 * Whether the running Spark version is 4.2 or newer.
 *
 * The major and minor components of `SPARK_VERSION` are compared numerically. Comparing the raw
 * strings is lexicographic, so a version such as "4.10.0" or "10.0.0" would sort below "4.2"
 * and the check would incorrectly report false.
 */
def isSpark42Plus: Boolean = {
  val version = org.apache.spark.SPARK_VERSION
  // Leading "<major>.<minor>", optionally followed by a patch level and/or a qualifier such as
  // ".0-preview4". Digit runs are bounded so that toInt cannot overflow.
  val MajorMinor = """(\d{1,4})\.(\d{1,4})(?:\D.*)?""".r
  version match {
    case MajorMinor(major, minor) =>
      val (maj, min) = (major.toInt, minor.toInt)
      maj > 4 || (maj == 4 && min >= 2)
    case _ =>
      // Unrecognised version format: keep the previous string-comparison behaviour.
      version >= "4.2"
  }
}
175+
172176
/**
173177
* Whether we should override Spark memory configuration for Comet. This only returns true when
174178
* Comet native execution is enabled and/or Comet shuffle is enabled and Comet doesn't use
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
CometNativeColumnarToRow
2+
+- CometTakeOrderedAndProject
3+
+- CometProject
4+
+- CometBroadcastHashJoin
5+
:- CometProject
6+
: +- CometBroadcastHashJoin
7+
: :- CometProject
8+
: : +- CometBroadcastHashJoin
9+
: : :- CometFilter
10+
: : : +- CometHashAggregate
11+
: : : +- CometExchange
12+
: : : +- CometHashAggregate
13+
: : : +- CometProject
14+
: : : +- CometBroadcastHashJoin
15+
: : : :- CometFilter
16+
: : : : +- CometNativeScan parquet spark_catalog.default.store_returns
17+
: : : : +- CometSubqueryBroadcast
18+
: : : : +- CometBroadcastExchange
19+
: : : : +- CometProject
20+
: : : : +- CometFilter
21+
: : : : +- CometNativeScan parquet spark_catalog.default.date_dim
22+
: : : +- CometBroadcastExchange
23+
: : : +- CometProject
24+
: : : +- CometFilter
25+
: : : +- CometNativeScan parquet spark_catalog.default.date_dim
26+
: : +- CometBroadcastExchange
27+
: : +- CometFilter
28+
: : +- CometHashAggregate
29+
: : +- CometExchange
30+
: : +- CometHashAggregate
31+
: : +- CometHashAggregate
32+
: : +- CometExchange
33+
: : +- CometHashAggregate
34+
: : +- CometProject
35+
: : +- CometBroadcastHashJoin
36+
: : :- CometFilter
37+
: : : +- CometNativeScan parquet spark_catalog.default.store_returns
38+
: : : +- ReusedSubquery
39+
: : +- CometBroadcastExchange
40+
: : +- CometProject
41+
: : +- CometFilter
42+
: : +- CometNativeScan parquet spark_catalog.default.date_dim
43+
: +- CometBroadcastExchange
44+
: +- CometProject
45+
: +- CometFilter
46+
: +- CometNativeScan parquet spark_catalog.default.store
47+
+- CometBroadcastExchange
48+
+- CometProject
49+
+- CometFilter
50+
+- CometNativeScan parquet spark_catalog.default.customer
51+
52+
Comet accelerated 47 out of 49 eligible operators (95%). Final plan contains 1 transitions between Spark and Comet.
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
CometNativeColumnarToRow
2+
+- CometTakeOrderedAndProject
3+
+- CometProject
4+
+- CometBroadcastHashJoin
5+
:- CometProject
6+
: +- CometBroadcastHashJoin
7+
: :- CometProject
8+
: : +- CometBroadcastHashJoin
9+
: : :- CometFilter
10+
: : : +- CometHashAggregate
11+
: : : +- CometExchange
12+
: : : +- CometHashAggregate
13+
: : : +- CometProject
14+
: : : +- CometBroadcastHashJoin
15+
: : : :- CometFilter
16+
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.store_returns
17+
: : : : +- SubqueryBroadcast
18+
: : : : +- BroadcastExchange
19+
: : : : +- CometNativeColumnarToRow
20+
: : : : +- CometProject
21+
: : : : +- CometFilter
22+
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
23+
: : : +- CometBroadcastExchange
24+
: : : +- CometProject
25+
: : : +- CometFilter
26+
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
27+
: : +- CometBroadcastExchange
28+
: : +- CometFilter
29+
: : +- CometHashAggregate
30+
: : +- CometExchange
31+
: : +- CometHashAggregate
32+
: : +- CometHashAggregate
33+
: : +- CometExchange
34+
: : +- CometHashAggregate
35+
: : +- CometProject
36+
: : +- CometBroadcastHashJoin
37+
: : :- CometFilter
38+
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.store_returns
39+
: : : +- ReusedSubquery
40+
: : +- CometBroadcastExchange
41+
: : +- CometProject
42+
: : +- CometFilter
43+
: : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
44+
: +- CometBroadcastExchange
45+
: +- CometProject
46+
: +- CometFilter
47+
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.store
48+
+- CometBroadcastExchange
49+
+- CometProject
50+
+- CometFilter
51+
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
52+
53+
Comet accelerated 46 out of 49 eligible operators (93%). Final plan contains 2 transitions between Spark and Comet.
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
TakeOrderedAndProject
2+
+- HashAggregate
3+
+- Exchange
4+
+- HashAggregate
5+
+- Project
6+
+- BroadcastHashJoin
7+
:- Project
8+
: +- BroadcastHashJoin
9+
: :- Project
10+
: : +- Filter
11+
: : +- BroadcastHashJoin
12+
: : :- BroadcastHashJoin [COMET: Unsupported join type ExistenceJoin(exists#1)]
13+
: : : :- CometNativeColumnarToRow
14+
: : : : +- CometBroadcastHashJoin
15+
: : : : :- CometFilter
16+
: : : : : +- CometNativeScan parquet spark_catalog.default.customer
17+
: : : : +- CometBroadcastExchange
18+
: : : : +- CometProject
19+
: : : : +- CometBroadcastHashJoin
20+
: : : : :- CometNativeScan parquet spark_catalog.default.store_sales
21+
: : : : : +- CometSubqueryBroadcast
22+
: : : : : +- CometBroadcastExchange
23+
: : : : : +- CometProject
24+
: : : : : +- CometFilter
25+
: : : : : +- CometNativeScan parquet spark_catalog.default.date_dim
26+
: : : : +- CometBroadcastExchange
27+
: : : : +- CometProject
28+
: : : : +- CometFilter
29+
: : : : +- CometNativeScan parquet spark_catalog.default.date_dim
30+
: : : +- BroadcastExchange
31+
: : : +- CometNativeColumnarToRow
32+
: : : +- CometProject
33+
: : : +- CometBroadcastHashJoin
34+
: : : :- CometNativeScan parquet spark_catalog.default.web_sales
35+
: : : : +- ReusedSubquery
36+
: : : +- CometBroadcastExchange
37+
: : : +- CometProject
38+
: : : +- CometFilter
39+
: : : +- CometNativeScan parquet spark_catalog.default.date_dim
40+
: : +- BroadcastExchange
41+
: : +- CometNativeColumnarToRow
42+
: : +- CometProject
43+
: : +- CometBroadcastHashJoin
44+
: : :- CometNativeScan parquet spark_catalog.default.catalog_sales
45+
: : : +- ReusedSubquery
46+
: : +- CometBroadcastExchange
47+
: : +- CometProject
48+
: : +- CometFilter
49+
: : +- CometNativeScan parquet spark_catalog.default.date_dim
50+
: +- BroadcastExchange
51+
: +- CometNativeColumnarToRow
52+
: +- CometProject
53+
: +- CometFilter
54+
: +- CometNativeScan parquet spark_catalog.default.customer_address
55+
+- BroadcastExchange
56+
+- CometNativeColumnarToRow
57+
+- CometProject
58+
+- CometFilter
59+
+- CometNativeScan parquet spark_catalog.default.customer_demographics
60+
61+
Comet accelerated 35 out of 54 eligible operators (64%). Final plan contains 5 transitions between Spark and Comet.
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
TakeOrderedAndProject
2+
+- HashAggregate
3+
+- Exchange
4+
+- HashAggregate
5+
+- Project
6+
+- BroadcastHashJoin
7+
:- Project
8+
: +- BroadcastHashJoin
9+
: :- Project
10+
: : +- Filter
11+
: : +- BroadcastHashJoin
12+
: : :- BroadcastHashJoin [COMET: Unsupported join type ExistenceJoin(exists#1)]
13+
: : : :- CometNativeColumnarToRow
14+
: : : : +- CometBroadcastHashJoin
15+
: : : : :- CometFilter
16+
: : : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer
17+
: : : : +- CometBroadcastExchange
18+
: : : : +- CometProject
19+
: : : : +- CometBroadcastHashJoin
20+
: : : : :- CometScan [native_iceberg_compat] parquet spark_catalog.default.store_sales
21+
: : : : : +- SubqueryBroadcast
22+
: : : : : +- BroadcastExchange
23+
: : : : : +- CometNativeColumnarToRow
24+
: : : : : +- CometProject
25+
: : : : : +- CometFilter
26+
: : : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
27+
: : : : +- CometBroadcastExchange
28+
: : : : +- CometProject
29+
: : : : +- CometFilter
30+
: : : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
31+
: : : +- BroadcastExchange
32+
: : : +- CometNativeColumnarToRow
33+
: : : +- CometProject
34+
: : : +- CometBroadcastHashJoin
35+
: : : :- CometScan [native_iceberg_compat] parquet spark_catalog.default.web_sales
36+
: : : : +- ReusedSubquery
37+
: : : +- CometBroadcastExchange
38+
: : : +- CometProject
39+
: : : +- CometFilter
40+
: : : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
41+
: : +- BroadcastExchange
42+
: : +- CometNativeColumnarToRow
43+
: : +- CometProject
44+
: : +- CometBroadcastHashJoin
45+
: : :- CometScan [native_iceberg_compat] parquet spark_catalog.default.catalog_sales
46+
: : : +- ReusedSubquery
47+
: : +- CometBroadcastExchange
48+
: : +- CometProject
49+
: : +- CometFilter
50+
: : +- CometScan [native_iceberg_compat] parquet spark_catalog.default.date_dim
51+
: +- BroadcastExchange
52+
: +- CometNativeColumnarToRow
53+
: +- CometProject
54+
: +- CometFilter
55+
: +- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_address
56+
+- BroadcastExchange
57+
+- CometNativeColumnarToRow
58+
+- CometProject
59+
+- CometFilter
60+
+- CometScan [native_iceberg_compat] parquet spark_catalog.default.customer_demographics
61+
62+
Comet accelerated 34 out of 54 eligible operators (62%). Final plan contains 6 transitions between Spark and Comet.

0 commit comments

Comments
 (0)