Skip to content

Commit ca3466a

Browse files
authored
Merge branch 'apache:main' into main
2 parents 723dda4 + 239fc1e commit ca3466a

997 files changed

Lines changed: 96139 additions & 10337 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/actions/setup-spark-builder/action.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ inputs:
2222
description: 'The Apache Spark short version (e.g., 3.5) to build'
2323
required: true
2424
spark-version:
25-
description: 'The Apache Spark version (e.g., 3.5.6) to build'
25+
description: 'The Apache Spark version (e.g., 3.5.7) to build'
2626
required: true
2727
runs:
2828
using: "composite"

.github/workflows/iceberg_spark_test.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ jobs:
4747
os: [ubuntu-24.04]
4848
java-version: [11, 17]
4949
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}]
50-
spark-version: [{short: '3.5', full: '3.5.6'}]
50+
spark-version: [{short: '3.5', full: '3.5.7'}]
5151
scala-version: ['2.13']
5252
fail-fast: false
5353
name: iceberg-spark/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -86,7 +86,7 @@ jobs:
8686
os: [ubuntu-24.04]
8787
java-version: [11, 17]
8888
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}]
89-
spark-version: [{short: '3.5', full: '3.5.6'}]
89+
spark-version: [{short: '3.5', full: '3.5.7'}]
9090
scala-version: ['2.13']
9191
fail-fast: false
9292
name: iceberg-spark-extensions/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -125,7 +125,7 @@ jobs:
125125
os: [ubuntu-24.04]
126126
java-version: [11, 17]
127127
iceberg-version: [{short: '1.8', full: '1.8.1'}, {short: '1.9', full: '1.9.1'}]
128-
spark-version: [{short: '3.5', full: '3.5.6'}]
128+
spark-version: [{short: '3.5', full: '3.5.7'}]
129129
scala-version: ['2.13']
130130
fail-fast: false
131131
name: iceberg-spark-runtime/${{ matrix.os }}/iceberg-${{ matrix.iceberg-version.full }}/spark-${{ matrix.spark-version.full }}/scala-${{ matrix.scala-version }}/java-${{ matrix.java-version }}
@@ -155,4 +155,4 @@ jobs:
155155
rm -rf /root/.m2/repository/org/apache/parquet # somehow parquet cache requires cleanups
156156
ENABLE_COMET=true ENABLE_COMET_ONHEAP=true ./gradlew -DsparkVersions=${{ matrix.spark-version.short }} -DscalaVersion=${{ matrix.scala-version }} -DflinkVersions= -DkafkaVersions= \
157157
:iceberg-spark:iceberg-spark-runtime-${{ matrix.spark-version.short }}_${{ matrix.scala-version }}:integrationTest \
158-
-Pquick=true -x javadoc
158+
-Pquick=true -x javadoc

.github/workflows/pr_build_linux.yml

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,16 +74,16 @@ jobs:
7474
maven_opts: "-Pspark-3.4 -Pscala-2.12"
7575
scan_impl: "native_comet"
7676

77-
- name: "Spark 3.5.4, JDK 11, Scala 2.12"
78-
java_version: "11"
79-
maven_opts: "-Pspark-3.5 -Dspark.version=3.5.4 -Pscala-2.12"
80-
scan_impl: "native_comet"
81-
8277
- name: "Spark 3.5.5, JDK 17, Scala 2.13"
8378
java_version: "17"
8479
maven_opts: "-Pspark-3.5 -Dspark.version=3.5.5 -Pscala-2.13"
8580
scan_impl: "native_comet"
8681

82+
- name: "Spark 3.5.6, JDK 17, Scala 2.13"
83+
java_version: "17"
84+
maven_opts: "-Pspark-3.5 -Dspark.version=3.5.6 -Pscala-2.13"
85+
scan_impl: "native_comet"
86+
8787
- name: "Spark 3.5, JDK 17, Scala 2.12 native_datafusion"
8888
java_version: "17"
8989
maven_opts: "-Pspark-3.5 -Pscala-2.12"
@@ -134,13 +134,15 @@ jobs:
134134
org.apache.comet.CometCastSuite
135135
org.apache.comet.CometExpressionSuite
136136
org.apache.comet.CometExpressionCoverageSuite
137+
org.apache.comet.CometMathExpressionSuite
137138
org.apache.comet.CometNativeSuite
138139
org.apache.comet.CometSparkSessionExtensionsSuite
139140
org.apache.comet.CometStringExpressionSuite
140141
org.apache.spark.CometPluginsSuite
141142
org.apache.spark.CometPluginsDefaultSuite
142143
org.apache.spark.CometPluginsNonOverrideSuite
143144
org.apache.spark.CometPluginsUnifiedModeOverrideSuite
145+
org.apache.comet.CometTemporalExpressionSuite
144146
org.apache.spark.sql.CometTPCDSQuerySuite
145147
org.apache.spark.sql.CometTPCDSQueryTestSuite
146148
org.apache.spark.sql.CometTPCHQuerySuite
@@ -175,4 +177,4 @@ jobs:
175177
suites: ${{ matrix.suite.name == 'sql' && matrix.profile.name == 'Spark 3.4, JDK 11, Scala 2.12' && '' || matrix.suite.value }}
176178
maven_opts: ${{ matrix.profile.maven_opts }}
177179
scan_impl: ${{ matrix.profile.scan_impl }}
178-
upload-test-reports: true
180+
upload-test-reports: true

.github/workflows/pr_build_macos.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,15 @@ jobs:
9999
org.apache.comet.CometCastSuite
100100
org.apache.comet.CometExpressionSuite
101101
org.apache.comet.CometExpressionCoverageSuite
102+
org.apache.comet.CometMathExpressionSuite
102103
org.apache.comet.CometNativeSuite
103104
org.apache.comet.CometSparkSessionExtensionsSuite
104105
org.apache.comet.CometStringExpressionSuite
105106
org.apache.spark.CometPluginsSuite
106107
org.apache.spark.CometPluginsDefaultSuite
107108
org.apache.spark.CometPluginsNonOverrideSuite
108109
org.apache.spark.CometPluginsUnifiedModeOverrideSuite
110+
org.apache.comet.CometTemporalExpressionSuite
109111
org.apache.spark.sql.CometTPCDSQuerySuite
110112
org.apache.spark.sql.CometTPCDSQueryTestSuite
111113
org.apache.spark.sql.CometTPCHQuerySuite

.github/workflows/spark_sql_test.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ jobs:
5151
matrix:
5252
os: [ubuntu-24.04]
5353
java-version: [11]
54-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
54+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
5555
module:
5656
- {name: "catalyst", args1: "catalyst/test", args2: ""}
5757
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -90,7 +90,7 @@ jobs:
9090
LC_ALL: "C.UTF-8"
9191
- name: Upload fallback log
9292
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
93-
uses: actions/upload-artifact@v4
93+
uses: actions/upload-artifact@v5
9494
with:
9595
name: fallback-log-spark-sql-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
9696
path: "**/fallback.log"
@@ -100,7 +100,7 @@ jobs:
100100
matrix:
101101
os: [ ubuntu-24.04 ]
102102
java-version: [ 11 ]
103-
spark-version: [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.6' } ]
103+
spark-version: [ { short: '3.4', full: '3.4.3' }, { short: '3.5', full: '3.5.7' } ]
104104
module:
105105
- { name: "catalyst", args1: "catalyst/test", args2: "" }
106106
- { name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest }
@@ -139,7 +139,7 @@ jobs:
139139
LC_ALL: "C.UTF-8"
140140
- name: Upload fallback log
141141
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
142-
uses: actions/upload-artifact@v4
142+
uses: actions/upload-artifact@v5
143143
with:
144144
name: fallback-log-spark-sql-native-comet-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
145145
path: "**/fallback.log"
@@ -149,7 +149,7 @@ jobs:
149149
matrix:
150150
os: [ubuntu-24.04]
151151
java-version: [11]
152-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
152+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
153153
module:
154154
- {name: "catalyst", args1: "catalyst/test", args2: ""}
155155
- {name: "sql_core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
@@ -188,7 +188,7 @@ jobs:
188188
LC_ALL: "C.UTF-8"
189189
- name: Upload fallback log
190190
if: ${{ github.event.inputs.collect-fallback-logs == 'true' }}
191-
uses: actions/upload-artifact@v4
191+
uses: actions/upload-artifact@v5
192192
with:
193193
name: fallback-log-spark-sql-iceberg-compat-${{ matrix.module.name }}-${{ matrix.os }}-spark-${{ matrix.spark-version.full }}-java-${{ matrix.java-version }}
194194
path: "**/fallback.log"
@@ -200,14 +200,14 @@ jobs:
200200
runs-on: ubuntu-24.04
201201
steps:
202202
- name: Download fallback log artifacts
203-
uses: actions/download-artifact@v5
203+
uses: actions/download-artifact@v6
204204
with:
205205
path: fallback-logs/
206206
- name: Merge fallback logs
207207
run: |
208208
find ./fallback-logs/ -type f -name "fallback.log" -print0 | xargs -0 cat | sort -u > all_fallback.log
209209
- name: Upload merged fallback log
210-
uses: actions/upload-artifact@v4
210+
uses: actions/upload-artifact@v5
211211
with:
212212
name: all-fallback-log
213213
path: all_fallback.log

.github/workflows/spark_sql_test_native_datafusion.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
matrix:
3636
os: [ubuntu-24.04]
3737
java-version: [11]
38-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
38+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
3939
module:
4040
- {name: "catalyst", args1: "catalyst/test", args2: ""}
4141
- {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}

.github/workflows/spark_sql_test_native_iceberg_compat.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
matrix:
3636
os: [ubuntu-24.04]
3737
java-version: [11]
38-
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.6'}]
38+
spark-version: [{short: '3.4', full: '3.4.3'}, {short: '3.5', full: '3.5.7'}]
3939
module:
4040
- {name: "catalyst", args1: "catalyst/test", args2: ""}
4141
- {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,23 @@ The following chart shows the time it takes to run the 22 TPC-H queries against
4848
using a single executor with 8 cores. See the [Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html)
4949
for details of the environment used for these benchmarks.
5050

51-
When using Comet, the overall run time is reduced from 652 seconds to 268 seconds, a 2.4x speedup.
51+
When using Comet, the overall run time is reduced from 687 seconds to 302 seconds, a 2.2x speedup.
5252

53-
![](docs/source/_static/images/benchmark-results/0.9.0/tpch_allqueries.png)
53+
![](docs/source/_static/images/benchmark-results/0.11.0/tpch_allqueries.png)
5454

5555
Here is a breakdown showing relative performance of Spark and Comet for each TPC-H query.
5656

57-
![](docs/source/_static/images/benchmark-results/0.9.0/tpch_queries_compare.png)
57+
![](docs/source/_static/images/benchmark-results/0.11.0/tpch_queries_compare.png)
5858

5959
The following charts show how much Comet currently accelerates each query from the benchmark.
6060

6161
### Relative speedup
6262

63-
![](docs/source/_static/images/benchmark-results/0.9.0/tpch_queries_speedup_rel.png)
63+
![](docs/source/_static/images/benchmark-results/0.11.0/tpch_queries_speedup_rel.png)
6464

6565
### Absolute speedup
6666

67-
![](docs/source/_static/images/benchmark-results/0.9.0/tpch_queries_speedup_abs.png)
67+
![](docs/source/_static/images/benchmark-results/0.11.0/tpch_queries_speedup_abs.png)
6868

6969
These benchmarks can be reproduced in any environment using the documentation in the
7070
[Comet Benchmarking Guide](https://datafusion.apache.org/comet/contributor-guide/benchmarking.html). We encourage

common/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ under the License.
2626
<parent>
2727
<groupId>org.apache.datafusion</groupId>
2828
<artifactId>comet-parent-spark${spark.version.short}_${scala.binary.version}</artifactId>
29-
<version>0.11.0-SNAPSHOT</version>
29+
<version>0.12.0-SNAPSHOT</version>
3030
<relativePath>../pom.xml</relativePath>
3131
</parent>
3232

common/src/main/java/org/apache/comet/parquet/Native.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@ public static native long initRecordBatchReader(
268268
int batchSize,
269269
boolean caseSensitive,
270270
Map<String, String> objectStoreOptions,
271-
CometFileKeyUnwrapper keyUnwrapper);
271+
CometFileKeyUnwrapper keyUnwrapper,
272+
Object metricsNode);
272273

273274
// arrow native version of read batch
274275

0 commit comments

Comments
 (0)