Skip to content

Commit 9be3e51

Browse files
author
Kazantsev Maksim
committed
Merge remote-tracking branch 'origin/main' into make_map
# Conflicts:
#   native/proto/src/proto/expr.proto
#   spark/src/test/scala/org/apache/comet/CometMapExpressionSuite.scala
2 parents c0d5c91 + 559741e commit 9be3e51

132 files changed

Lines changed: 12066 additions & 2559 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/actions/rust-test/action.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,7 @@ runs:
7070
shell: bash
7171
run: |
7272
cd native
73+
# Set LD_LIBRARY_PATH to include JVM library path for tests that use JNI
74+
export LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${LD_LIBRARY_PATH}
7375
RUST_BACKTRACE=1 cargo nextest run
7476

.github/workflows/benchmark-tpcds.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/benchmark-tpch.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/iceberg_spark_test.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:

.github/workflows/miri.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@ on:
2323
- "doc/**"
2424
- "docs/**"
2525
- "**.md"
26+
- "native/core/benches/**"
27+
- "native/spark-expr/benches/**"
28+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
2629
pull_request:
2730
paths-ignore:
2831
- "doc/**"
2932
- "docs/**"
3033
- "**.md"
34+
- "native/core/benches/**"
35+
- "native/spark-expr/benches/**"
36+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3137
# manual trigger
3238
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3339
workflow_dispatch:
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
# Lightweight CI for benchmark-only changes - verifies compilation and linting
19+
# without running full test suites
20+
21+
name: PR Benchmark Check
22+
23+
concurrency:
24+
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
25+
cancel-in-progress: true
26+
27+
on:
28+
push:
29+
paths:
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
33+
pull_request:
34+
paths:
35+
- "native/core/benches/**"
36+
- "native/spark-expr/benches/**"
37+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
38+
workflow_dispatch:
39+
40+
env:
41+
RUST_VERSION: stable
42+
43+
jobs:
44+
benchmark-check:
45+
name: Benchmark Compile & Lint Check
46+
runs-on: ubuntu-latest
47+
container:
48+
image: amd64/rust
49+
steps:
50+
- uses: actions/checkout@v6
51+
52+
- name: Setup Rust & Java toolchain
53+
uses: ./.github/actions/setup-builder
54+
with:
55+
rust-version: ${{ env.RUST_VERSION }}
56+
jdk-version: 17
57+
58+
- name: Check Cargo fmt
59+
run: |
60+
cd native
61+
cargo fmt --all -- --check --color=never
62+
63+
- name: Check Cargo clippy
64+
run: |
65+
cd native
66+
cargo clippy --color=never --all-targets --workspace -- -D warnings
67+
68+
- name: Check benchmark compilation
69+
run: |
70+
cd native
71+
cargo check --benches
72+
73+
- name: Cache Maven dependencies
74+
uses: actions/cache@v5
75+
with:
76+
path: |
77+
~/.m2/repository
78+
/root/.m2/repository
79+
key: ${{ runner.os }}-benchmark-maven-${{ hashFiles('**/pom.xml') }}
80+
restore-keys: |
81+
${{ runner.os }}-benchmark-maven-
82+
83+
- name: Check Scala compilation and linting
84+
run: |
85+
./mvnw -B compile test-compile scalafix:scalafix -Dscalafix.mode=CHECK -Psemanticdb -DskipTests

.github/workflows/pr_build_linux.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:
@@ -116,6 +122,7 @@ jobs:
116122
org.apache.comet.exec.CometAsyncShuffleSuite
117123
org.apache.comet.exec.DisableAQECometShuffleSuite
118124
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
125+
org.apache.spark.shuffle.sort.SpillSorterSuite
119126
- name: "parquet"
120127
value: |
121128
org.apache.comet.parquet.CometParquetWriterSuite
@@ -154,13 +161,15 @@ jobs:
154161
value: |
155162
org.apache.comet.CometExpressionSuite
156163
org.apache.comet.CometExpressionCoverageSuite
164+
org.apache.comet.CometHashExpressionSuite
157165
org.apache.comet.CometTemporalExpressionSuite
158166
org.apache.comet.CometArrayExpressionSuite
159167
org.apache.comet.CometCastSuite
160168
org.apache.comet.CometMathExpressionSuite
161169
org.apache.comet.CometStringExpressionSuite
162170
org.apache.comet.CometBitwiseExpressionSuite
163171
org.apache.comet.CometMapExpressionSuite
172+
org.apache.comet.CometJsonExpressionSuite
164173
org.apache.comet.expressions.conditional.CometIfSuite
165174
org.apache.comet.expressions.conditional.CometCoalesceSuite
166175
org.apache.comet.expressions.conditional.CometCaseWhenSuite

.github/workflows/pr_build_macos.yml

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:
@@ -57,11 +63,9 @@ jobs:
5763
java_version: "17"
5864
maven_opts: "-Pspark-3.5 -Pscala-2.13"
5965

60-
# TODO fails with OOM
61-
# https://github.com/apache/datafusion-comet/issues/1949
62-
# - name: "Spark 4.0, JDK 17, Scala 2.13"
63-
# java_version: "17"
64-
# maven_opts: "-Pspark-4.0 -Pscala-2.13"
66+
- name: "Spark 4.0, JDK 17, Scala 2.13"
67+
java_version: "17"
68+
maven_opts: "-Pspark-4.0 -Pscala-2.13"
6569

6670
suite:
6771
- name: "fuzz"
@@ -81,6 +85,7 @@ jobs:
8185
org.apache.comet.exec.CometAsyncShuffleSuite
8286
org.apache.comet.exec.DisableAQECometShuffleSuite
8387
org.apache.comet.exec.DisableAQECometAsyncShuffleSuite
88+
org.apache.spark.shuffle.sort.SpillSorterSuite
8489
- name: "parquet"
8590
value: |
8691
org.apache.comet.parquet.CometParquetWriterSuite
@@ -119,19 +124,22 @@ jobs:
119124
value: |
120125
org.apache.comet.CometExpressionSuite
121126
org.apache.comet.CometExpressionCoverageSuite
127+
org.apache.comet.CometHashExpressionSuite
122128
org.apache.comet.CometTemporalExpressionSuite
123129
org.apache.comet.CometArrayExpressionSuite
124130
org.apache.comet.CometCastSuite
125131
org.apache.comet.CometMathExpressionSuite
126132
org.apache.comet.CometStringExpressionSuite
127133
org.apache.comet.CometBitwiseExpressionSuite
128134
org.apache.comet.CometMapExpressionSuite
135+
org.apache.comet.CometJsonExpressionSuite
129136
org.apache.comet.expressions.conditional.CometIfSuite
130137
org.apache.comet.expressions.conditional.CometCoalesceSuite
131138
org.apache.comet.expressions.conditional.CometCaseWhenSuite
132139
- name: "sql"
133140
value: |
134141
org.apache.spark.sql.CometToPrettyStringSuite
142+
135143
fail-fast: false
136144
name: ${{ matrix.os }}/${{ matrix.profile.name }} [${{ matrix.suite.name }}]
137145
runs-on: ${{ matrix.os }}
@@ -144,6 +152,14 @@ jobs:
144152
jdk-version: ${{ matrix.profile.java_version }}
145153
jdk-architecture: aarch64
146154
protoc-architecture: aarch_64
155+
- name: Set thread thresholds envs for spark test on macOS
156+
# see: https://github.com/apache/datafusion-comet/issues/2965
157+
shell: bash
158+
run: |
159+
echo "SPARK_TEST_SQL_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
160+
echo "SPARK_TEST_SQL_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=256" >> $GITHUB_ENV
161+
echo "SPARK_TEST_HIVE_SHUFFLE_EXCHANGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
162+
echo "SPARK_TEST_HIVE_RESULT_QUERY_STAGE_MAX_THREAD_THRESHOLD=48" >> $GITHUB_ENV
147163
- name: Java test steps
148164
uses: ./.github/actions/java-test
149165
with:

.github/workflows/spark_sql_test.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,17 @@ on:
2727
- "doc/**"
2828
- "docs/**"
2929
- "**.md"
30+
- "native/core/benches/**"
31+
- "native/spark-expr/benches/**"
32+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3033
pull_request:
3134
paths-ignore:
3235
- "doc/**"
3336
- "docs/**"
3437
- "**.md"
38+
- "native/core/benches/**"
39+
- "native/spark-expr/benches/**"
40+
- "spark/src/test/scala/org/apache/spark/sql/benchmark/**"
3541
# manual trigger
3642
# https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
3743
workflow_dispatch:
@@ -59,6 +65,10 @@ jobs:
5965
- {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
6066
- {name: "sql_hive-2", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.ExtendedHiveTest"}
6167
- {name: "sql_hive-3", args1: "", args2: "hive/testOnly * -- -n org.apache.spark.tags.SlowHiveTest"}
68+
# Skip sql_hive-1 for Spark 4.0 due to https://github.com/apache/datafusion-comet/issues/2946
69+
exclude:
70+
- spark-version: {short: '4.0', full: '4.0.1', java: 17}
71+
module: {name: "sql_hive-1", args1: "", args2: "hive/testOnly * -- -l org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
6272
fail-fast: false
6373
name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{ matrix.spark-version.full }}/java-${{ matrix.spark-version.java }}
6474
runs-on: ${{ matrix.os }}

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,5 @@ apache-rat-*.jar
1818
venv
1919
dev/release/comet-rm/workdir
2020
spark/benchmarks
21+
.DS_Store
22+
comet-event-trace.json

0 commit comments

Comments
 (0)