Skip to content

Commit 6012934

Browse files
committed
[SPARK-56710][INFRA] branch-4.x and branch-4.2 scheduled CI, snapshots, and build_and_test Python split
### What changes were proposed in this pull request?
Scheduled GitHub workflows for `branch-4.x` and `branch-4.2`, snapshot matrix + README badges, `build_and_test` support for (`branch-4.2`/`branch-4.x` vs `branch-4.1`), and small `merge_spark_pr.py` updates.

### Why are the changes needed?
Spark 4 cut new integration/maintenance branches; CI and snapshots should track them without hand-editing workflow inputs.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Manually tested some logic. CI will be monitored separately.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #55662 from HyukjinKwon/SPARK-56710.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
1 parent 84d9c84 commit 6012934

21 files changed

Lines changed: 1023 additions & 43 deletions

.github/workflows/build_and_test.yml

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ on:
3030
description: Branch to run the build against
3131
required: false
3232
type: string
33-
# Change 'master' to 'branch-4.0' in branch-4.0 branch after cutting it.
33+
# Caller workflows (build_main, build_branch*) pass branch explicitly. Python toolchain used in
34+
# lint/docs/Python CodeGen (buf job): branch-3.5 / branch-4.0 use 3.9; branch-4.1 uses 3.11
35+
# (black codegen); branch-4.2 / branch-4.x and master use 3.12 (ruff codegen).
3436
default: master
3537
hadoop:
3638
description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it.
@@ -844,7 +846,7 @@ jobs:
844846
python3.12 -m pip install 'black==26.3.1' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
845847
python3.12 -m pip list
846848
- name: Install dependencies for Python CodeGen check
847-
if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1'
849+
if: ${{ inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1' }}
848850
run: |
849851
python3.12 -m pip install 'ruff==0.14.8' 'protobuf==6.33.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
850852
python3.12 -m pip list
@@ -933,6 +935,8 @@ jobs:
933935
run: |
934936
if [[ "$BRANCH" == 'branch-3.5' || "$BRANCH" == 'branch-4.0' ]]; then
935937
python3.9 ./dev/structured_logging_style.py
938+
elif [[ "$BRANCH" == 'branch-4.2' || "$BRANCH" == 'branch-4.x' ]]; then
939+
python3.12 ./dev/structured_logging_style.py
936940
elif [[ "$BRANCH" == 'branch-4.1' ]]; then
937941
python3.11 ./dev/structured_logging_style.py
938942
else
@@ -956,6 +960,9 @@ jobs:
956960
if [[ "$BRANCH" == 'branch-3.5' || "$BRANCH" == 'branch-4.0' ]]; then
957961
python3.9 --version
958962
python3.9 -m pip list
963+
elif [[ "$BRANCH" == 'branch-4.2' || "$BRANCH" == 'branch-4.x' ]]; then
964+
python3.12 --version
965+
python3.12 -m pip list
959966
elif [[ "$BRANCH" == 'branch-4.1' ]]; then
960967
python3.11 --version
961968
python3.11 -m pip list
@@ -968,6 +975,8 @@ jobs:
968975
run: |
969976
if [[ "$BRANCH" == 'branch-3.5' || "$BRANCH" == 'branch-4.0' ]]; then
970977
PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
978+
elif [[ "$BRANCH" == 'branch-4.2' || "$BRANCH" == 'branch-4.x' ]]; then
979+
PYTHON_EXECUTABLE=python3.12 ./dev/lint-python
971980
elif [[ "$BRANCH" == 'branch-4.1' ]]; then
972981
PYTHON_EXECUTABLE=python3.11 ./dev/lint-python
973982
else
@@ -1129,11 +1138,14 @@ jobs:
11291138
- name: List Python packages for branch-3.5 and branch-4.0
11301139
if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
11311140
run: python3.9 -m pip list
1141+
- name: List Python packages for branch-4.2 and branch-4.x (Python 3.12)
1142+
if: inputs.branch == 'branch-4.2' || inputs.branch == 'branch-4.x'
1143+
run: python3.12 -m pip list
11321144
- name: List Python packages for branch-4.1
11331145
if: inputs.branch == 'branch-4.1'
11341146
run: python3.11 -m pip list
11351147
- name: List Python packages
1136-
if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1'
1148+
if: ${{ inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1' && inputs.branch != 'branch-4.2' && inputs.branch != 'branch-4.x' }}
11371149
run: |
11381150
lsb_release -a
11391151
python3.12 -m pip list
@@ -1167,6 +1179,30 @@ jobs:
11671179
echo "SKIP_SQLDOC: $SKIP_SQLDOC"
11681180
cd docs
11691181
bundle exec jekyll build
1182+
- name: Run documentation build for branch-4.2 and branch-4.x (Python 3.12)
1183+
if: inputs.branch == 'branch-4.2' || inputs.branch == 'branch-4.x'
1184+
run: |
1185+
# We need this link to make sure `python3` points to `python3.12` which contains the prerequisite packages.
1186+
ln -s "$(which python3.12)" "/usr/local/bin/python3"
1187+
# Build docs first with SKIP_API to ensure they are buildable without requiring any
1188+
# language docs to be built beforehand.
1189+
cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
1190+
if [ -f "./dev/is-changed.py" ]; then
1191+
# Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
1192+
pyspark_modules=`cd dev && python3.12 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
1193+
if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
1194+
if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
1195+
fi
1196+
export PYSPARK_DRIVER_PYTHON=python3.12
1197+
export PYSPARK_PYTHON=python3.12
1198+
# Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
1199+
echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
1200+
echo "SKIP_SCALADOC: $SKIP_SCALADOC"
1201+
echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
1202+
echo "SKIP_RDOC: $SKIP_RDOC"
1203+
echo "SKIP_SQLDOC: $SKIP_SQLDOC"
1204+
cd docs
1205+
bundle exec jekyll build
11701206
- name: Run documentation build for branch-4.1
11711207
if: inputs.branch == 'branch-4.1'
11721208
run: |
@@ -1192,7 +1228,7 @@ jobs:
11921228
cd docs
11931229
bundle exec jekyll build
11941230
- name: Run documentation build
1195-
if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1'
1231+
if: ${{ inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' && inputs.branch != 'branch-4.1' && inputs.branch != 'branch-4.2' && inputs.branch != 'branch-4.x' }}
11961232
run: |
11971233
# We need this link to make sure `python3` points to `python3.12` which contains the prerequisite packages.
11981234
ln -s "$(which python3.12)" "/usr/local/bin/python3"
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build (branch-4.2, Scala 2.13, Hadoop 3, JDK 17)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 13 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/build_and_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
java: 17
36+
branch: branch-4.2
37+
hadoop: hadoop3
38+
envs: >-
39+
{
40+
"SCALA_PROFILE": "scala2.13",
41+
"PYSPARK_IMAGE_TO_TEST": "",
42+
"PYTHON_TO_TEST": "",
43+
"ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-free:23.7-slim"
44+
}
45+
jobs: >-
46+
{
47+
"build": "true",
48+
"sparkr": "true",
49+
"tpcds-1g": "true",
50+
"docker-integration-tests": "true",
51+
"k8s-integration-tests": "true",
52+
"lint" : "true"
53+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build (branch-4.2, Scala 2.13, Hadoop 3, JDK 21)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 6 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/build_and_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
java: 21
36+
branch: branch-4.2
37+
hadoop: hadoop3
38+
envs: >-
39+
{
40+
"PYSPARK_IMAGE_TO_TEST": "python-311",
41+
"PYTHON_TO_TEST": "python3.11",
42+
"SKIP_MIMA": "true",
43+
"SKIP_UNIDOC": "true",
44+
"DEDICATED_JVM_SBT_TESTS": "org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatV1Suite,org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormatV2Suite,org.apache.spark.sql.execution.datasources.orc.OrcSourceV1Suite,org.apache.spark.sql.execution.datasources.orc.OrcSourceV2Suite"
45+
}
46+
jobs: >-
47+
{
48+
"build": "true",
49+
"pyspark": "true",
50+
"sparkr": "true",
51+
"tpcds-1g": "true",
52+
"docker-integration-tests": "true",
53+
"yarn": "true",
54+
"k8s-integration-tests": "true",
55+
"buf": "true",
56+
"ui": "true"
57+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build / Maven (branch-4.2, Scala 2.13, Hadoop 3, JDK 17)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 15 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/maven_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
branch: branch-4.2
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build / Maven (branch-4.2, Scala 2.13, Hadoop 3, JDK 21)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 17 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/maven_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
branch: branch-4.2
36+
java: 21
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build / Non-ANSI (branch-4.2, Hadoop 3, JDK 17, Scala 2.13)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 3 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/build_and_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
java: 17
36+
branch: branch-4.2
37+
hadoop: hadoop3
38+
envs: >-
39+
{
40+
"PYSPARK_IMAGE_TO_TEST": "python-311",
41+
"PYTHON_TO_TEST": "python3.11",
42+
"SPARK_ANSI_SQL_MODE": "false",
43+
}
44+
jobs: >-
45+
{
46+
"build": "true",
47+
"docs": "true",
48+
"pyspark": "true",
49+
"sparkr": "true",
50+
"tpcds-1g": "true",
51+
"docker-integration-tests": "true",
52+
"yarn": "true"
53+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
#
19+
20+
name: "Build / Python-only (branch-4.2)"
21+
22+
on:
23+
schedule:
24+
- cron: '0 18 * * *'
25+
workflow_dispatch:
26+
27+
jobs:
28+
run-build:
29+
permissions:
30+
packages: write
31+
name: Run
32+
uses: ./.github/workflows/build_and_test.yml
33+
if: github.repository == 'apache/spark'
34+
with:
35+
java: 17
36+
branch: branch-4.2
37+
hadoop: hadoop3
38+
envs: >-
39+
{
40+
"PYSPARK_IMAGE_TO_TEST": "python-311",
41+
"PYTHON_TO_TEST": "python3.11"
42+
}
43+
jobs: >-
44+
{
45+
"pyspark": "true",
46+
"pyspark-pandas": "true"
47+
}

0 commit comments

Comments
 (0)