From 8d0c138565a4c078bc6f4b268364146989ff330d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 9 Jun 2026 09:50:32 +0000 Subject: [PATCH 1/3] [INFRA] Make pages.yml reuse the documentation image used in build_and_test Run the documentation job inside the prebuilt documentation image (apache-spark-github-action-image-docs-cache:master-static) that build_and_test.yml already uses, dropping the redundant inline setup of the Python docs dependencies, Ruby, and Pandoc now provided by the image. --- .github/workflows/pages.yml | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index fdf15337cf672..dd7b2beffb927 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -37,9 +37,13 @@ jobs: pages: write environment: name: github-pages # https://github.com/actions/deploy-pages/issues/271 + container: + image: ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:master-static env: SPARK_TESTING: 1 # Reduce some noise in the logs RELEASE_VERSION: 'In-Progress' + LC_ALL: C.UTF-8 + LANG: C.UTF-8 if: github.repository == 'apache/spark' steps: - name: Checkout Spark repository @@ -47,37 +51,23 @@ jobs: with: repository: apache/spark ref: 'master' + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Free up disk space + run: ./dev/free_disk_space_container - name: Install Java 17 uses: actions/setup-java@v5 with: distribution: zulu java-version: 17 - - name: Install Python 3.11 - uses: actions/setup-python@v6 - with: - python-version: '3.11' - architecture: x64 - cache: 'pip' - - name: Install Python dependencies - run: | - pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ - ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.23.2' pyarrow 'pandas==2.3.3' 
'plotly>=4.8' 'docutils<0.18.0' \ - 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'ruff==0.14.8' \ - 'pandas-stubs==1.2.0.53' 'grpcio==1.76.0' 'grpcio-status==1.76.0' 'protobuf==6.33.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ - 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' - - name: Install Ruby for documentation generation - uses: ruby/setup-ruby@4dc28cf14d77b0afa6832d9765ac422dbf0dfedd # v1 - with: - ruby-version: '3.3' - bundler-cache: true - - name: Install Pandoc - run: | - sudo apt-get update -y - sudo apt-get install pandoc - name: Install dependencies for documentation generation run: | + # Keep the version of Bundler here in sync with the following locations: + # - dev/create-release/spark-rm/Dockerfile + # - docs/README.md + gem install bundler -v 2.4.22 cd docs - gem install bundler -v 2.4.22 -n /usr/local/bin bundle install --retry=100 - name: Run documentation build run: | From 6ee4b60cf30c6c33dff37a578816a2090a5a1e97 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 12 Jun 2026 11:36:52 +0000 Subject: [PATCH 2/3] [TEMP][DO NOT MERGE] Trigger pages.yml on the fork branch for validation Temporarily run the GitHub Pages workflow on the fork to validate the container-based doc build: trigger on push to this branch, drop the apache/spark job guard, and skip the Pages configure/deploy steps on the fork. To be reverted before merge. 
--- .github/workflows/pages.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index dd7b2beffb927..25a53c1bbd39a 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -23,6 +23,8 @@ on: push: branches: - master + # TEMP: validate this workflow on the fork, remove before merge + - pages-reuse-doc-image-dev1 concurrency: group: 'docs preview' @@ -44,7 +46,7 @@ jobs: RELEASE_VERSION: 'In-Progress' LC_ALL: C.UTF-8 LANG: C.UTF-8 - if: github.repository == 'apache/spark' + # TEMP: job-level repository guard removed to run on the fork, restore before merge steps: - name: Checkout Spark repository uses: actions/checkout@v6 @@ -78,11 +80,15 @@ jobs: cd docs SKIP_RDOC=1 bundle exec jekyll build - name: Setup Pages + # TEMP: skip Pages API calls on the fork (Pages not enabled there), remove the if before merge + if: github.repository == 'apache/spark' uses: actions/configure-pages@v6 - name: Upload artifact uses: actions/upload-pages-artifact@v5 with: path: 'docs/_site' - name: Deploy to GitHub Pages + # TEMP: skip deployment on the fork, remove the if before merge + if: github.repository == 'apache/spark' id: deployment uses: actions/deploy-pages@v5 From e2854b1e6ead005ec463ec70242035ded50eb7c5 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 12 Jun 2026 12:14:11 +0000 Subject: [PATCH 3/3] Revert "[TEMP][DO NOT MERGE] Trigger pages.yml on the fork branch for validation" This reverts commit 6ee4b60cf30c6c33dff37a578816a2090a5a1e97. 
--- .github/workflows/pages.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 25a53c1bbd39a..dd7b2beffb927 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -23,8 +23,6 @@ on: push: branches: - master - # TEMP: validate this workflow on the fork, remove before merge - - pages-reuse-doc-image-dev1 concurrency: group: 'docs preview' @@ -46,7 +44,7 @@ jobs: RELEASE_VERSION: 'In-Progress' LC_ALL: C.UTF-8 LANG: C.UTF-8 - # TEMP: job-level repository guard removed to run on the fork, restore before merge + if: github.repository == 'apache/spark' steps: - name: Checkout Spark repository uses: actions/checkout@v6 @@ -80,15 +78,11 @@ jobs: cd docs SKIP_RDOC=1 bundle exec jekyll build - name: Setup Pages - # TEMP: skip Pages API calls on the fork (Pages not enabled there), remove the if before merge - if: github.repository == 'apache/spark' uses: actions/configure-pages@v6 - name: Upload artifact uses: actions/upload-pages-artifact@v5 with: path: 'docs/_site' - name: Deploy to GitHub Pages - # TEMP: skip deployment on the fork, remove the if before merge - if: github.repository == 'apache/spark' id: deployment uses: actions/deploy-pages@v5