From 026d36a84121c9c3235100aaf011bba81518c966 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 10 Mar 2026 03:44:36 +0800 Subject: [PATCH 01/10] [WIP] Bump PySpark to 4.0. --- containers/conda_env/aarch64_test.yml | 2 +- containers/conda_env/linux_cpu_test.yml | 2 +- containers/dockerfile/Dockerfile.gpu | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/containers/conda_env/aarch64_test.yml b/containers/conda_env/aarch64_test.yml index bb9a186..bbf649b 100644 --- a/containers/conda_env/aarch64_test.yml +++ b/containers/conda_env/aarch64_test.yml @@ -27,7 +27,7 @@ dependencies: - llvmlite - loky>=3.5.1 - pyarrow -- pyspark>=3.4.0,<4.0 +- pyspark>=4.0 - cloudpickle - pip: - awscli diff --git a/containers/conda_env/linux_cpu_test.yml b/containers/conda_env/linux_cpu_test.yml index 846bb35..64e23d7 100644 --- a/containers/conda_env/linux_cpu_test.yml +++ b/containers/conda_env/linux_cpu_test.yml @@ -35,6 +35,6 @@ dependencies: - protobuf - cloudpickle - modin -- pyspark>=3.4.0,<4.0 +- pyspark>=4.0 - pip: - py-ubjson diff --git a/containers/dockerfile/Dockerfile.gpu b/containers/dockerfile/Dockerfile.gpu index 7d8a089..5f09bd2 100644 --- a/containers/dockerfile/Dockerfile.gpu +++ b/containers/dockerfile/Dockerfile.gpu @@ -44,7 +44,7 @@ RUN \ "dask-cuda=$RAPIDS_VERSION.*" "dask-cudf=$RAPIDS_VERSION.*" cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ python-kubernetes urllib3 graphviz hypothesis "loky>=3.5.1" \ - "pyspark>=3.4.0,<4.0" cloudpickle cuda-python && \ + "pyspark>=4.0" cloudpickle cuda-python && \ mamba clean --all --yes # Install lightweight sudo (not bound to TTY) From 554198f9cb38588dac1afcd2cd7b14590ea6e2a8 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 10 Mar 2026 03:59:14 +0800 Subject: [PATCH 02/10] Update maven. --- containers/dockerfile/Dockerfile.jvm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/dockerfile/Dockerfile.jvm b/containers/dockerfile/Dockerfile.jvm index aea4f15..6a33694 100644 --- a/containers/dockerfile/Dockerfile.jvm +++ b/containers/dockerfile/Dockerfile.jvm @@ -1,7 +1,7 @@ FROM rockylinux:8 ARG MINIFORGE_VERSION=24.9.2-0 ARG CMAKE_VERSION=3.31.2 -ARG MAVEN_VERSION=3.9.12 +ARG MAVEN_VERSION=3.9.13 SHELL ["/bin/bash", "-c"] From f88ea7d2b65debc24f0342b5ed4cbfd143f1416d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 10 Mar 2026 04:12:37 +0800 Subject: [PATCH 03/10] mvn --- containers/dockerfile/Dockerfile.jvm_gpu_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/dockerfile/Dockerfile.jvm_gpu_build b/containers/dockerfile/Dockerfile.jvm_gpu_build index c396e1d..ec94d08 100644 --- a/containers/dockerfile/Dockerfile.jvm_gpu_build +++ b/containers/dockerfile/Dockerfile.jvm_gpu_build @@ -4,7 +4,7 @@ ARG CUDA_VERSION ARG NCCL_VERSION ARG MINIFORGE_VERSION=24.9.2-0 ARG CMAKE_VERSION=3.31.2 -ARG MAVEN_VERSION=3.9.12 +ARG MAVEN_VERSION=3.9.13 SHELL ["/bin/bash", "-c"] From 3a0149991e0666cd4b07888ce83bb1536c3be235 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 12 Mar 2026 00:17:12 +0800 Subject: [PATCH 04/10] Bump java versions except for manylinux 2014. --- containers/dockerfile/Dockerfile.cpu | 4 ++-- containers/dockerfile/Dockerfile.gpu | 4 ++-- containers/dockerfile/Dockerfile.jvm | 2 +- containers/dockerfile/Dockerfile.jvm_gpu_build | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/containers/dockerfile/Dockerfile.cpu b/containers/dockerfile/Dockerfile.cpu index e40c90b..15fc560 100644 --- a/containers/dockerfile/Dockerfile.cpu +++ b/containers/dockerfile/Dockerfile.cpu @@ -10,7 +10,7 @@ ENV CC=gcc-10 ENV CXX=g++-10 ENV CPP=cpp-10 ENV GOSU_VERSION=1.10 -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/ # Install all basic requirements RUN \ @@ -19,7 +19,7 @@ RUN \ apt-get install -y software-properties-common && \ add-apt-repository ppa:ubuntu-toolchain-r/test && \ apt-get update && \ - apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-10 g++-10 openjdk-8-jdk-headless && \ + apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libidn12 cmake ninja-build gcc-10 g++-10 openjdk-17-jdk-headless && \ # Miniforge wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/$MINIFORGE_VERSION/Miniforge3-$MINIFORGE_VERSION-Linux-${ARCH}.sh && \ bash conda.sh -b -p /opt/miniforge diff --git a/containers/dockerfile/Dockerfile.gpu b/containers/dockerfile/Dockerfile.gpu index 5f09bd2..126458a 100644 --- a/containers/dockerfile/Dockerfile.gpu +++ b/containers/dockerfile/Dockerfile.gpu @@ -13,7 +13,7 @@ SHELL ["/bin/bash", "-c"] ENV DEBIAN_FRONTEND=noninteractive ENV PATH=/opt/miniforge/bin:$PATH ENV GOSU_VERSION=1.10 -ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ +ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/ # Install all basic requirements RUN \ @@ -23,7 +23,7 @@ RUN \ sed -i 's/ports.ubuntu.com/mirrors.ocf.berkeley.edu/g' /etc/apt/sources.list && \ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${CUDA_REPO_ARCH}/3bf863cc.pub && \ apt-get update && \ - apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \ + apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-17-jdk-headless && \ apt-get install "libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT}" \ "libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}" -y --allow-change-held-packages && \ # Miniforge diff --git a/containers/dockerfile/Dockerfile.jvm b/containers/dockerfile/Dockerfile.jvm index 6a33694..a7a6afd 100644 --- a/containers/dockerfile/Dockerfile.jvm +++ b/containers/dockerfile/Dockerfile.jvm @@ -15,7 +15,7 @@ ENV GOSU_VERSION=1.10 RUN dnf -y update && \ dnf -y install dnf-plugins-core && \ dnf config-manager --set-enabled powertools && \ - dnf install -y tar unzip make bzip2 wget xz git which ninja-build java-1.8.0-openjdk-devel \ + dnf install -y tar unzip make bzip2 wget xz git which ninja-build java-17-openjdk-devel \ gcc-toolset-10-gcc gcc-toolset-10-binutils gcc-toolset-10-gcc-c++ \ gcc-toolset-10-runtime gcc-toolset-10-libstdc++-devel && \ # Miniforge diff --git a/containers/dockerfile/Dockerfile.jvm_gpu_build b/containers/dockerfile/Dockerfile.jvm_gpu_build index ec94d08..02171f8 100644 --- a/containers/dockerfile/Dockerfile.jvm_gpu_build +++ b/containers/dockerfile/Dockerfile.jvm_gpu_build @@ -21,7 +21,7 @@ RUN \ dnf -y update && \ dnf -y install dnf-plugins-core && \ dnf config-manager --set-enabled powertools && \ - dnf install -y tar unzip wget xz git which ninja-build java-1.8.0-openjdk-devel gcc-toolset-10-gcc gcc-toolset-10-binutils gcc-toolset-10-gcc-c++ && \ + dnf install -y tar unzip wget xz git which ninja-build java-17-openjdk-devel gcc-toolset-10-gcc gcc-toolset-10-binutils gcc-toolset-10-gcc-c++ && \ # Miniforge wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/$MINIFORGE_VERSION/Miniforge3-$MINIFORGE_VERSION-Linux-x86_64.sh && \ bash conda.sh -b -p /opt/miniforge && \ From 6e0f5c11aeb8d44dfd47107fa1aae5c8dc4be827 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 12 Mar 2026 06:05:16 +0800 Subject: [PATCH 05/10] grpcio. --- containers/conda_env/aarch64_test.yml | 1 + containers/conda_env/linux_cpu_test.yml | 1 + containers/dockerfile/Dockerfile.gpu | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/containers/conda_env/aarch64_test.yml b/containers/conda_env/aarch64_test.yml index bbf649b..4a3b0d8 100644 --- a/containers/conda_env/aarch64_test.yml +++ b/containers/conda_env/aarch64_test.yml @@ -28,6 +28,7 @@ dependencies: - loky>=3.5.1 - pyarrow - pyspark>=4.0 +- grpcio # used by spark connect - cloudpickle - pip: - awscli diff --git a/containers/conda_env/linux_cpu_test.yml b/containers/conda_env/linux_cpu_test.yml index 64e23d7..eb8d41d 100644 --- a/containers/conda_env/linux_cpu_test.yml +++ b/containers/conda_env/linux_cpu_test.yml @@ -36,5 +36,6 @@ dependencies: - cloudpickle - modin - pyspark>=4.0 +- grpcio # used by spark connect - pip: - py-ubjson diff --git a/containers/dockerfile/Dockerfile.gpu b/containers/dockerfile/Dockerfile.gpu index 126458a..a646803 100644 --- a/containers/dockerfile/Dockerfile.gpu +++ b/containers/dockerfile/Dockerfile.gpu @@ -44,7 +44,7 @@ RUN \ "dask-cuda=$RAPIDS_VERSION.*" "dask-cudf=$RAPIDS_VERSION.*" cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ python-kubernetes urllib3 graphviz hypothesis "loky>=3.5.1" \ - "pyspark>=4.0" cloudpickle cuda-python && \ + "pyspark>=4.0" grpcio cloudpickle cuda-python && \ mamba clean --all --yes # Install lightweight sudo (not bound to TTY) From 97e2feb981ef0feacc54b9c7882eb2a3171f085f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 15 Mar 2026 23:24:18 +0800 Subject: [PATCH 06/10] grpcio-status. --- containers/conda_env/aarch64_test.yml | 1 + containers/conda_env/linux_cpu_test.yml | 1 + containers/dockerfile/Dockerfile.gpu | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/containers/conda_env/aarch64_test.yml b/containers/conda_env/aarch64_test.yml index 4a3b0d8..cffc940 100644 --- a/containers/conda_env/aarch64_test.yml +++ b/containers/conda_env/aarch64_test.yml @@ -29,6 +29,7 @@ dependencies: - pyarrow - pyspark>=4.0 - grpcio # used by spark connect +- grpcio-status - cloudpickle - pip: - awscli diff --git a/containers/conda_env/linux_cpu_test.yml b/containers/conda_env/linux_cpu_test.yml index eb8d41d..78ff781 100644 --- a/containers/conda_env/linux_cpu_test.yml +++ b/containers/conda_env/linux_cpu_test.yml @@ -37,5 +37,6 @@ dependencies: - modin - pyspark>=4.0 - grpcio # used by spark connect +- grpcio-status - pip: - py-ubjson diff --git a/containers/dockerfile/Dockerfile.gpu b/containers/dockerfile/Dockerfile.gpu index a646803..05605b9 100644 --- a/containers/dockerfile/Dockerfile.gpu +++ b/containers/dockerfile/Dockerfile.gpu @@ -44,7 +44,7 @@ RUN \ "dask-cuda=$RAPIDS_VERSION.*" "dask-cudf=$RAPIDS_VERSION.*" cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ python-kubernetes urllib3 graphviz hypothesis "loky>=3.5.1" \ - "pyspark>=4.0" grpcio cloudpickle cuda-python && \ + "pyspark>=4.0" grpcio grpcio-status cloudpickle cuda-python && \ mamba clean --all --yes # Install lightweight sudo (not bound to TTY) From a5e2baea0c0eff39a9562d29713be277417c0a0a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 16 Mar 2026 00:17:53 +0800 Subject: [PATCH 07/10] maven. --- containers/dockerfile/Dockerfile.jvm_gpu_build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/dockerfile/Dockerfile.jvm_gpu_build b/containers/dockerfile/Dockerfile.jvm_gpu_build index 02171f8..c824419 100644 --- a/containers/dockerfile/Dockerfile.jvm_gpu_build +++ b/containers/dockerfile/Dockerfile.jvm_gpu_build @@ -4,7 +4,7 @@ ARG CUDA_VERSION ARG NCCL_VERSION ARG MINIFORGE_VERSION=24.9.2-0 ARG CMAKE_VERSION=3.31.2 -ARG MAVEN_VERSION=3.9.13 +ARG MAVEN_VERSION=3.9.14 SHELL ["/bin/bash", "-c"] From 04f101bae14e78521265dbee679a54d2dd2494e9 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 16 Mar 2026 00:23:52 +0800 Subject: [PATCH 08/10] maven. --- containers/dockerfile/Dockerfile.jvm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/containers/dockerfile/Dockerfile.jvm b/containers/dockerfile/Dockerfile.jvm index a7a6afd..c243712 100644 --- a/containers/dockerfile/Dockerfile.jvm +++ b/containers/dockerfile/Dockerfile.jvm @@ -1,7 +1,7 @@ FROM rockylinux:8 ARG MINIFORGE_VERSION=24.9.2-0 ARG CMAKE_VERSION=3.31.2 -ARG MAVEN_VERSION=3.9.13 +ARG MAVEN_VERSION=3.9.14 SHELL ["/bin/bash", "-c"] From 757c926d6288f7f49a8547b11f4dfe085a891e9e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 16 Mar 2026 00:26:16 +0800 Subject: [PATCH 09/10] try upgrade. --- containers/dockerfile/Dockerfile.i386 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/containers/dockerfile/Dockerfile.i386 b/containers/dockerfile/Dockerfile.i386 index 5ecae14..5643e17 100644 --- a/containers/dockerfile/Dockerfile.i386 +++ b/containers/dockerfile/Dockerfile.i386 @@ -5,8 +5,8 @@ SHELL ["/bin/bash", "-c"] ENV DEBIAN_FRONTEND=noninteractive ENV GOSU_VERSION=1.10 -RUN apt-get update && \ - apt-get install -y tar unzip wget git build-essential ninja-build cmake curl +RUN apt-get update && apt-get upgrade -y && \ + apt-get install -y tar unzip wget git build-essential ninja-build cmake curl ca-certificates # Install lightweight sudo (not bound to TTY) RUN set -ex; \ From 926e5d26c81d50d2046b77d24cde24972995a862 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 17 Mar 2026 19:54:42 +0800 Subject: [PATCH 10/10] All missing deps. --- containers/conda_env/aarch64_test.yml | 5 ++++- containers/conda_env/linux_cpu_test.yml | 5 ++++- containers/dockerfile/Dockerfile.gpu | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/containers/conda_env/aarch64_test.yml b/containers/conda_env/aarch64_test.yml index cffc940..8944de0 100644 --- a/containers/conda_env/aarch64_test.yml +++ b/containers/conda_env/aarch64_test.yml @@ -27,9 +27,12 @@ dependencies: - llvmlite - loky>=3.5.1 - pyarrow +# PySpark - pyspark>=4.0 -- grpcio # used by spark connect +- grpcio - grpcio-status +- googleapis-common-protos +- zstandard - cloudpickle - pip: - awscli diff --git a/containers/conda_env/linux_cpu_test.yml b/containers/conda_env/linux_cpu_test.yml index 78ff781..c5e8023 100644 --- a/containers/conda_env/linux_cpu_test.yml +++ b/containers/conda_env/linux_cpu_test.yml @@ -35,8 +35,11 @@ dependencies: - protobuf - cloudpickle - modin +# PySpark - pyspark>=4.0 -- grpcio # used by spark connect +- grpcio - grpcio-status +- googleapis-common-protos +- zstandard - pip: - py-ubjson diff --git a/containers/dockerfile/Dockerfile.gpu b/containers/dockerfile/Dockerfile.gpu index 05605b9..0fb06e6 100644 --- a/containers/dockerfile/Dockerfile.gpu +++ b/containers/dockerfile/Dockerfile.gpu @@ -43,8 +43,8 @@ RUN \ distributed \ "dask-cuda=$RAPIDS_VERSION.*" "dask-cudf=$RAPIDS_VERSION.*" cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel \ - python-kubernetes urllib3 graphviz hypothesis "loky>=3.5.1" \ - "pyspark>=4.0" grpcio grpcio-status cloudpickle cuda-python && \ + python-kubernetes urllib3 graphviz hypothesis "loky>=3.5.1" cuda-python \ + "pyspark>=4.0" grpcio grpcio-status googleapis-common-protos zstandard cloudpickle && \ mamba clean --all --yes # Install lightweight sudo (not bound to TTY)