File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -7,8 +7,8 @@ x-rapids_versions:
77 stable : &rapids_version "25.12"
88
99x-cuda_versions :
10- cuda : &cuda_version "12.8.0"
11- nccl : &nccl_version "2.27.7-1"
10+ cuda : &cuda_version "12.9.0"
11+ nccl : &nccl_version "2.29.2-1"
1212
1313xgb-ci.gpu_build_rockylinux8 :
1414 container_def : gpu_build_rockylinux8
Original file line number Diff line number Diff line change @@ -18,13 +18,14 @@ ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
1818# Install all basic requirements
1919RUN \
2020 { [ $ARCH = "aarch64" ] && export CUDA_REPO_ARCH="sbsa" || export CUDA_REPO_ARCH="x86_64"; } && \
21+ export CUDA_SHORT=`echo $CUDA_VERSION | grep -o -E '[0-9]+\.[0-9]'` && \
2122 export NCCL_VERSION=$NCCL_VERSION && \
2223 sed -i 's/ports.ubuntu.com/mirrors.ocf.berkeley.edu/g' /etc/apt/sources.list && \
2324 apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${CUDA_REPO_ARCH}/3bf863cc.pub && \
2425 apt-get update && \
2526 apt-get install -y wget unzip bzip2 libgomp1 build-essential openjdk-8-jdk-headless && \
26- apt-get install "libnccl2=${NCCL_VERSION}+cuda12.9" \
27- "libnccl-dev=${NCCL_VERSION}+cuda12.9" -y --allow-change-held-packages && \
27+ apt-get install "libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT}" \
28+ "libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT}" -y --allow-change-held-packages && \
2829 # Miniforge
2930 wget -nv -O conda.sh https://github.com/conda-forge/miniforge/releases/download/$MINIFORGE_VERSION/Miniforge3-$MINIFORGE_VERSION-Linux-${ARCH}.sh && \
3031 bash conda.sh -b -p /opt/miniforge
Original file line number Diff line number Diff line change @@ -35,10 +35,11 @@ RUN \
3535# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
3636RUN \
3737 { [ $ARCH = "aarch64" ] && export CUDA_REPO_ARCH="sbsa" || export CUDA_REPO_ARCH="x86_64"; } && \
38+ export CUDA_SHORT=`echo $CUDA_VERSION | grep -o -E '[0-9]+\.[0-9]'` && \
3839 export NCCL_VERSION=$NCCL_VERSION && \
3940 dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${CUDA_REPO_ARCH}/cuda-rhel8.repo && \
4041 dnf -y update && \
41- dnf install -y libnccl-${NCCL_VERSION}+cuda13.0 libnccl-devel-${NCCL_VERSION}+cuda13.0 libnccl-static-${NCCL_VERSION}+cuda13.0
42+ dnf install -y libnccl-${NCCL_VERSION}+cuda13.1 libnccl-devel-${NCCL_VERSION}+cuda13.1 libnccl-static-${NCCL_VERSION}+cuda13.1
4243
4344# Install lightweight sudo (not bound to TTY)
4445RUN set -ex; \
Original file line number Diff line number Diff line change @@ -36,10 +36,11 @@ RUN \
3636# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
3737RUN \
3838 { [ $ARCH = "aarch64" ] && export CUDA_REPO_ARCH="sbsa" || export CUDA_REPO_ARCH="x86_64"; } && \
39+ export CUDA_SHORT=`echo $CUDA_VERSION | grep -o -E '[0-9]+\.[0-9]'` && \
3940 export NCCL_VERSION=$NCCL_VERSION && \
4041 dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${CUDA_REPO_ARCH}/cuda-rhel8.repo && \
4142 dnf -y update && \
42- dnf install -y libnccl-${NCCL_VERSION}+cuda12.9 libnccl-devel-${NCCL_VERSION}+cuda12.9
43+ dnf install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT}
4344
4445# Install gRPC
4546# Patch Abseil to apply https://github.com/abseil/abseil-cpp/issues/1629
Original file line number Diff line number Diff line change @@ -35,10 +35,11 @@ RUN \
3535
3636# NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html)
3737RUN \
38+ export CUDA_SHORT=`echo $CUDA_VERSION | grep -o -E '[0-9]+\.[0-9]'` && \
3839 export NCCL_VERSION=$NCCL_VERSION && \
3940 dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && \
4041 dnf -y update && \
41- dnf install -y libnccl-${NCCL_VERSION}+cuda12.9 libnccl-devel-${NCCL_VERSION}+cuda12.9 libnccl-static-${NCCL_VERSION}+cuda12.9
42+ dnf install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT}
4243
4344# Install Python packages
4445RUN pip install numpy pytest scipy scikit-learn wheel kubernetes awscli
You can’t perform that action at this time.
0 commit comments