4 files changed: +31 -20 lines changed
@@ -69,15 +69,12 @@ jobs:
6969 run : |
7070 if [ "${{ matrix.flavor }}" = "base" ]; then
7171 TAG_SUFFIX=""
72- FLAVOR="base"
7372 FILE="base/Dockerfile"
7473 elif [ "${{ matrix.flavor }}" = "efa" ]; then
7574 TAG_SUFFIX="-efa"
76- FLAVOR="devel"
7775 FILE="base/efa/Dockerfile"
7876 else
7977 TAG_SUFFIX="-devel"
80- FLAVOR="devel"
8178 FILE="base/devel/Dockerfile"
8279 fi
8380 docker buildx build \
Original file line number Diff line number Diff line change 1- ARG FLAVOR
2- FROM nvidia/cuda:12.1.1-${FLAVOR}-ubuntu20.04
1+ FROM nvidia/cuda:12.1.1-base-ubuntu20.04
32
43ARG PYTHON
54ARG _UV_HOME="/opt/uv"
Original file line number Diff line number Diff line change @@ -9,20 +9,30 @@ ARG NCCL_VERSION=2.26.2-1
99ENV NCCL_HOME=/usr/local
1010ENV CUDA_PATH=/usr/local/cuda
1111ENV OPEN_MPI_PATH=/usr/lib/x86_64-linux-gnu/openmpi
12+ ENV NCCL_TESTS_PATH=/opt/nccl-tests
13+ ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${NCCL_TESTS_PATH}/build:${PATH}"
1214ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${NCCL_HOME}/lib:${LD_LIBRARY_PATH}"
13- ENV PATH="${OPEN_MPI_PATH}/bin:${HOME}/nccl-tests/build:${PATH}"
1415
15- RUN apt-get install -y --no-install-recommends \
16- libopenmpi-dev \
17- && cd $HOME \
16+ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
17+ && apt-get install -y --no-install-recommends \
18+ cuda-libraries-dev-${cuda_version} \
19+ cuda-nvcc-${cuda_version} \
20+ libhwloc-dev \
21+ autoconf \
22+ automake \
23+ libtool \
24+ libopenmpi-dev
25+
26+ RUN cd $HOME \
1827 && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
1928 && cd nccl \
2029 && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
21- && cd $HOME \
22- && git clone https://github.com/NVIDIA/nccl-tests \
23- && cd nccl-tests \
30+ && git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_PATH} \
31+ && cd ${NCCL_TESTS_PATH} \
2432 && make -j$(nproc) \
2533 MPI=1 \
2634 MPI_HOME=${OPEN_MPI_PATH} \
2735 CUDA_HOME=${CUDA_PATH} \
28- NCCL_HOME=${NCCL_HOME}
36+ NCCL_HOME=${NCCL_HOME} \
37+ && echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
38+ && ldconfig
Original file line number Diff line number Diff line change @@ -6,13 +6,16 @@ ENV NCCL_HOME=/usr/local
66ENV CUDA_PATH=/usr/local/cuda
77ENV LIBFABRIC_PATH=/opt/amazon/efa
88ENV OPEN_MPI_PATH=/opt/amazon/openmpi
9- ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${HOME}/nccl-tests/build:${PATH}"
10- ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
9+ ENV NCCL_TESTS_PATH=/opt/nccl-tests
10+ ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${NCCL_TESTS_PATH}/build:${PATH}"
11+ ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${NCCL_HOME}/lib:${LD_LIBRARY_PATH}"
1112
1213# Prerequisites
1314
14- RUN apt-get update \
15+ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
1516 && apt-get install -y --no-install-recommends \
17+ cuda-libraries-dev-${cuda_version} \
18+ cuda-nvcc-${cuda_version} \
1619 libhwloc-dev \
1720 autoconf \
1821 automake \
@@ -58,11 +61,13 @@ RUN cd $HOME \
5861
5962# NCCL Tests
6063
61- RUN cd $HOME \
62- && git clone https://github.com/NVIDIA/nccl-tests \
63- && cd nccl-tests \
64+ RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_PATH} \
65+ && cd ${NCCL_TESTS_PATH} \
6466 && make -j$(nproc) \
6567 MPI=1 \
6668 MPI_HOME=${OPEN_MPI_PATH} \
6769 CUDA_HOME=${CUDA_PATH} \
68- NCCL_HOME=${NCCL_HOME}
70+ NCCL_HOME=${NCCL_HOME} \
71+ && echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
72+ && ldconfig
73+
You can’t perform that action at this time.
0 commit comments