1- # syntax = edrevo/dockerfile-plus
1+ FROM nvidia/cuda:12.1.1-devel-ubuntu20.04
22
3- INCLUDE+ base/Dockerfile
4-
5- ENV PREFIX=/usr/local
3+ ENV NCCL_HOME=/usr/local
64ENV CUDA_PATH=/usr/local/cuda
75ENV LIBFABRIC_PATH=/opt/amazon/efa
86ENV OPEN_MPI_PATH=/opt/amazon/openmpi
9- ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
7+ ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${HOME}/nccl-tests/build:${PATH}"
108ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
119
12- # prerequisites
10+ # Prerequisites
1311
14- RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
15- && apt-get update \
12+ RUN apt-get update \
1613 && apt-get install -y --no-install-recommends \
17- cuda-libraries-dev-${cuda_version} \
18- cuda-nvcc-${cuda_version} \
1914 libhwloc-dev \
2015 autoconf \
2116 automake \
@@ -38,7 +33,7 @@ ARG NCCL_VERSION=2.26.2-1
3833RUN cd $HOME \
3934 && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
4035 && cd nccl \
41- && make -j$(nproc) src.build BUILDDIR=${PREFIX}
36+ && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
4237
4338# AWS OFI NCCL
4439
@@ -53,9 +48,9 @@ RUN cd $HOME \
5348 --with-libfabric=${LIBFABRIC_PATH} \
5449 --with-mpi=${OPEN_MPI_PATH} \
5550 --with-cuda=${CUDA_PATH} \
56- --with-nccl=${PREFIX} \
51+ --with-nccl=${NCCL_HOME} \
5752 --disable-tests \
58- --prefix=${PREFIX} \
53+ --prefix=${NCCL_HOME} \
5954 && make -j$(nproc) \
6055 && make install
6156
@@ -68,12 +63,4 @@ RUN cd $HOME \
6863 MPI=1 \
6964 MPI_HOME=${OPEN_MPI_PATH} \
7065 CUDA_HOME=${CUDA_PATH} \
71- NCCL_HOME=${PREFIX}
72-
73- ARG BUILD_DATE
74- ARG IMAGE_NAME
75- ARG DSTACK_REVISION
76-
77- LABEL org.opencontainers.image.title="${IMAGE_NAME}"
78- LABEL org.opencontainers.image.version="${EFA_VERSION}-${DSTACK_REVISION}"
79- LABEL org.opencontainers.image.created="${BUILD_DATE}"
66+ NCCL_HOME=${NCCL_HOME}
0 commit comments