22
INCLUDE+ base/Dockerfile.common

# Locations shared by the build steps below.
#   NCCL_HOME      - directory the NCCL tests are cloned into and built in
#   PREFIX         - build/install prefix for NCCL and the aws-ofi-nccl plugin
#   CUDA_HOME      - CUDA toolkit root passed to configure/make
#   LIBFABRIC_PATH - libfabric root passed to the plugin's configure script
#   OPEN_MPI_PATH  - Open MPI root passed to configure/make
# Always use the key=value form with no space around '=': with a space Docker
# falls back to the legacy "ENV key value" syntax and the value would start
# with a literal '='.
ENV NCCL_HOME=/opt/nccl
ENV PREFIX=/usr/local
ENV CUDA_HOME=/usr/local/cuda
ENV LIBFABRIC_PATH=/opt/amazon/efa
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1112
# Prerequisites
1314
1415RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
1516 && apt-get update \
@@ -25,47 +26,48 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
2526
# EFA installer release to download; override with --build-arg EFA_VERSION=<version>.
ARG EFA_VERSION=1.38.1

# Download, unpack and run the AWS EFA installer from /tmp.
# curl -f makes an HTTP error fail the build at the download step instead of
# saving an error page as the tarball. The tarball, the unpacked installer and
# the apt package lists are removed in this same RUN so they never persist in
# the image layer.
RUN cd /tmp \
    && curl -fsSLO https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
    && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y --skip-kmod -g \
    && rm -rf \
        /tmp/aws-efa-installer \
        /tmp/aws-efa-installer-${EFA_VERSION}.tar.gz \
        /var/lib/apt/lists/*
3335
# NCCL

# NCCL release to build; the git tag checked out is "v${NCCL_VERSION}".
ARG NCCL_VERSION=2.26.2-1

# Build NCCL from source with ${PREFIX} as the build directory, then delete the
# source tree in the same layer. Only the tagged commit is needed, so the clone
# is shallow.
RUN cd /tmp \
    && git clone --depth 1 https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
    && cd nccl \
    && make -j$(nproc) src.build BUILDDIR=${PREFIX} \
    && rm -rf /tmp/nccl
4245
# AWS OFI NCCL

# aws-ofi-nccl plugin release to build; the git tag checked out is "v${OFI_VERSION}".
ARG OFI_VERSION=1.14.0

# Build the aws-ofi-nccl plugin against CUDA, libfabric and Open MPI and
# install it under ${PREFIX}. The source tree is removed in the same layer.
# Variable references must be written without spaces inside the braces:
# "${CUDA_HOME }" is a shell "bad substitution" error.
RUN cd /tmp \
    && git clone --depth 1 https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
    && cd aws-ofi-nccl \
    && ./autogen.sh \
    && ./configure \
        --with-cuda=${CUDA_HOME} \
        --with-libfabric=${LIBFABRIC_PATH} \
        --with-mpi=${OPEN_MPI_PATH} \
        --disable-tests \
        --prefix=${PREFIX} \
    && make -j$(nproc) \
    && make install \
    && rm -rf /tmp/aws-ofi-nccl
6163
# NCCL Tests

# Clone and build nccl-tests with MPI support inside ${NCCL_HOME}. The tree is
# kept (no cleanup) because the build output lives inside it.
# mkdir -p guarantees the directory exists before cd; without it the step
# fails when nothing earlier has created ${NCCL_HOME}.
# On the make command line NCCL_HOME is deliberately set to ${PREFIX} (where
# NCCL was built), overriding the environment value for the build only.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may
# pick up a different nccl-tests revision.
RUN mkdir -p "${NCCL_HOME}" \
    && cd "${NCCL_HOME}" \
    && git clone --depth 1 https://github.com/NVIDIA/nccl-tests \
    && cd nccl-tests \
    && make -j$(nproc) \
        MPI=1 \
        MPI_HOME=${OPEN_MPI_PATH} \
        CUDA_HOME=${CUDA_HOME} \
        NCCL_HOME=${PREFIX}
0 commit comments