22
33INCLUDE+ base/Dockerfile.common
44
# Install locations referenced by the build steps in this file.
# NCCL_HOME is used both as the NCCL build output directory and as the install
# prefix of the aws-ofi-nccl plugin; CUDA_HOME is handed to the plugin's
# configure script and to the nccl-tests build.
# Always use the key=value form with no space before '=': the legacy
# space-separated form would make the '=' part of the value.
ENV NCCL_HOME=/usr/local
ENV CUDA_HOME=/usr/local/cuda
ENV LIBFABRIC_PATH=/opt/amazon/efa
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
# Expose the libfabric and Open MPI executables, and the Open MPI libraries.
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1111
12- # prerequisites
12+ # Prerequisites
1313
1414RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
1515 && apt-get update \
@@ -19,53 +19,58 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1919 libhwloc-dev \
2020 autoconf \
2121 automake \
22- libtool
22+ libtool \
23+ && rm -rf /var/lib/apt/lists/*
2324
2425# EFA
2526
2627ARG EFA_VERSION=1.38.1
2728
# Download, unpack and run the AWS EFA installer for ${EFA_VERSION}, then
# remove everything it left behind in the same layer so none of it is
# persisted in the image.
#   - apt-get update: refresh the package index first; the prerequisites layer
#     deletes /var/lib/apt/lists when it finishes.
#   - curl -f: fail the build on an HTTP error instead of saving the error
#     body to disk and handing it to tar.
#   - The downloaded tarball is deleted together with the unpacked directory.
RUN cd /tmp \
    && apt-get update \
    && curl -fsSLO https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
    && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
    && cd aws-efa-installer \
    && ./efa_installer.sh -y --skip-kmod -g \
    && cd /tmp \
    && rm -rf \
        /tmp/aws-efa-installer \
        /tmp/aws-efa-installer-${EFA_VERSION}.tar.gz \
        /var/lib/apt/lists/*
3336
3437# NCCL
3538
3639ARG NCCL_VERSION=2.26.2-1
3740
# Build NCCL from the v${NCCL_VERSION} tag, writing the build output to
# ${NCCL_HOME}, and delete the checkout in the same layer.
# The clone is shallow (--depth 1): only the tagged revision is needed and the
# working tree is removed once the build finishes.
RUN cd /tmp \
    && git clone --depth 1 -b v${NCCL_VERSION} https://github.com/NVIDIA/nccl.git \
    && cd nccl \
    && make -j"$(nproc)" src.build BUILDDIR=${NCCL_HOME} \
    && cd /tmp \
    && rm -rf /tmp/nccl
4246
4347# AWS OFI NCCL
4448
4549ARG OFI_VERSION=1.14.0
4650
# Build and install the aws-ofi-nccl plugin from the v${OFI_VERSION} tag.
# configure is pointed at the CUDA, libfabric, Open MPI and NCCL locations
# defined by the ENV instructions earlier in this file, and the plugin is
# installed under ${NCCL_HOME}. Each --with-* option is passed exactly once.
# The checkout is removed in the same layer so it is not kept in the image.
RUN cd /tmp \
    && git clone --depth 1 -b v${OFI_VERSION} https://github.com/aws/aws-ofi-nccl.git \
    && cd aws-ofi-nccl \
    && ./autogen.sh \
    && ./configure \
        --prefix=${NCCL_HOME} \
        --with-cuda=${CUDA_HOME} \
        --with-libfabric=${LIBFABRIC_PATH} \
        --with-mpi=${OPEN_MPI_PATH} \
        --with-nccl=${NCCL_HOME} \
        --disable-tests \
    && make -j"$(nproc)" \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/aws-ofi-nccl
6166
6267# NCCL Tests
6368
# Build the NCCL test binaries with MPI support under /opt/nccl-tests.
# The checkout is intentionally not deleted: the build output stays inside it.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilding the
# image can pick up different sources — consider pinning a release.
RUN cd /opt \
    && git clone --depth 1 https://github.com/NVIDIA/nccl-tests \
    && cd nccl-tests \
    && make -j"$(nproc)" \
        MPI=1 \
        MPI_HOME=${OPEN_MPI_PATH} \
        CUDA_HOME=${CUDA_HOME} \
        NCCL_HOME=${NCCL_HOME}
0 commit comments