@@ -10,7 +10,10 @@ ENV NCCL_TESTS_HOME=/opt/nccl-tests
1010ENV PATH="${LIBFABRIC_PATH}/bin:${MPI_HOME}/bin:${NCCL_TESTS_HOME}/build:${PATH}"
1111ENV LD_LIBRARY_PATH="${MPI_HOME}/lib:${NCCL_HOME}/lib:${LD_LIBRARY_PATH}"
1212
13- # Prerequisites
13+ ARG EFA_VERSION=1.38.1
14+ ARG NCCL_VERSION=2.26.2-1
15+ ARG OFI_VERSION=1.14.0
16+ ARG FLAVOR
1417
1518RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
1619 && apt-get install -y --no-install-recommends \
@@ -19,32 +22,17 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1922 libhwloc-dev \
2023 autoconf \
2124 automake \
22- libtool
23-
24- # EFA
25-
26- ARG EFA_VERSION=1.38.1
27-
28- RUN cd $HOME \
25+ libtool \
26+ && cd $HOME \
2927 && curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
3028 && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
3129 && cd aws-efa-installer \
32- && ./efa_installer.sh -y --skip-kmod -g
33-
34- # NCCL
35-
36- ARG NCCL_VERSION=2.26.2-1
37-
38- RUN cd $HOME \
30+ && ./efa_installer.sh -y --skip-kmod -g \
31+ && cd $HOME \
3932 && git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
4033 && cd nccl \
41- && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
42-
43- # AWS OFI NCCL
44-
45- ARG OFI_VERSION=1.14.0
46-
47- RUN cd $HOME \
34+ && make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
35+ && cd $HOME \
4836 && git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
4937 && cd aws-ofi-nccl \
5038 && ./autogen.sh \
@@ -56,33 +44,24 @@ RUN cd $HOME \
5644 --disable-tests \
5745 --prefix=${NCCL_HOME} \
5846 && make -j$(nproc) \
59- && make install
60-
61- # NCCL Tests
62-
63- RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
47+ && make install \
48+ && git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
6449 && cd ${NCCL_TESTS_HOME} \
6550 && make -j$(nproc) \
6651 MPI=1 \
6752 MPI_HOME=${MPI_HOME} \
6853 CUDA_HOME=${CUDA_HOME} \
6954 NCCL_HOME=${NCCL_HOME} \
7055 && echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
71- && ldconfig
72-
73- ARG FLAVOR
74- ENV FLAVOR=${FLAVOR}
75-
76- # If FLAVOR is base, uninstall development packages to reduce image size
77- RUN if [ "$FLAVOR" = "base" ]; then \
78- cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }' ) \
79- && apt-get remove -y \
80- cuda-nvcc-${cuda_version} \
81- libhwloc-dev \
82- autoconf \
83- automake \
84- libtool \
85- && apt-get autoremove -y \
86- && apt-get clean \
87- && rm -rf /var/lib/apt/lists/*; \
88- fi
56+ && ldconfig \
57+ && if [ "$FLAVOR" = "base" ]; then \
58+ apt-get remove -y \
59+ cuda-nvcc-${cuda_version} \
60+ libhwloc-dev \
61+ autoconf \
62+ automake \
63+ libtool \
64+ && apt-get autoremove -y \
65+ && apt-get clean \
66+ && rm -rf /var/lib/apt/lists/*; \
67+ fi
0 commit comments