Skip to content

Commit 0172ab7

Browse files
[UX] Pre-build an EFA version of the default Docker image #2793
1 parent eac604a commit 0172ab7

File tree

2 files changed

+39
-67
lines changed

2 files changed

+39
-67
lines changed

docker/base/efa/Dockerfile

Lines changed: 24 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ ENV NCCL_TESTS_HOME=/opt/nccl-tests
1010
ENV PATH="${LIBFABRIC_PATH}/bin:${MPI_HOME}/bin:${NCCL_TESTS_HOME}/build:${PATH}"
1111
ENV LD_LIBRARY_PATH="${MPI_HOME}/lib:${NCCL_HOME}/lib:${LD_LIBRARY_PATH}"
1212

13-
# Prerequisites
13+
ARG EFA_VERSION=1.38.1
14+
ARG NCCL_VERSION=2.26.2-1
15+
ARG OFI_VERSION=1.14.0
16+
ARG FLAVOR
1417

1518
RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1619
&& apt-get install -y --no-install-recommends \
@@ -19,32 +22,17 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1922
libhwloc-dev \
2023
autoconf \
2124
automake \
22-
libtool
23-
24-
# EFA
25-
26-
ARG EFA_VERSION=1.38.1
27-
28-
RUN cd $HOME \
25+
libtool \
26+
&& cd $HOME \
2927
&& curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
3028
&& tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
3129
&& cd aws-efa-installer \
32-
&& ./efa_installer.sh -y --skip-kmod -g
33-
34-
# NCCL
35-
36-
ARG NCCL_VERSION=2.26.2-1
37-
38-
RUN cd $HOME \
30+
&& ./efa_installer.sh -y --skip-kmod -g \
31+
&& cd $HOME \
3932
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
4033
&& cd nccl \
41-
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
42-
43-
# AWS OFI NCCL
44-
45-
ARG OFI_VERSION=1.14.0
46-
47-
RUN cd $HOME \
34+
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
35+
&& cd $HOME \
4836
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
4937
&& cd aws-ofi-nccl \
5038
&& ./autogen.sh \
@@ -56,33 +44,24 @@ RUN cd $HOME \
5644
--disable-tests \
5745
--prefix=${NCCL_HOME} \
5846
&& make -j$(nproc) \
59-
&& make install
60-
61-
# NCCL Tests
62-
63-
RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
47+
&& make install \
48+
&& git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
6449
&& cd ${NCCL_TESTS_HOME} \
6550
&& make -j$(nproc) \
6651
MPI=1 \
6752
MPI_HOME=${MPI_HOME} \
6853
CUDA_HOME=${CUDA_HOME} \
6954
NCCL_HOME=${NCCL_HOME} \
7055
&& echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
71-
&& ldconfig
72-
73-
ARG FLAVOR
74-
ENV FLAVOR=${FLAVOR}
75-
76-
# If FLAVOR is base, uninstall development packages to reduce image size
77-
RUN if [ "$FLAVOR" = "base" ]; then \
78-
cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
79-
&& apt-get remove -y \
80-
cuda-nvcc-${cuda_version} \
81-
libhwloc-dev \
82-
autoconf \
83-
automake \
84-
libtool \
85-
&& apt-get autoremove -y \
86-
&& apt-get clean \
87-
&& rm -rf /var/lib/apt/lists/*; \
88-
fi
56+
&& ldconfig \
57+
&& if [ "$FLAVOR" = "base" ]; then \
58+
apt-get remove -y \
59+
cuda-nvcc-${cuda_version} \
60+
libhwloc-dev \
61+
autoconf \
62+
automake \
63+
libtool \
64+
&& apt-get autoremove -y \
65+
&& apt-get clean \
66+
&& rm -rf /var/lib/apt/lists/*; \
67+
fi

docker/base/nvidia/Dockerfile

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ INCLUDE+ base/Dockerfile.common
55
# NCCL & NCCL tests
66

77
ARG NCCL_VERSION=2.26.2-1
8+
ARG FLAVOR
89

910
ENV NCCL_HOME=/usr/local
1011
ENV CUDA_HOME=/usr/local/cuda
@@ -21,9 +22,8 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
2122
autoconf \
2223
automake \
2324
libtool \
24-
libopenmpi-dev
25-
26-
RUN cd $HOME \
25+
libopenmpi-dev \
26+
&& cd $HOME \
2727
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
2828
&& cd nccl \
2929
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
@@ -35,22 +35,15 @@ RUN cd $HOME \
3535
CUDA_HOME=${CUDA_HOME} \
3636
NCCL_HOME=${NCCL_HOME} \
3737
&& echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
38-
&& ldconfig
39-
40-
ARG FLAVOR
41-
ENV FLAVOR=${FLAVOR}
42-
43-
# If FLAVOR is base, uninstall development packages to reduce image size
44-
RUN if [ "$FLAVOR" = "base" ]; then \
45-
cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
46-
&& apt-get remove -y \
47-
cuda-nvcc-${cuda_version} \
48-
libhwloc-dev \
49-
autoconf \
50-
automake \
51-
libtool \
52-
libopenmpi-dev \
53-
&& apt-get autoremove -y \
54-
&& apt-get clean \
55-
&& rm -rf /var/lib/apt/lists/*; \
56-
fi
38+
&& ldconfig \
39+
&& if [ "$FLAVOR" = "base" ]; then \
40+
apt-get remove -y \
41+
cuda-nvcc-${cuda_version} \
42+
libhwloc-dev \
43+
autoconf \
44+
automake \
45+
libtool \
46+
&& apt-get autoremove -y \
47+
&& apt-get clean \
48+
&& rm -rf /var/lib/apt/lists/*; \
49+
fi

0 commit comments

Comments
 (0)