Skip to content

Commit 9903d01

Browse files
Revert "[UX] Pre-build a EFA version of the default Docker image #2793"
1 parent 2946a20 commit 9903d01

File tree

2 files changed

+42
-33
lines changed

2 files changed

+42
-33
lines changed

docker/base/Dockerfile

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@
33
# Build stage
44
FROM nvidia/cuda:12.1.1-base-ubuntu20.04 AS builder
55

6-
ARG NCCL_VERSION=2.26.2-1
7-
86
ENV NCCL_HOME=/opt/nccl
97
ENV CUDA_HOME=/usr/local/cuda
108
ENV OPEN_MPI_PATH=/usr/lib/x86_64-linux-gnu/openmpi
11-
ENV NCCL_TESTS_HOME=/opt/nccl-tests
129

13-
# Install build dependencies
10+
# Prerequisites
11+
1412
RUN export DEBIAN_FRONTEND=noninteractive \
1513
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub \
1614
&& apt-get update --fix-missing \
@@ -32,36 +30,42 @@ RUN export DEBIAN_FRONTEND=noninteractive \
3230
python3 \
3331
build-essential
3432

35-
# Build NCCL
33+
# NCCL
34+
35+
ARG NCCL_VERSION=2.26.2-1
36+
3637
RUN cd /tmp \
3738
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
3839
&& cd nccl \
3940
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME}
4041

41-
# Build NCCL tests
42-
RUN git clone https://github.com/NVIDIA/nccl-tests ${NCCL_TESTS_HOME} \
43-
&& cd ${NCCL_TESTS_HOME} \
42+
# NCCL tests
43+
44+
RUN cd /opt \
45+
&& git clone https://github.com/NVIDIA/nccl-tests \
46+
&& cd nccl-tests \
4447
&& make -j$(nproc) \
4548
MPI=1 \
4649
MPI_HOME=${OPEN_MPI_PATH} \
4750
CUDA_HOME=${CUDA_HOME} \
4851
NCCL_HOME=${NCCL_HOME}
4952

5053
# Final stage
54+
5155
INCLUDE+ base/Dockerfile.common
5256

5357
ENV NCCL_HOME=/opt/nccl
54-
ENV NCCL_TESTS_HOME=/opt/nccl-tests
5558

56-
COPY --from=builder ${NCCL_HOME}/lib ${NCCL_HOME}/lib
57-
COPY --from=builder ${NCCL_HOME}/include ${NCCL_HOME}/include
58-
COPY --from=builder ${NCCL_TESTS_HOME}/build ${NCCL_TESTS_HOME}
59+
COPY --from=builder ${NCCL_HOME} ${NCCL_HOME}
60+
COPY --from=builder /opt/nccl-tests/build /opt/nccl-tests/build
5961

6062
ARG FLAVOR
6163

62-
# Configure library paths
64+
# MPI, NVCC, and /etc/ld.so.conf.d
65+
6366
RUN apt-get update \
64-
&& apt-get install -y --no-install-recommends openmpi-bin \
67+
&& apt-get install -y --no-install-recommends \
68+
openmpi-bin \
6569
&& if [ "$FLAVOR" = "devel" ]; then \
6670
cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
6771
&& apt-get install -y --no-install-recommends \

docker/base/efa.Dockerfile

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
INCLUDE+ base/Dockerfile.common
44

5-
ENV PREFIX=/usr/local
6-
ENV CUDA_PATH=/usr/local/cuda
5+
ENV NCCL_HOME=/usr/local
6+
ENV CUDA_HOME=/usr/local/cuda
77
ENV LIBFABRIC_PATH=/opt/amazon/efa
88
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
99
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
1010
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1111

12-
# prerequisites
12+
# Prerequisites
1313

1414
RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1515
&& apt-get update \
@@ -19,53 +19,58 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1919
libhwloc-dev \
2020
autoconf \
2121
automake \
22-
libtool
22+
libtool \
23+
&& rm -rf /var/lib/apt/lists/*
2324

2425
# EFA
2526

2627
ARG EFA_VERSION=1.38.1
2728

28-
RUN cd $HOME \
29+
RUN cd /tmp \
30+
&& apt-get update \
2931
&& curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
3032
&& tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
3133
&& cd aws-efa-installer \
32-
&& ./efa_installer.sh -y --skip-kmod -g
34+
&& ./efa_installer.sh -y --skip-kmod -g \
35+
&& rm -rf /tmp/aws-efa-installer /var/lib/apt/lists/*
3336

3437
# NCCL
3538

3639
ARG NCCL_VERSION=2.26.2-1
3740

38-
RUN cd $HOME \
41+
RUN cd /tmp \
3942
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
4043
&& cd nccl \
41-
&& make -j$(nproc) src.build BUILDDIR=${PREFIX}
44+
&& make -j$(nproc) src.build BUILDDIR=${NCCL_HOME} \
45+
&& rm -rf /tmp/nccl
4246

4347
# AWS OFI NCCL
4448

4549
ARG OFI_VERSION=1.14.0
4650

47-
RUN cd $HOME \
51+
RUN cd /tmp \
4852
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
4953
&& cd aws-ofi-nccl \
5054
&& ./autogen.sh \
5155
&& ./configure \
52-
--with-cuda=${CUDA_PATH} \
56+
--with-cuda=${CUDA_HOME} \
5357
--with-libfabric=${LIBFABRIC_PATH} \
5458
--with-mpi=${OPEN_MPI_PATH} \
55-
--with-cuda=${CUDA_PATH} \
56-
--with-nccl=${PREFIX} \
59+
--with-cuda=${CUDA_HOME} \
60+
--with-nccl=${NCCL_HOME} \
5761
--disable-tests \
58-
--prefix=${PREFIX} \
59-
&& make -j$(numproc) \
60-
&& make install
62+
--prefix=${NCCL_HOME} \
63+
&& make -j$(nproc) \
64+
&& make install \
65+
&& rm -rf /tmp/aws-ofi-nccl /var/lib/apt/lists/*
6166

6267
# NCCL Tests
6368

64-
RUN cd $HOME \
69+
RUN cd /opt \
6570
&& git clone https://github.com/NVIDIA/nccl-tests \
6671
&& cd nccl-tests \
67-
&& make -j$(numproc) \
72+
&& make -j$(nproc) \
6873
MPI=1 \
6974
MPI_HOME=${OPEN_MPI_PATH} \
70-
CUDA_HOME=${CUDA_PATH} \
71-
NCCL_HOME=${PREFIX}
75+
CUDA_HOME=${CUDA_HOME} \
76+
NCCL_HOME=${NCCL_HOME}

0 commit comments

Comments
 (0)