Skip to content

Commit a24bd1a

Browse files
[UX] Pre-build an EFA version of the default Docker image #2793
1 parent 8ae1be6 commit a24bd1a

File tree

4 files changed

+19
-123
lines changed

4 files changed

+19
-123
lines changed

.github/workflows/docker.yml

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,7 @@ jobs:
5151
runs-on: ubuntu-latest
5252
strategy:
5353
matrix:
54-
# flavor: ["base", "devel", "devel-efa"]
55-
flavor: ["base-efa"]
54+
flavor: ["base", "devel", "devel-efa"]
5655
steps:
5756
- name: Checkout repository
5857
uses: actions/checkout@v4
@@ -71,8 +70,6 @@ jobs:
7170
FILE="base/Dockerfile"
7271
elif [ "${{ matrix.flavor }}" = "devel" ]; then
7372
FILE="base/Dockerfile"
74-
elif [ "${{ matrix.flavor }}" = "base-efa" ]; then
75-
FILE="base/base-efa.Dockerfile"
7673
else
7774
FILE="base/efa.Dockerfile"
7875
fi

docker/base/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ ENV NCCL_TESTS_HOME=/opt/nccl-tests
5555

5656
COPY --from=builder ${NCCL_HOME}/lib ${NCCL_HOME}/lib
5757
COPY --from=builder ${NCCL_HOME}/include ${NCCL_HOME}/include
58-
COPY --from=builder ${NCCL_TESTS_HOME}/build ${NCCL_TESTS_HOME}
58+
COPY --from=builder ${NCCL_TESTS_HOME}/build ${NCCL_TESTS_HOME}/build
5959

6060
ARG FLAVOR
6161

docker/base/base-efa.Dockerfile

Lines changed: 0 additions & 103 deletions
This file was deleted.

docker/base/efa.Dockerfile

Lines changed: 17 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,15 @@
22

33
INCLUDE+ base/Dockerfile.common
44

5+
ENV NCCL_HOME=/opt/nccl
56
ENV PREFIX=/usr/local
6-
ENV CUDA_PATH=/usr/local/cuda
7+
ENV CUDA_HOME=/usr/local/cuda
78
ENV LIBFABRIC_PATH=/opt/amazon/efa
89
ENV OPEN_MPI_PATH=/opt/amazon/openmpi
910
ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
1011
ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
1112

12-
# prerequisites
13+
# Prerequisites
1314

1415
RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
1516
&& apt-get update \
@@ -25,47 +26,48 @@ RUN cuda_version=$(echo ${CUDA_VERSION} | awk -F . '{ print $1"-"$2 }') \
2526

2627
ARG EFA_VERSION=1.38.1
2728

28-
RUN cd $HOME \
29+
RUN cd /tmp \
2930
&& curl -O https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
3031
&& tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
3132
&& cd aws-efa-installer \
32-
&& ./efa_installer.sh -y --skip-kmod -g
33+
&& ./efa_installer.sh -y --skip-kmod -g \
34+
&& rm -rf /tmp/aws-efa-installer /var/lib/apt/lists/*
3335

3436
# NCCL
3537

3638
ARG NCCL_VERSION=2.26.2-1
3739

38-
RUN cd $HOME \
40+
RUN cd /tmp \
3941
&& git clone https://github.com/NVIDIA/nccl.git -b v${NCCL_VERSION} \
4042
&& cd nccl \
41-
&& make -j$(nproc) src.build BUILDDIR=${PREFIX}
43+
&& make -j$(nproc) src.build BUILDDIR=${PREFIX} \
44+
&& rm -rf /tmp/nccl
4245

4346
# AWS OFI NCCL
4447

4548
ARG OFI_VERSION=1.14.0
4649

47-
RUN cd $HOME \
50+
RUN cd /tmp \
4851
&& git clone https://github.com/aws/aws-ofi-nccl.git -b v${OFI_VERSION} \
4952
&& cd aws-ofi-nccl \
5053
&& ./autogen.sh \
5154
&& ./configure \
52-
--with-cuda=${CUDA_PATH} \
55+
--with-cuda=${CUDA_HOME} \
5356
--with-libfabric=${LIBFABRIC_PATH} \
5457
--with-mpi=${OPEN_MPI_PATH} \
55-
--with-cuda=${CUDA_PATH} \
56-
--with-nccl=${PREFIX} \
5758
--disable-tests \
5859
--prefix=${PREFIX} \
59-
&& make -j$(numproc) \
60-
&& make install
60+
&& make -j$(nproc) \
61+
&& make install \
62+
&& rm -rf /tmp/aws-ofi-nccl
6163

6264
# NCCL Tests
6365

64-
RUN cd $HOME \
66+
RUN cd $NCCL_HOME \
6567
&& git clone https://github.com/NVIDIA/nccl-tests \
6668
&& cd nccl-tests \
67-
&& make -j$(numproc) \
69+
&& make -j$(nproc) \
6870
MPI=1 \
6971
MPI_HOME=${OPEN_MPI_PATH} \
70-
CUDA_HOME=${CUDA_PATH} \
72+
CUDA_HOME=${CUDA_HOME} \
7173
NCCL_HOME=${PREFIX}

0 commit comments

Comments
 (0)