@@ -8,6 +8,7 @@ ARG EFA_VERSION=1.38.1
88ARG OFI_VERSION=1.14.0
99
1010ENV NCCL_HOME=/opt/nccl
11+ ENV OFI_NCCL_HOME=/opt/amazon/ofi-nccl
1112ENV CUDA_HOME=/usr/local/cuda
1213ENV LIBFABRIC_PATH=/opt/amazon/efa
1314ENV OPEN_MPI_PATH=/opt/amazon/openmpi
@@ -76,19 +77,24 @@ ENV NCCL_HOME=/opt/nccl
7677ENV LIBFABRIC_PATH=/opt/amazon/efa
7778ENV OPEN_MPI_PATH=/opt/amazon/openmpi
7879ENV NCCL_TESTS_HOME=/opt/nccl-tests
79-
8080ENV PATH="${LIBFABRIC_PATH}/bin:${OPEN_MPI_PATH}/bin:${PATH}"
81- # TODO: Unsure if this is required; updating `/etc/ld.so.conf.d` should be enough
82- ENV LD_LIBRARY_PATH="${OPEN_MPI_PATH}/lib:${LD_LIBRARY_PATH}"
8381
8482COPY --from=builder ${NCCL_HOME} ${NCCL_HOME}
85- COPY --from=builder ${LIBFABRIC_PATH} ${LIBFABRIC_PATH}
86- COPY --from=builder ${OPEN_MPI_PATH} ${OPEN_MPI_PATH}
83+ COPY --from=builder ${OFI_NCCL_HOME} ${OFI_NCCL_HOME}
84+ COPY --from=builder /etc/ld.so.conf.d/100_ofinccl.conf /etc/ld.so.conf.d/100_ofinccl.conf
8785COPY --from=builder ${NCCL_TESTS_HOME}/build ${NCCL_TESTS_HOME}
88- COPY --from=builder /etc/ld.so.conf.d/000_efa.conf /etc/ld.so.conf.d/000_efa.conf
89- COPY --from=builder /etc/profile.d/zippy_efa.sh /etc/profile.d/zippy_efa.sh
9086
91- RUN echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
87+ RUN apt-get update \
88+ && apt-get install -y --no-install-recommends \
89+ libevent-dev \
90+ libhwloc-dev \
91+ && cd /tmp \
92+ && curl -fsSLO https://s3-us-west-2.amazonaws.com/aws-efa-installer/aws-efa-installer-${EFA_VERSION}.tar.gz \
93+ && tar -xf aws-efa-installer-${EFA_VERSION}.tar.gz \
94+ && cd aws-efa-installer \
95+ && ./efa_installer.sh -y --skip-kmod -g \
96+ && rm -rf /tmp/aws-efa-installer /tmp/aws-efa-installer-${EFA_VERSION}.tar.gz /var/lib/apt/lists/* \
97+ && echo "${NCCL_HOME}/lib" >> /etc/ld.so.conf.d/nccl.conf \
9298 && echo "${OPEN_MPI_PATH}/lib" >> /etc/ld.so.conf.d/openmpi.conf \
9399 && echo "${LIBFABRIC_PATH}/lib" >> /etc/ld.so.conf.d/efa.conf \
94100 && ldconfig
0 commit comments