Skip to content

Commit 3117fdf

Browse files
authored
update dockerfile (#667)
Signed-off-by: Dushyant Behl <dushyantbehl@in.ibm.com>
1 parent 3c27af0 commit 3117fdf

1 file changed

Lines changed: 32 additions & 11 deletions

File tree

build/nvcr.Dockerfile

Lines changed: 32 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ COPY . ${SOURCE_DIR}
4949
RUN cd ${SOURCE_DIR}
5050

5151
RUN pip install --no-cache-dir ${SOURCE_DIR} && \
52-
pip install --user --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
52+
pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
5353
pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[mamba]
5454

5555
# Optional extras
@@ -81,29 +81,50 @@ RUN if [[ "${ENABLE_RECOMMENDER}" == "true" ]]; then \
8181
pip install --no-cache-dir ${SOURCE_DIR}[tuning_config_recommender]; \
8282
fi
8383

84-
# cleanup
85-
RUN rm -rf /root/.cache /tmp/* /opt/pytorch
84+
# cleanup build artifacts and caches
85+
RUN rm -rf /root/.cache /tmp/pip-* \
86+
&& find /usr/local/lib/python3.12/dist-packages \
87+
\( -type d -name "__pycache__" -o -type d -name "tests" -o -type d -name "test" \) \
88+
-exec rm -rf {} + 2>/dev/null || true \
89+
&& find /usr/local/lib/python3.12/dist-packages -name "*.pyc" -delete 2>/dev/null || true
8690

8791
######################## RUNTIME ########################
8892
FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION}
8993

9094
ARG WORKDIR=/app
9195
ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning
9296

93-
RUN mkdir -p /app && \
94-
chown -R root:0 /app /tmp && \
95-
chmod -R g+rwX /app /tmp
97+
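# Remove bloat from the base image in a single RUN. NOTE(review): files that come from the base image's own layers are only masked by this step, so the final image shrinks only if layers are squashed at build time (confirm the build does this).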
98+
# - /opt/pytorch: PyTorch source/examples bundled in NVCR
99+
# - CUDA static libs (*.a): only needed for static linking at compile time
100+
# - CUDA samples/docs: not needed at runtime
101+
# - pip cache and tmp
102+
RUN rm -rf \
103+
/opt/pytorch \
104+
/root/.cache \
105+
/tmp/* \
106+
/usr/local/cuda/targets/x86_64-linux/lib/*.a \
107+
/usr/local/cuda/doc \
108+
/usr/local/cuda/samples \
109+
&& find /usr/local/lib/python3.12/dist-packages \
110+
\( -type d -name "__pycache__" -o -type d -name "tests" -o -type d -name "test" \) \
111+
-exec rm -rf {} + 2>/dev/null || true \
112+
&& find /usr/local/lib/python3.12/dist-packages -name "*.pyc" -delete 2>/dev/null || true \
113+
&& rm -rf /var/lib/apt/lists/* \
114+
&& mkdir -p /app \
115+
&& chown -R root:0 /app /tmp \
116+
&& chmod -R g+rwX /app /tmp
117+
96118
WORKDIR /app
97119

98-
# Copy only Python site-packages + app
120+
# Copy Python site-packages, binaries, and app from builder
99121
COPY --from=builder /usr/local/lib/python3.12/dist-packages \
100122
/usr/local/lib/python3.12/dist-packages
123+
COPY --from=builder /usr/local/bin /usr/local/bin
101124
COPY --from=builder ${SOURCE_DIR} ${SOURCE_DIR}
102125

103-
# Runtime cleanup
104-
RUN rm -rf /opt/pytorch /root/.cache /tmp/*
105-
106-
RUN mkdir -p /.cache && chmod -R 777 /.cache
126+
RUN chmod -R g+rwX /app /tmp && \
127+
mkdir -p /.cache && chmod -R 777 /.cache
107128

108129
# Copy scripts and default configs
109130
COPY build/accelerate_launch.py fixtures/accelerate_fsdp_defaults.yaml /app/

0 commit comments

Comments
 (0)