@@ -49,7 +49,7 @@ COPY . ${SOURCE_DIR}
4949RUN cd ${SOURCE_DIR}
5050
5151RUN pip install --no-cache-dir ${SOURCE_DIR} && \
52- pip install --user --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
52+ pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
5353 pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[mamba]
5454
5555# Optional extras
@@ -81,29 +81,50 @@ RUN if [[ "${ENABLE_RECOMMENDER}" == "true" ]]; then \
8181 pip install --no-cache-dir ${SOURCE_DIR}[tuning_config_recommender]; \
8282 fi
8383
84- # cleanup
85- RUN rm -rf /root/.cache /tmp/* /opt/pytorch
84+ # cleanup build artifacts and caches
85+ RUN rm -rf /root/.cache /tmp/pip-* \
86+ && find /usr/local/lib/python3.12/dist-packages \
87+ \( -type d -name "__pycache__" -o -type d -name "tests" -o -type d -name "test" \) \
88+ -exec rm -rf {} + 2>/dev/null || true \
89+ && find /usr/local/lib/python3.12/dist-packages -name "*.pyc" -delete 2>/dev/null || true
8690
8791# ####################### RUNTIME ########################
8892FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION}
8993
9094ARG WORKDIR=/app
9195ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning
9296
93- RUN mkdir -p /app && \
94- chown -R root:0 /app /tmp && \
95- chmod -R g+rwX /app /tmp
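97+ # Remove bloat inherited from the base image in a single RUN. NOTE: this trims the final filesystem only; files stored in the base image's own layers still count toward image size.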
98+ # - /opt/pytorch: PyTorch source/examples bundled in NVCR
99+ # - CUDA static libs (*.a): only needed for static linking at compile time
100+ # - CUDA samples/docs: not needed at runtime
101+ # - pip cache and tmp
102+ RUN rm -rf \
103+ /opt/pytorch \
104+ /root/.cache \
105+ /tmp/* \
106+ /usr/local/cuda/targets/x86_64-linux/lib/*.a \
107+ /usr/local/cuda/doc \
108+ /usr/local/cuda/samples \
109+ && find /usr/local/lib/python3.12/dist-packages \
110+ \( -type d -name "__pycache__" -o -type d -name "tests" -o -type d -name "test" \) \
111+ -exec rm -rf {} + 2>/dev/null || true \
112+ && find /usr/local/lib/python3.12/dist-packages -name "*.pyc" -delete 2>/dev/null || true \
113+ && rm -rf /var/lib/apt/lists/* \
114+ && mkdir -p /app \
115+ && chown -R root:0 /app /tmp \
116+ && chmod -R g+rwX /app /tmp
117+
96118WORKDIR /app
97119
98- # Copy only Python site-packages + app
120+ # Copy Python dist-packages, /usr/local/bin executables, and the app source from the builder stage
99121COPY --from=builder /usr/local/lib/python3.12/dist-packages \
100122 /usr/local/lib/python3.12/dist-packages
123+ COPY --from=builder /usr/local/bin /usr/local/bin
101124COPY --from=builder ${SOURCE_DIR} ${SOURCE_DIR}
102125
103- # Runtime cleanup
104- RUN rm -rf /opt/pytorch /root/.cache /tmp/*
105-
106- RUN mkdir -p /.cache && chmod -R 777 /.cache
126+ RUN chmod -R g+rwX /app /tmp && \
127+ mkdir -p /.cache && chmod -R 777 /.cache
107128
108129# Copy scripts and default configs
109130COPY build/accelerate_launch.py fixtures/accelerate_fsdp_defaults.yaml /app/
0 commit comments