@@ -87,7 +87,13 @@ RUN rm -rf /root/.cache /tmp/* /opt/pytorch
8787# ####################### RUNTIME ########################
8888FROM nvcr.io/nvidia/pytorch:${NVCR_IMAGE_VERSION}
8989
90- WORKDIR ${WORKDIR}
90+ ARG WORKDIR=/app
91+ ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning
92+
93+ RUN mkdir -p /app && \
94+ chown -R root:0 /app /tmp && \
95+ chmod -R g+rwX /app /tmp
96+ WORKDIR /app
9197
9298# Copy only Python site-packages + app
9399COPY --from=builder /usr/local/lib/python3.12/dist-packages \
@@ -97,15 +103,21 @@ COPY --from=builder ${SOURCE_DIR} ${SOURCE_DIR}
97103# Runtime cleanup
98104RUN rm -rf /opt/pytorch /root/.cache /tmp/*
99105
100- RUN chmod -R g+rwX $WORKDIR /tmp
101106RUN mkdir -p /.cache && chmod -R 777 /.cache
102107
108+ # Copy scripts and default configs
109+ COPY build/accelerate_launch.py fixtures/accelerate_fsdp_defaults.yaml /app/
110+ COPY build/utils.py /app/build/
111+ RUN chmod +x /app/accelerate_launch.py
112+
113+ ENV FSDP_DEFAULTS_FILE_PATH="/app/accelerate_fsdp_defaults.yaml"
114+ ENV SET_NUM_PROCESSES_TO_NUM_GPUS="True"
115+ ENV HOME="/app"
116+
103117# Set Triton environment variables for qLoRA
104118ENV TRITON_HOME="/tmp/triton_home"
105119ENV TRITON_DUMP_DIR="/tmp/triton_dump_dir"
106120ENV TRITON_CACHE_DIR="/tmp/triton_cache_dir"
107121ENV TRITON_OVERRIDE_DIR="/tmp/triton_override_dir"
108122
109- WORKDIR $WORKDIR
110-
111- CMD ["${SOURCE_DIR}/build/accelerate_launch.py" ]
123+ CMD ["python" , "/app/accelerate_launch.py" ]
0 commit comments