Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 43 additions & 11 deletions build/nvcr.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
## Global Args #################################################################
## If the nvcr container is updated, ensure to check the torch and python
## installation version inside the dockerfile before pushing changes.
ARG NVCR_IMAGE_VERSION=25.10-py3
ARG NVCR_IMAGE_VERSION=25.02-py3

# This is based on what is inside the NVCR image already
ARG PYTHON_VERSION=3.12
Expand All @@ -28,26 +28,58 @@ ARG USER_UID=0
ARG WORKDIR=/app
ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning

ARG ENABLE_FMS_ACCELERATION=true
ARG ENABLE_AIM=false
ARG ENABLE_MLFLOW=false
ARG ENABLE_SCANNER=false
ARG ENABLE_CLEARML=true
ARG ENABLE_TRITON_KERNELS=true
ARG ENABLE_RECOMMENDER=true

# Ensures to always build mamba_ssm from source
ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm

# install triton kernels
RUN pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"
# upgrade torch as the base layer contains only torch 2.7
RUN python -m pip install --upgrade pip && \
pip install --upgrade setuptools && \
pip install --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128

# Install main package + flash attention
COPY . ${SOURCE_DIR}
RUN cd ${SOURCE_DIR}

RUN pip install --no-cache-dir ${SOURCE_DIR}[flash-attn,mamba,fms-accel,clearml,tuning_config_recommender]

# install fms-accel packages
RUN python -m fms_acceleration.cli install fms_acceleration_peft && \
python -m fms_acceleration.cli install fms_acceleration_foak && \
python -m fms_acceleration.cli install fms_acceleration_aadp && \
python -m fms_acceleration.cli install fms_acceleration_moe && \
python -m fms_acceleration.cli install fms_acceleration_odm
RUN pip install --no-cache-dir ${SOURCE_DIR} && \
pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[flash-attn] && \
pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[mamba]

# Optional extras
RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[fms-accel] && \
python -m fms_acceleration.cli install fms_acceleration_peft && \
python -m fms_acceleration.cli install fms_acceleration_foak && \
python -m fms_acceleration.cli install fms_acceleration_aadp && \
python -m fms_acceleration.cli install fms_acceleration_moe && \
python -m fms_acceleration.cli install fms_acceleration_odm; \
fi

RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \
pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \
fi
RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \
fi
RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[aim]; \
fi
RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[mlflow]; \
fi
RUN if [[ "${ENABLE_SCANNER}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[scanner-dev]; \
fi
RUN if [[ "${ENABLE_RECOMMENDER}" == "true" ]]; then \
pip install --no-cache-dir ${SOURCE_DIR}[tuning_config_recommender]; \
fi

# cleanup build artifacts and caches
RUN rm -rf /root/.cache /tmp/pip-* \
Expand Down
Loading