diff --git a/build/nvcr.Dockerfile b/build/nvcr.Dockerfile index b85c62dd9..aa575a2ef 100644 --- a/build/nvcr.Dockerfile +++ b/build/nvcr.Dockerfile @@ -15,7 +15,7 @@ ## Global Args ################################################################# ## If the nvcr container is updated, ensure to check the torch and python ## installation version inside the dockerfile before pushing changes. -ARG NVCR_IMAGE_VERSION=25.10-py3 +ARG NVCR_IMAGE_VERSION=25.02-py3 # This is based on what is inside the NVCR image already ARG PYTHON_VERSION=3.12 @@ -28,26 +28,58 @@ ARG USER_UID=0 ARG WORKDIR=/app ARG SOURCE_DIR=${WORKDIR}/fms-hf-tuning +ARG ENABLE_FMS_ACCELERATION=true +ARG ENABLE_AIM=false +ARG ENABLE_MLFLOW=false +ARG ENABLE_SCANNER=false +ARG ENABLE_CLEARML=true ARG ENABLE_TRITON_KERNELS=true +ARG ENABLE_RECOMMENDER=true # Ensures to always build mamba_ssm from source ENV PIP_NO_BINARY=mamba-ssm,mamba_ssm -# install triton kernels -RUN pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels" +# upgrade torch as the base layer contains only torch 2.7 +RUN python -m pip install --upgrade pip && \ + pip install --upgrade setuptools && \ + pip install --upgrade --force-reinstall torch torchaudio torchvision --index-url https://download.pytorch.org/whl/cu128 # Install main package + flash attention COPY . ${SOURCE_DIR} RUN cd ${SOURCE_DIR} -RUN pip install --no-cache-dir ${SOURCE_DIR}[flash-attn,mamba,fms-accel,clearml,tuning_config_recommender] - -# install fms-accel packages -RUN python -m fms_acceleration.cli install fms_acceleration_peft && \ - python -m fms_acceleration.cli install fms_acceleration_foak && \ - python -m fms_acceleration.cli install fms_acceleration_aadp && \ - python -m fms_acceleration.cli install fms_acceleration_moe && \ - python -m fms_acceleration.cli install fms_acceleration_odm +RUN pip install --no-cache-dir ${SOURCE_DIR} && \ + pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[flash-attn] && \ + pip install --no-cache-dir --no-build-isolation ${SOURCE_DIR}[mamba] + +# Optional extras +RUN if [[ "${ENABLE_FMS_ACCELERATION}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[fms-accel] && \ + python -m fms_acceleration.cli install fms_acceleration_peft && \ + python -m fms_acceleration.cli install fms_acceleration_foak && \ + python -m fms_acceleration.cli install fms_acceleration_aadp && \ + python -m fms_acceleration.cli install fms_acceleration_moe && \ + python -m fms_acceleration.cli install fms_acceleration_odm; \ + fi + +RUN if [[ "${ENABLE_TRITON_KERNELS}" == "true" ]]; then \ + pip install --no-cache-dir "git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels"; \ + fi +RUN if [[ "${ENABLE_CLEARML}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[clearml]; \ + fi +RUN if [[ "${ENABLE_AIM}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[aim]; \ + fi +RUN if [[ "${ENABLE_MLFLOW}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[mlflow]; \ + fi +RUN if [[ "${ENABLE_SCANNER}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[scanner-dev]; \ + fi +RUN if [[ "${ENABLE_RECOMMENDER}" == "true" ]]; then \ + pip install --no-cache-dir ${SOURCE_DIR}[tuning_config_recommender]; \ + fi # cleanup build artifacts and caches RUN rm -rf /root/.cache /tmp/pip-* \