22# builder
33ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3
44
5- # # build args
5+ # # build base env
66FROM ${BASE_IMAGE} AS setup_env
77
8- ARG TORCH_VERSION
98ARG PPA_SOURCE
10-
11- RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
12- sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
9+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
10+ RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
1311 apt update && \
1412 apt install --no-install-recommends ca-certificates -y && \
1513 apt install --no-install-recommends bc wget -y && \
1614 apt install --no-install-recommends build-essential sudo -y && \
1715 apt install --no-install-recommends git curl pkg-config tree unzip tmux \
1816 openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone \
19- iputils-ping telnet netcat-openbsd -y && \
17+ iputils-ping telnet netcat-openbsd htop bubblewrap socat -y && \
2018 apt clean && rm -rf /var/lib/apt/lists/*
2119
2220RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
23- RUN pip install pystack py-spy --no-cache-dir
21+ RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
2422RUN git config --system --add safe.directory "*"
2523
24+ # torch
25+ ARG TORCH_VERSION
26+ ARG PYTORCH_WHEELS_URL
2627RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
28+ --mount=type=secret,id=NO_PROXY,env=no_proxy \
2729 if [ -n "${TORCH_VERSION}" ]; then \
2830 pip install torchvision torch==${TORCH_VERSION} \
29- --index-url https://download.pytorch.org/whl/cu128 \
30- --extra-index-url https://download.pytorch.org/whl/cu126 \
31+ -i ${PYTORCH_WHEELS_URL}/cu128 \
32+ --extra-index-url ${PYTORCH_WHEELS_URL}/cu126 \
3133 --no-cache-dir; \
3234 fi
33-
3435# set reasonable default for CUDA architectures when building ngc image
35- ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"
36-
37- RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
36+ ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"
3837
3938ARG FLASH_ATTN_DIR=/tmp/flash-attn
4039ARG CODESPACE=/root/codespace
@@ -56,6 +55,9 @@ ARG CODESPACE
5655ARG FLASH_ATTN_DIR
5756ARG FLASH_ATTN3_DIR
5857ARG FLASH_ATTN_URL
58+ # force Hopper for now; you can change it through build args
59+ ARG FLASH_ATTN_CUDA_ARCHS="90"
60+ ARG FLASH_ATTENTION_DISABLE_SM80="TRUE"
5961
6062RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
6163 git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
@@ -119,42 +121,41 @@ WORKDIR ${CODESPACE}/causal-conv1d
119121
120122RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip wheel -w ${CAUSAL_CONV1D_DIR} -v --no-deps --no-build-isolation .
121123
122- # pypi install nvshmem and compile deepep
124+ # compile deepep (nvshmem source build is currently commented out below)
123125FROM setup_env AS deep_ep
124126
125127ARG CODESPACE
126128ARG DEEP_EP_DIR
127129ARG DEEP_EP_URL
128- # build sm90 and sm100 for deep_ep for now
129- ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"
130130
131+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
132+ # curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
133+ # tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
134+ # cd ${CODESPACE}/nvshmem_src && \
135+ # NVSHMEM_SHMEM_SUPPORT=0 \
136+ # NVSHMEM_UCX_SUPPORT=0 \
137+ # NVSHMEM_USE_NCCL=0 \
138+ # NVSHMEM_MPI_SUPPORT=0 \
139+ # NVSHMEM_IBGDA_SUPPORT=1 \
140+ # NVSHMEM_USE_GDRCOPY=0 \
141+ # NVSHMEM_PMIX_SUPPORT=0 \
142+ # NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
143+ # NVSHMEM_BUILD_TESTS=0 \
144+ # NVSHMEM_BUILD_EXAMPLES=0 \
145+ # NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
146+ # NVSHMEM_BUILD_TXZ_PACKAGE=0 \
147+ # NVSHMEM_BUILD_PYTHON_LIB=OFF \
148+ # cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
149+ # cmake --build build --target install --parallel 32 && \
131150RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
132- curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
133- tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
134- cd ${CODESPACE}/nvshmem_src && \
135- NVSHMEM_SHMEM_SUPPORT=0 \
136- NVSHMEM_UCX_SUPPORT=0 \
137- NVSHMEM_USE_NCCL=0 \
138- NVSHMEM_MPI_SUPPORT=0 \
139- NVSHMEM_IBGDA_SUPPORT=1 \
140- NVSHMEM_USE_GDRCOPY=0 \
141- NVSHMEM_PMIX_SUPPORT=0 \
142- NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
143- NVSHMEM_BUILD_TESTS=0 \
144- NVSHMEM_BUILD_EXAMPLES=0 \
145- NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
146- NVSHMEM_BUILD_TXZ_PACKAGE=0 \
147- NVSHMEM_BUILD_PYTHON_LIB=OFF \
148- cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
149- cmake --build build --target install --parallel 32 && \
150151 cd ${CODESPACE} && git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
151152 cd ${CODESPACE}/DeepEP && \
152153 git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
153154 git submodule update --init --recursive --force
154155
155156WORKDIR ${CODESPACE}/DeepEP
156157
157- RUN NVSHMEM_DIR=${NVSHMEM_PREFIX} pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
158+ RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
158159
159160# compile deep_gemm
160161FROM setup_env AS deep_gemm
@@ -192,7 +193,7 @@ COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
192193COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
193194COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
194195COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
195- COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
196+ # COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
196197COPY --from=deep_gemm ${DEEP_GEMM_DIR} ${DEEP_GEMM_DIR}
197198COPY --from=causal_conv1d ${CAUSAL_CONV1D_DIR} ${CAUSAL_CONV1D_DIR}
198199
@@ -204,51 +205,114 @@ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
204205RUN unzip ${DEEP_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
205206RUN unzip ${CAUSAL_CONV1D_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
206207
207- # install sglang and its runtime requirements
208- ARG SGLANG_VERSION
208+ ARG DEFAULT_PYPI_URL
209209
210- RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
211- pip install sglang==${SGLANG_VERSION} sgl-kernel==0.3.14.post1 pybase64 orjson uvloop setproctitle msgspec \
212- compressed_tensors python-multipart torch_memory_saver \
213- grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
214- xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
210+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
211+ RUN pip install pystack py-spy --no-cache-dir -i ${DEFAULT_PYPI_URL}
212+
213+ # install sglang and its runtime requirements
214+ ENV XTUNER_SGLANG_ENVS_DIR=/envs/sglang
215+
216+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
217+ RUN \
218+ pip install --target ${XTUNER_SGLANG_ENVS_DIR} \
219+ sglang==0.5.9 sgl-kernel==0.3.21 \
220+ apache-tvm-ffi==0.1.9 \
221+ anthropic==0.86.0 \
222+ build==1.4.0 \
223+ cuda-python==12.9.0 \
224+ decord2==3.2.0 \
225+ flashinfer_python==0.6.3 \
226+ flashinfer_cubin==0.6.3 \
227+ gguf==0.18.0 \
228+ modelscope==1.35.3 \
229+ nvidia-cutlass-dsl==4.4.2 \
230+ openai-harmony==0.0.4 \
231+ openai==2.6.1 \
232+ outlines==0.1.11 \
233+ quack-kernels==0.2.4 \
234+ timm==1.0.16 \
235+ torchao==0.9.0 \
236+ torchaudio==2.9.1 \
237+ torchcodec==0.8.0 \
238+ xgrammar==0.1.32 \
239+ smg-grpc-proto==0.4.5 \
240+ grpcio==1.78.1 \
241+ grpcio-reflection==1.78.1 \
242+ grpcio-health-checking==1.80.0 \
243+ pycryptodomex==3.23.0 \
244+ lxml==6.0.2 \
245+ cuda-bindings==12.9.6 \
246+ cuda-pathfinder==1.5.0 \
247+ nvidia-cudnn-frontend==1.21.0 \
248+ lark==1.3.1 \
249+ pycountry==26.2.16 \
250+ airportsdata==20260315 \
251+ outlines_core==0.1.26 \
252+ torch-c-dlpack-ext==0.1.5 \
253+ pyproject_hooks==1.2.0 \
254+ huggingface_hub==0.36.2 \
255+ torch_memory_saver==0.0.9 \
256+ diskcache==5.6.3 distro==1.9.0 jiter==0.13.0 \
257+ llguidance==0.7.11 blobfile==3.0.0 \
258+ pybase64 orjson uvloop setproctitle msgspec partial_json_parser \
259+ compressed_tensors python-multipart \
260+ hf_transfer interegular --no-cache-dir --no-deps -i ${DEFAULT_PYPI_URL}
215261
216262# install lmdeploy and its missing runtime requirements
217263ARG LMDEPLOY_VERSION
218264ARG LMDEPLOY_URL
265+ ENV XTUNER_LMDEPLOY_ENVS_DIR=/envs/lmdeploy
219266
267+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
268+ ARG LMDEPLOY_WHEELS=https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp312-cp312-manylinux2014_x86_64.whl
220269RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
270+ --mount=type=secret,id=NO_PROXY,env=no_proxy \
221271 pip install fastapi fire openai outlines \
222- partial_json_parser ray[default] shortuuid uvicorn \
223- 'pydantic>2' openai_harmony dlblas --no-cache-dir && \
272+ pyzmq aiohttp cloudpickle prometheus_client protobuf numpy pillow einops tiktoken sentencepiece \
273+ partial_json_parser 'ray[default]<3' shortuuid uvicorn pybase64 \
274+ 'pydantic>2' openai_harmony dlblas --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-cache-dir -i ${DEFAULT_PYPI_URL} && \
275+ pip install xgrammar==0.1.32 timm!=1.0.23 --no-cache-dir -i ${DEFAULT_PYPI_URL} --no-deps && \
224276 if [ -n "${LMDEPLOY_VERSION}" ]; then \
225- pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
277+ # pip install lmdeploy==${LMDEPLOY_VERSION} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
278+ echo pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
279+ pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
226280 else \
227281 git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
228282 cd ${CODESPACE}/lmdeploy && \
229283 git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
230- pip install . -v --no-deps --no-cache-dir; \
284+ pip install . -v --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
231285 fi
232286
233287# # install xtuner
234288ARG XTUNER_URL
235289ARG XTUNER_COMMIT
236- # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
290+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
237291# git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
238292# cd ${CODESPACE}/xtuner && \
239293# git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
240294COPY . ${CODESPACE}/xtuner
241295
242296WORKDIR ${CODESPACE}/xtuner
243- RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
244- pip install .[all] -v --no-cache-dir
297+
298+ # Install custom .pth file for conditional lmdeploy and sglang path injection
299+ RUN cp -r .dev_scripts/xtuner_rl_path* ${PYTHON_SITE_PACKAGE_PATH}/
300+
301+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
302+ RUN pip install .[all] -v --no-cache-dir -i ${DEFAULT_PYPI_URL}
245303
246304WORKDIR ${CODESPACE}
247305
248306# nccl update for torch 2.6.0
249- RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
250- if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
251- pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir; \
307+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
308+ RUN if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
309+ pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
310+ fi
311+
312+ # cudnn update for torch 2.9.1
313+ # RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
314+ RUN if [ "x${TORCH_VERSION}" = "x2.9.1" ]; then \
315+ pip install nvidia-cudnn-cu12==9.15.1.9 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
252316 fi
253317
254318# setup sysctl
0 commit comments