# syntax=docker/dockerfile:1

# vortex_torch + sglang v0.5.9, with PD (prefill/decode) disaggregation.
#
# Builds the v0.5 branch (vendored sglang lives at
# third_party/sglang/v0.5.9/sglang) and adds the RDMA/InfiniBand userspace
# stack + the Mooncake transfer engine that sglang's disaggregation backend
# needs to move KV cache between prefill and decode workers.
#
# The `# syntax=` directive on the first line pins a BuildKit Dockerfile
# frontend that understands the here-document RUN used by the sanity checks
# at the bottom of this file; it must stay the very first line.
#
# Build:
#   DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile.pd \
#     --build-arg VORTEX_TORCH_REF=v0.5 \
#     --build-arg TORCH_CUDA_ARCH_LIST="9.0" \
#     -t vortex-torch:pd-0.5.9 .
#   # Blackwell (B200/sm100): pass TORCH_CUDA_ARCH_LIST="10.0".
#
# Run (needs RDMA devices + IPC/host net for the transfer engine), e.g.:
#   docker run --gpus all --ipc=host --network=host \
#     --device=/dev/infiniband --cap-add=IPC_LOCK \
#     -v /raid/catalyst/models:/models -e HF_HOME=/models \
#     -it vortex-torch:pd-0.5.9
#   # then: bash marks/pd/run_p1d1.sh (mooncake backend, --disaggregation-ib-device mlx5_0)

# CUDA 12.9 matches sglang 0.5.9's pins (cuda-python==12.9, torch==2.9.1).
# Declared before FROM so the base-image tag can be overridden with --build-arg.
ARG CUDA_VERSION=12.9.1
FROM nvidia/cuda:${CUDA_VERSION}-cudnn-devel-ubuntu24.04

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so the setting is visible to RUN steps but is not
# baked into the environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# Hopper=9.0, Blackwell=10.0. Override at build time as needed.
# Re-exported as ENV so the value is also present in the running container.
ARG TORCH_CUDA_ARCH_LIST="9.0"
ENV TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"

# Run every RUN step under bash; pipefail makes a failure in any stage of a
# pipeline fail the build instead of being masked by the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# --- system deps: toolchain + libnuma + RDMA/InfiniBand userspace (for Mooncake) ---
# update + install + list cleanup stay in one layer so the apt index is never
# stale and never shipped in the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      # Python interpreter, headers, pip and venv support
      python3 \
      python3-dev \
      python3-pip \
      python3-venv \
      # Build toolchain and fetch utilities
      build-essential \
      ca-certificates \
      cmake \
      curl \
      git \
      ninja-build \
      wget \
      # NUMA runtime + headers
      libnuma-dev \
      libnuma1 \
      # InfiniBand / RDMA userspace — required by the Mooncake transfer engine
      ibverbs-providers \
      infiniband-diags \
      libibumad3 \
      libibverbs-dev \
      libibverbs1 \
      librdmacm1 \
      perftest \
      rdma-core \
 # Expose a bare `python` command that points at the system python3.
 && ln -sf /usr/bin/python3 /usr/bin/python \
 && rm -rf /var/lib/apt/lists/*

# Isolated venv (Ubuntu 24.04 is PEP-668 externally-managed), so pip installs
# below go into /opt/venv rather than the system site-packages.
RUN python3 -m venv /opt/venv

# Put the venv first on PATH so `python` / `pip` resolve to it in every later
# RUN step and in the running container.
ENV VIRTUAL_ENV="/opt/venv" \
    PATH="/opt/venv/bin:${PATH}"

# --no-cache-dir keeps pip's download cache out of the image layer, matching
# the other pip invocations in this file.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel

# Build-time knobs: the vortex_torch git ref to check out and the version of
# the mooncake-transfer-engine wheel installed further down.
ARG VORTEX_TORCH_REF=v0.5
ARG MOONCAKE_VERSION=0.3.9

WORKDIR /workspace

# The PD-disaggregation support (server-args overlap-schedule force, the
# decode-side rebuild_aux hook, the page-major get_contiguous_buf_infos) lives
# in the vendored sglang on this branch — make sure VORTEX_TORCH_REF is pushed.
# Submodules are fetched as part of the clone.
RUN git clone \
      --branch "${VORTEX_TORCH_REF}" \
      --recurse-submodules \
      https://github.com/Infini-AI-Lab/vortex_torch.git

# --- sglang v0.5.9 (vendored): editable install of its python package ---
# (v0.5.9 has no install.sh; pulls torch==2.9.1, flashinfer==0.6.3,
# sgl-kernel==0.3.21 as wheels.)
# The installable project is the `python/` subdirectory of the vendored tree.
WORKDIR /workspace/vortex_torch/third_party/sglang/v0.5.9/sglang
RUN python -m pip install --no-cache-dir --editable ./python

# --- vortex_torch (pure Python + Triton JIT; no compiled C extension) ---
# Editable install from the repository root cloned earlier.
WORKDIR /workspace/vortex_torch
RUN python -m pip install --no-cache-dir --editable .

# --- Mooncake transfer engine (KV transport for PD disaggregation) ---
# CUDA 12.x → pip wheel. (CUDA>=13 would need a from-source build.)
# The version is pinned through the MOONCAKE_VERSION build arg.
RUN python -m pip install --no-cache-dir \
      "mooncake-transfer-engine==${MOONCAKE_VERSION}"

# --- sanity checks ---
# Print which interpreter and pip are first on PATH, with their versions.
RUN command -v python \
 && python --version \
 && command -v pip \
 && pip --version

# Fail the build early if a required shared library cannot be loaded or one of
# the installed Python packages cannot be imported.
RUN python - <<'PY'
import ctypes

# NUMA and RDMA userspace shared objects that must be loadable.
required_libs = ("libnuma.so.1", "libibverbs.so.1", "librdmacm.so.1")
for lib in required_libs:
    ctypes.CDLL(lib)
    print(f"OK: {lib} loaded")

import sglang
import vortex_torch
print("OK: import sglang", getattr(sglang, "__version__", "?"))
print("OK: import vortex_torch")

from mooncake.engine import TransferEngine  # mooncake transport entrypoint
print("OK: mooncake TransferEngine importable")
PY

# Default to an interactive shell; workloads are launched manually from it.
CMD ["/bin/bash"]