Skip to content

Commit c0f5966

Browse files
authored
Merge pull request #152 from coreweave/es/sccache-vllm
build(vllm-tensorizer): Integrate `sccache` & fix `flashinfer` build
2 parents 51eae7a + 59c30ba commit c0f5966

6 files changed

Lines changed: 59 additions & 167 deletions

File tree

.github/workflows/vllm-tensorizer.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,9 @@ jobs:
2222
image-name: vllm-tensorizer
2323
folder: vllm-tensorizer
2424
tag-suffix: ${{ matrix.vllm-commit }}
25+
build-contexts: |
26+
common=common
27+
object-storage-secrets: true
2528
build-args: |
2629
VLLM_COMMIT=${{ matrix.vllm-commit }}
2730
FLASHINFER_COMMIT=${{ matrix.flashinfer-commit }}

torch-extras/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,7 @@ ENV CC=/opt/sccache-cc.sh \
160160
# so incremental build dependency tracking has no value anyway.
161161
ENV TORCH_EXTENSION_SKIP_NVCC_GEN_DEPENDENCIES=1
162162

163-
COPY --link --from=torch-common --chmod=755 nvcc-wrapper.py /build/nvcc-wrapper.py
163+
COPY --link --from=common --chmod=755 nvcc-wrapper.py /build/nvcc-wrapper.py
164164
ENV PYTORCH_NVCC='/build/nvcc-wrapper.py' \
165165
CMAKE_CUDA_COMPILER='/build/nvcc-wrapper.py' \
166166
CUDACXX='/build/nvcc-wrapper.py'

torch/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -421,7 +421,7 @@ RUN FLAGS="$BUILD_NVCC_APPEND_FLAGS" && \
421421
"${FLAGS:+ $FLAGS}" && echo; \
422422
} > /build/nvcc.conf
423423

424-
COPY --link --from=torch-common --chmod=755 nvcc-wrapper.py /build/nvcc-wrapper.py
424+
COPY --link --from=common --chmod=755 nvcc-wrapper.py /build/nvcc-wrapper.py
425425
ENV PYTORCH_NVCC='/build/nvcc-wrapper.py' \
426426
CMAKE_CUDA_COMPILER='/build/nvcc-wrapper.py' \
427427
CUDACXX='/build/nvcc-wrapper.py'

vllm-tensorizer/Dockerfile

Lines changed: 54 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,14 @@
1+
# syntax=docker/dockerfile:1.10
12
ARG BUILDER_BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch:17ad6db-nccl-cuda12.9.1-ubuntu22.04-nccl2.29.2-1-torch2.10.0-vision0.25.0-audio2.10.0-abi1"
23
ARG FINAL_BASE_IMAGE="ghcr.io/coreweave/ml-containers/torch:17ad6db-nccl-cuda12.9.1-ubuntu22.04-nccl2.29.2-1-torch2.10.0-vision0.25.0-audio2.10.0-abi1"
4+
ARG SCCACHE_VERSION="0.14.0"
5+
6+
FROM alpine/curl:8.17.0 AS sccache-downloader
7+
ARG SCCACHE_VERSION
8+
RUN ARCH=$(uname -m) && \
9+
curl -fsSL "https://github.com/mozilla/sccache/releases/download/v${SCCACHE_VERSION}/sccache-v${SCCACHE_VERSION}-${ARCH}-unknown-linux-musl.tar.gz" \
10+
| tar xz --strip-components=1 -C /opt/ "sccache-v${SCCACHE_VERSION}-${ARCH}-unknown-linux-musl/sccache" && \
11+
chmod 755 /opt/sccache
312

413
FROM scratch AS freezer
514
WORKDIR /
@@ -25,9 +34,19 @@ WORKDIR /workspace
2534
RUN --mount=type=bind,from=freezer,target=/tmp/frozen \
2635
/tmp/frozen/freeze.sh torch torchaudio torchvision xformers > /opt/constraints.txt
2736

28-
COPY --link --chmod=755 nvcc-wrapper.py /opt/nvcc-wrapper.py
37+
COPY --link --from=common --chmod=755 nvcc-wrapper.py /opt/nvcc-wrapper.py
2938
ENV PYTORCH_NVCC='/opt/nvcc-wrapper.py' \
30-
CMAKE_CUDA_COMPILER='/opt/nvcc-wrapper.py'
39+
CUDACXX='/opt/nvcc-wrapper.py'
40+
41+
# Setup for sccache and its wrappers.
42+
COPY --link --from=sccache-downloader /opt/sccache /opt/sccache
43+
COPY --link --from=common --chmod=755 sccache*.sh /opt/
44+
COPY --link --from=common sccache.toml /etc/sccache.toml
45+
RUN sed -Ei 's@^(key_prefix.*)misc@\1vllm-tensorizer@' /etc/sccache.toml
46+
ENV SCCACHE_CONF=/etc/sccache.toml
47+
ENV CC=/opt/sccache-cc.sh \
48+
CXX=/opt/sccache-c++.sh
49+
ENV TORCH_EXTENSION_SKIP_NVCC_GEN_DEPENDENCIES=1
3150

3251
ARG TARGETPLATFORM
3352
# Switch 9.0, 10.0, and 12.0 to -a variants; preserve originals for PTX
@@ -105,7 +124,13 @@ RUN git clone --filter=tree:0 --no-single-branch --no-checkout \
105124

106125
FROM builder-base AS vllm-builder
107126
RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw \
127+
--mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
128+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
129+
--mount=type=tmpfs,target=/sccache \
130+
--mount=type=tmpfs,target=/tmp \
131+
. /opt/sccache-start.sh && \
108132
. /opt/arch_flags.sh && \
133+
export CMAKE_ARGS='-DCMAKE_CUDA_COMPILER=/opt/nvcc-wrapper.py' && \
109134
if [ -z "$MAX_JOBS" ]; then unset MAX_JOBS; fi && \
110135
python3 -m pip install --no-cache-dir py-cpuinfo 'cmake>=3.26.1,<4' grpcio-tools && \
111136
if [ -f 'use_existing_torch.py' ]; then \
@@ -115,7 +140,6 @@ RUN --mount=type=bind,from=vllm-downloader,source=/git/vllm,target=/workspace,rw
115140
e489ad7a210f4234db696d1f2749d5f3662fa65b:use_existing_torch.py \
116141
| python3 -; \
117142
fi && \
118-
USE_CUDNN=1 USE_CUSPARSELT=1 \
119143
LIBRARY_PATH="/usr/local/cuda/lib64:${LIBRARY_PATH:+:$LIBRARY_PATH}" \
120144
CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
121145
VLLM_MAIN_CUDA_VERSION="${CUDA_VERSION%.*}" \
@@ -126,14 +150,19 @@ WORKDIR /wheels
126150

127151
FROM builder-base AS flashinfer-builder
128152
RUN --mount=type=bind,from=flashinfer-downloader,source=/git/flashinfer,target=/workspace,rw \
153+
--mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
154+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
155+
--mount=type=tmpfs,target=/sccache \
156+
--mount=type=tmpfs,target=/tmp \
157+
. /opt/sccache-start.sh && \
129158
. /opt/arch_flags.sh && \
130159
export TORCH_CUDA_ARCH_LIST="$(echo "${TORCH_CUDA_ARCH_LIST}" | sed 's@[67]\.0 \+@@g')" && \
131160
[ -n "${CUDA_VERSION}" ] && \
132161
python3 -m pip install --no-cache-dir \
133162
requests nvidia-ml-py ninja tqdm filelock \
134163
'nvidia-cudnn-frontend>=1.13.0' \
135164
"cuda-python~=${CUDA_VERSION}" \
136-
"nvidia-nvshmem-cu${CUDA_VERSION%%.*}" \
165+
"nvidia-nvshmem-cu${CUDA_VERSION%%.*}<3.6" \
137166
'apache-tvm-ffi>=0.1,<0.2' && \
138167
export FLASHINFER_LOCAL_VERSION="$(sed -E 's@([[:digit:]]+)\.([[:digit:]]+).*$@cu\1\2@')" \
139168
FLASHINFER_AOT_USE_PY_LIMITED_API='0' \
@@ -149,6 +178,11 @@ WORKDIR /wheels
149178
FROM builder-base AS lmcache-builder
150179
# LMCache must be built from source as it doesn't have pre-built ARM binaries
151180
RUN --mount=type=bind,from=lmcache-downloader,source=/git/LMCache,target=/workspace,rw \
181+
--mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
182+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
183+
--mount=type=tmpfs,target=/sccache \
184+
--mount=type=tmpfs,target=/tmp \
185+
. /opt/sccache-start.sh && \
152186
. /opt/arch_flags.sh && \
153187
python3 -m pip install --no-cache-dir \
154188
'setuptools>=77.0.3,<81.0.0' \
@@ -163,6 +197,11 @@ FROM builder-base AS infinistore-builder
163197
# InfiniStore is required when installing LMCache
164198
# It must also be built from source as it also doesn't have pre-built ARM binaries
165199
RUN --mount=type=bind,from=infinistore-downloader,source=/git/InfiniStore,target=/workspace,rw \
200+
--mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
201+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
202+
--mount=type=tmpfs,target=/sccache \
203+
--mount=type=tmpfs,target=/tmp \
204+
. /opt/sccache-start.sh && \
166205
apt-get -qq update && \
167206
apt-get -q install --no-install-recommends --no-upgrade -y \
168207
libuv1-dev libflatbuffers-dev libspdlog-dev \
@@ -175,6 +214,11 @@ RUN --mount=type=bind,from=infinistore-downloader,source=/git/InfiniStore,target
175214

176215
FROM builder-base AS deepgemm-builder
177216
RUN --mount=type=bind,from=deepgemm-downloader,source=/git/DeepGEMM,target=/workspace,rw \
217+
--mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
218+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
219+
--mount=type=tmpfs,target=/sccache \
220+
--mount=type=tmpfs,target=/tmp \
221+
. /opt/sccache-start.sh && \
178222
. /opt/arch_flags.sh && \
179223
/opt/build.sh
180224

@@ -188,7 +232,12 @@ RUN apt-get -qq update && \
188232
ARG NIXL_TAG='0.2.0'
189233
ARG NIXL_UCX_HOME='/opt/hpcx/ucx'
190234

191-
RUN mkdir /tmp/nixl && \
235+
RUN --mount=type=secret,id=s3_access_key_id,env=AWS_ACCESS_KEY_ID \
236+
--mount=type=secret,id=s3_secret_access_key,env=AWS_SECRET_ACCESS_KEY \
237+
--mount=type=tmpfs,target=/sccache \
238+
--mount=type=tmpfs,target=/tmp \
239+
. /opt/sccache-start.sh && \
240+
mkdir /tmp/nixl && \
192241
cd /tmp/nixl && \
193242
wget "https://github.com/ai-dynamo/nixl/archive/refs/tags/${NIXL_TAG}.tar.gz" -qO- \
194243
| tar --strip-components=1 -xzf - && \

vllm-tensorizer/nvcc-wrapper.py

Lines changed: 0 additions & 160 deletions
This file was deleted.

0 commit comments

Comments
 (0)