Skip to content

Commit efda726

Browse files
committed
python3Packages.vllm: 0.17 update: updating CUDA support
- Bump triton to a newer version; the older one did not work for me with 0.17.
- Drop quack-kernels and nvidia-cutlass-dsl (CuTe DSL) from the dependencies. From what I can tell, these are only needed for FA4 and would also require some NVIDIA blobs. We are on FA2 right now, so this should not remove any functionality that was present before.
- Add NCCL to the wrapper args (sets VLLM_NCCL_SO_PATH) for better UX.
1 parent 8ced2cf commit efda726

2 files changed

Lines changed: 22 additions & 2 deletions

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
2+
index 22477dc82..84fe34730 100644
3+
--- a/requirements/cuda.txt
4+
+++ b/requirements/cuda.txt
5+
@@ -11,7 +11,3 @@ torchaudio==2.10.0
6+
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
7+
# FlashInfer should be updated together with the Dockerfile
8+
flashinfer-python==0.6.4
9+
-
10+
-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
11+
-nvidia-cutlass-dsl>=4.4.0.dev1
12+
-quack-kernels>=0.2.7

pkgs/development/python-modules/vllm/default.nix

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ let
178178
triton-kernels = fetchFromGitHub {
179179
owner = "triton-lang";
180180
repo = "triton";
181-
tag = "v3.5.0";
182-
hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
181+
tag = "v3.6.0";
182+
hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc=";
183183
};
184184

185185
# grep for GIT_TAG in the following file
@@ -354,6 +354,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
354354
./0003-propagate-pythonpath.patch
355355
./0005-drop-intel-reqs.patch
356356
./0006-drop-rocm-extra-reqs.patch
357+
# QuACK and Cutlass DSL seem to be added only for FA4
358+
# which in our case handles its own deps
359+
./0007-drop-quack-reqs.patch
357360
];
358361

359362
postPatch = ''
@@ -582,6 +585,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
582585
pythonRelaxDeps = true;
583586

584587
pythonImportsCheck = [ "vllm" ];
588+
makeWrapperArgs = lib.optionals cudaSupport [
589+
"--set"
590+
"VLLM_NCCL_SO_PATH"
591+
"${cudaPackages.nccl}/lib/libnccl.so"
592+
];
585593

586594
passthru = {
587595
# make internal dependency available to overlays

0 commit comments

Comments
 (0)