Skip to content

Commit 578c8c8

Browse files
d-goldin authored and CertainLach committed
vllm: cuda update for 0.19
- Bump triton to a newer version; the older one didn't work for me with 0.17.
- Drop quack-kernels and the CUTLASS DSL (nvidia-cutlass-dsl) from the dependencies. From what I can tell, those are only needed for FA4 and would also require some NVIDIA blobs. We are at FA2 right now, so this shouldn't remove any functionality that was present before.
- Add NCCL to the wrapper args (VLLM_NCCL_SO_PATH), for better UX.
1 parent d5eeed5 commit 578c8c8

2 files changed

Lines changed: 22 additions & 2 deletions

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
diff --git a/requirements/cuda.txt b/requirements/cuda.txt
2+
index 22477dc82..84fe34730 100644
3+
--- a/requirements/cuda.txt
4+
+++ b/requirements/cuda.txt
5+
@@ -14,7 +14,3 @@
6+
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
7+
# breaking changes in 1.19.0
8+
nvidia-cudnn-frontend>=1.13.0,<1.19.0
9+
-
10+
-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
11+
-nvidia-cutlass-dsl>=4.4.0.dev1
12+
-quack-kernels>=0.2.7

pkgs/development/python-modules/vllm/default.nix

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ let
177177
triton-kernels = fetchFromGitHub {
178178
owner = "triton-lang";
179179
repo = "triton";
180-
tag = "v3.5.0";
181-
hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
180+
tag = "v3.6.0";
181+
hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc=";
182182
};
183183

184184
# grep for GIT_TAG in the following file
@@ -353,6 +353,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
353353
./0003-propagate-pythonpath.patch
354354
./0005-drop-intel-reqs.patch
355355
./0006-drop-rocm-extra-reqs.patch
356+
# QuACK and Cutlass DSL seem to be added only for FA4
357+
# which in our case handles its own deps
358+
./0007-drop-quack-reqs.patch
356359
];
357360

358361
postPatch = ''
@@ -579,6 +582,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
579582
pythonRelaxDeps = true;
580583

581584
pythonImportsCheck = [ "vllm" ];
585+
makeWrapperArgs = lib.optionals cudaSupport [
586+
"--set"
587+
"VLLM_NCCL_SO_PATH"
588+
"${cudaPackages.nccl}/lib/libnccl.so"
589+
];
582590

583591
passthru = {
584592
# make internal dependency available to overlays

0 commit comments

Comments
 (0)