vllm: cuda update for 0.19

d-goldin · CertainLach · commit 578c8c821c81 · 2026-04-24T02:24:03.000+02:00
- Bump the triton source used for triton-kernels from v3.5.0 to
  v3.6.0; the older version didn't work for me with 0.17.
- Drop quack-kernels and nvidia-cutlass-dsl (Cutlass DSL) from the
  CUDA requirements via 0007-drop-quack-reqs.patch.
  From what I can tell, those are only needed for FA4
  and would also require some NVIDIA blobs. We are at FA2
  right now, so this shouldn't remove any functionality
  that was present before.
- Add NCCL to the wrapper args (VLLM_NCCL_SO_PATH pointing at
  cudaPackages.nccl's libnccl.so, only when cudaSupport is enabled),
  for better UX.
diff --git a/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch
@@ -0,0 +1,12 @@
+diff --git a/requirements/cuda.txt b/requirements/cuda.txt
+index 22477dc82..84fe34730 100644
+--- a/requirements/cuda.txt
++++ b/requirements/cuda.txt
+@@ -14,7 +14,3 @@
+ # Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
+ # breaking changes in 1.19.0
+ nvidia-cudnn-frontend>=1.13.0,<1.19.0
+-
+-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
+-nvidia-cutlass-dsl>=4.4.0.dev1
+-quack-kernels>=0.2.7
diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix
@@ -177,8 +177,8 @@ let
   triton-kernels = fetchFromGitHub {
     owner = "triton-lang";
     repo = "triton";
-    tag = "v3.5.0";
-    hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
+    tag = "v3.6.0";
+    hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc=";
   };
 
   # grep for GIT_TAG in the following file
@@ -353,6 +353,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
     ./0003-propagate-pythonpath.patch
     ./0005-drop-intel-reqs.patch
     ./0006-drop-rocm-extra-reqs.patch
+    # QuACK and Cutlass DSL seem to be added only for FA4
+    # which in our case handles its own deps
+    ./0007-drop-quack-reqs.patch
   ];
 
   postPatch = ''
@@ -579,6 +582,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
   pythonRelaxDeps = true;
 
   pythonImportsCheck = [ "vllm" ];
+  makeWrapperArgs = lib.optionals cudaSupport [
+    "--set"
+    "VLLM_NCCL_SO_PATH"
+    "${cudaPackages.nccl}/lib/libnccl.so"
+  ];
 
   passthru = {
     # make internal dependency available to overlays