python3Packages.vllm: 0.17 update: updating CUDA support

d-goldin · d-goldin · commit efda726ecfaf · 2026-03-28T15:02:13.000+01:00
- Bumping triton-kernels (fetched from the triton repo) from v3.5.0
  to v3.6.0; the older one didn't work for me with 0.17
- Drops quack-kernels and nvidia-cutlass-dsl (CuTe DSL) from the
  CUDA requirements. From what I can tell those are only needed
  for FA4 and would also require some NVIDIA blobs. We are at FA2
  right now, so this shouldn't remove any functionality
  that was present before.
- Adding NCCL to the wrapper args (sets VLLM_NCCL_SO_PATH to the
  libnccl.so from cudaPackages.nccl when cudaSupport is enabled),
  for better UX
diff --git a/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch
@@ -0,0 +1,12 @@
+diff --git a/requirements/cuda.txt b/requirements/cuda.txt
+index 22477dc82..84fe34730 100644
+--- a/requirements/cuda.txt
++++ b/requirements/cuda.txt
+@@ -11,7 +11,3 @@ torchaudio==2.10.0
+ torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+ # FlashInfer should be updated together with the Dockerfile
+ flashinfer-python==0.6.4
+-
+-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
+-nvidia-cutlass-dsl>=4.4.0.dev1
+-quack-kernels>=0.2.7
diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix
@@ -178,8 +178,8 @@ let
   triton-kernels = fetchFromGitHub {
     owner = "triton-lang";
     repo = "triton";
-    tag = "v3.5.0";
-    hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
+    tag = "v3.6.0";
+    hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc=";
   };
 
   # grep for GIT_TAG in the following file
@@ -354,6 +354,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
     ./0003-propagate-pythonpath.patch
     ./0005-drop-intel-reqs.patch
     ./0006-drop-rocm-extra-reqs.patch
+    # QuACK and Cutlass DSL seem to be added only for FA4
+    # which in our case handles its own deps
+    ./0007-drop-quack-reqs.patch
   ];
 
   postPatch = ''
@@ -582,6 +585,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
   pythonRelaxDeps = true;
 
   pythonImportsCheck = [ "vllm" ];
+  makeWrapperArgs = lib.optionals cudaSupport [
+    "--set"
+    "VLLM_NCCL_SO_PATH"
+    "${cudaPackages.nccl}/lib/libnccl.so"
+  ];
 
   passthru = {
     # make internal dependency available to overlays