From efda726ecfaf642ef2030e963cbb140156b6eb99 Mon Sep 17 00:00:00 2001
From: Dima <dgoldin+github@protonmail.ch>
Date: Fri, 27 Mar 2026 14:22:59 +0100
Subject: [PATCH] python3Packages.vllm: 0.17 update: updating CUDA support

- Bump triton from v3.5.0 to v3.6.0; the older version didn't
  work for me with vllm 0.17
- Drop quack-kernels and nvidia-cutlass-dsl (CuTe DSL) from the
  CUDA requirements. From what I can tell those are only needed
  for FA4 and would also require some nvidia blobs. We are at FA2
  right now, so this shouldn't remove any functionality
  that was present before
- Add NCCL to the wrapper args (VLLM_NCCL_SO_PATH), for better UX
---
 .../python-modules/vllm/0007-drop-quack-reqs.patch   | 12 ++++++++++++
 pkgs/development/python-modules/vllm/default.nix     | 12 ++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch

diff --git a/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch
new file mode 100644
index 0000000000000..a6c484f5a67bf
--- /dev/null
+++ b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch
@@ -0,0 +1,12 @@
+diff --git a/requirements/cuda.txt b/requirements/cuda.txt
+index 22477dc82..84fe34730 100644
+--- a/requirements/cuda.txt
++++ b/requirements/cuda.txt
+@@ -11,7 +11,3 @@ torchaudio==2.10.0
+ torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
+ # FlashInfer should be updated together with the Dockerfile
+ flashinfer-python==0.6.4
+-
+-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation)
+-nvidia-cutlass-dsl>=4.4.0.dev1
+-quack-kernels>=0.2.7
diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix
index 4986d383e5c1b..2a1bf03ee1f3d 100644
--- a/pkgs/development/python-modules/vllm/default.nix
+++ b/pkgs/development/python-modules/vllm/default.nix
@@ -178,8 +178,8 @@ let
   triton-kernels = fetchFromGitHub {
     owner = "triton-lang";
     repo = "triton";
-    tag = "v3.5.0";
-    hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY=";
+    tag = "v3.6.0";
+    hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc=";
   };
 
   # grep for GIT_TAG in the following file
@@ -354,6 +354,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
     ./0003-propagate-pythonpath.patch
     ./0005-drop-intel-reqs.patch
     ./0006-drop-rocm-extra-reqs.patch
+    # QuACK and Cutlass DSL seem to be added only for FA4
+    # which in our case handles its own deps
+    ./0007-drop-quack-reqs.patch
   ];
 
   postPatch = ''
@@ -582,6 +585,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
   pythonRelaxDeps = true;
 
   pythonImportsCheck = [ "vllm" ];
+  makeWrapperArgs = lib.optionals cudaSupport [
+    "--set"
+    "VLLM_NCCL_SO_PATH"
+    "${cudaPackages.nccl}/lib/libnccl.so"
+  ];
 
   passthru = {
     # make internal dependency available to overlays