From efda726ecfaf642ef2030e963cbb140156b6eb99 Mon Sep 17 00:00:00 2001 From: Dima Date: Fri, 27 Mar 2026 14:22:59 +0100 Subject: [PATCH] python3Packages.vllm: 0.17 update: updating CUDA support - Bumping triton to a newer version, the older one didn't work for me with 0.17 - Dropping quack-kernels and cuteDSL from dependencies. From what I can tell those are only needed for FA4 and would also require some nvidia blobs. We are at FA2 right now, so this shouldn't remove any functionality that was present before - Adding NCCL to wrapper args, for better UX --- .../python-modules/vllm/0007-drop-quack-reqs.patch | 12 ++++++++++++ pkgs/development/python-modules/vllm/default.nix | 12 ++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch diff --git a/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch new file mode 100644 index 0000000000000..a6c484f5a67bf --- /dev/null +++ b/pkgs/development/python-modules/vllm/0007-drop-quack-reqs.patch @@ -0,0 +1,12 @@ +diff --git a/requirements/cuda.txt b/requirements/cuda.txt +index 22477dc82..84fe34730 100644 +--- a/requirements/cuda.txt ++++ b/requirements/cuda.txt +@@ -11,7 +11,3 @@ torchaudio==2.10.0 + torchvision==0.25.0 # Required for phi3v processor. 
See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version + # FlashInfer should be updated together with the Dockerfile + flashinfer-python==0.6.4 +- +-# QuACK and Cutlass DSL for FA4 (cute-DSL implementation) +-nvidia-cutlass-dsl>=4.4.0.dev1 +-quack-kernels>=0.2.7 diff --git a/pkgs/development/python-modules/vllm/default.nix b/pkgs/development/python-modules/vllm/default.nix index 4986d383e5c1b..2a1bf03ee1f3d 100644 --- a/pkgs/development/python-modules/vllm/default.nix +++ b/pkgs/development/python-modules/vllm/default.nix @@ -178,8 +178,8 @@ let triton-kernels = fetchFromGitHub { owner = "triton-lang"; repo = "triton"; - tag = "v3.5.0"; - hash = "sha256-F6T0n37Lbs+B7UHNYzoIQHjNNv3TcMtoXjNrT8ZUlxY="; + tag = "v3.6.0"; + hash = "sha256-JFSpQn+WsNnh7CAPlcpOcUp0nyKXNbJEANdXqmkt4Tc="; }; # grep for GIT_TAG in the following file @@ -354,6 +354,9 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: { ./0003-propagate-pythonpath.patch ./0005-drop-intel-reqs.patch ./0006-drop-rocm-extra-reqs.patch + # QuACK and Cutlass DSL seem to be added only for FA4 + # which in our case handles its own deps + ./0007-drop-quack-reqs.patch ]; postPatch = '' @@ -582,6 +585,11 @@ buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: { pythonRelaxDeps = true; pythonImportsCheck = [ "vllm" ]; + makeWrapperArgs = lib.optionals cudaSupport [ + "--set" + "VLLM_NCCL_SO_PATH" + "${cudaPackages.nccl}/lib/libnccl.so" + ]; passthru = { # make internal dependency available to overlays