Improve the vLLM CI workflow and explain the CPU install step

anakin87 · anakin87 · commit 4dc67947e855 · 2026-04-03T11:18:51.000+02:00
diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml
@@ -76,10 +76,17 @@ jobs:
 
       - name: Install vLLM (CPU)
         run: |
+          # vLLM on PyPI is GPU-only and requires CUDA, so it won't run on CPU-only systems.
+          # CPU wheels are not published to PyPI; they are only available as direct downloads from GitHub releases.
+          # We look up the latest release tag and install the matching x86_64 CPU wheel.
+          # The --torch-backend cpu flag ensures uv installs PyTorch from the official CPU-only index,
+          # since the required torch+cpu builds are also not available on PyPI.
           VLLM_VERSION="$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name | sed 's/^v//')"
           export VLLM_VERSION
           echo "Installing vLLM ${VLLM_VERSION} (CPU)"
-          hatch run -- uv pip install "https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl" --extra-index-url https://download.pytorch.org/whl/cpu --index-strategy unsafe-best-match
+          hatch run -- uv pip install \
+            "https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl" \
+            --torch-backend cpu
 
       - name: Start vLLM server
         env:
@@ -94,8 +101,8 @@ jobs:
             --enable-auto-tool-choice \
             --tool-call-parser hermes &
 
-          # Wait for the vLLM server to be ready with a timeout of 600 seconds
-          timeout=600
+          # Wait for the vLLM server to be ready with a timeout of 300 seconds
+          timeout=300
           while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:8000/health > /dev/null 2>&1; do
             echo "Waiting for vLLM server to start..."
             sleep 10
diff --git a/integrations/vllm/pyproject.toml b/integrations/vllm/pyproject.toml
@@ -23,7 +23,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai", "openai"]
+dependencies = ["haystack-ai>=2.23.0", "openai"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/vllm#readme"

Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,7 @@ classifiers = [`
`23`	`23`	`"Programming Language :: Python :: Implementation :: CPython",`
`24`	`24`	`"Programming Language :: Python :: Implementation :: PyPy",`
`25`	`25`	`]`
`26`		`-dependencies = ["haystack-ai", "openai"]`
	`26`	`+dependencies = ["haystack-ai>=2.23.0", "openai"]`
`27`	`27`
`28`	`28`	`[project.urls]`
`29`	`29`	`Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/vllm#readme"`