# Force colored output in CI logs.
FORCE_COLOR: "1"
# Models served by the vLLM test servers (chat, embeddings, reranking).
VLLM_MODEL: "Qwen/Qwen3-0.6B"
VLLM_EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
VLLM_RANKER_MODEL: "BAAI/bge-reranker-base"
# Run vLLM on CPU with a 4 GiB KV-cache budget (CI runners have no GPU).
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
# we only test on Ubuntu to keep vLLM server running simple
TEST_MATRIX_OS: '["ubuntu-latest"]'
# vLLM is not compatible with Python 3.14. https://github.com/vllm-project/vllm/issues/34096
9093 --torch-backend cpu
9194
9295 - name : Start vLLM chat server
93- env :
94- VLLM_TARGET_DEVICE : " cpu"
95- VLLM_CPU_KVCACHE_SPACE : " 4"
9696 run : |
9797 nohup hatch run -- vllm serve ${{ env.VLLM_MODEL }} \
9898 --port 8000 \
@@ -120,9 +120,6 @@ jobs:
120120 echo "vLLM chat server started successfully."
121121
122122 - name : Start vLLM embedding server
123- env :
124- VLLM_TARGET_DEVICE : " cpu"
125- VLLM_CPU_KVCACHE_SPACE : " 4"
126123 run : |
127124 nohup hatch run -- vllm serve ${{ env.VLLM_EMBEDDING_MODEL }} \
128125 --port 8001 \
@@ -144,6 +141,27 @@ jobs:
144141
145142 echo "vLLM embedding server started successfully."
146143
# Start the reranker model server in the background and block until its
# health endpoint responds, or fail the job after 300 seconds.
- name: Start vLLM ranker server
  run: |
    # Launch in the background; nohup keeps it alive for later steps.
    nohup hatch run -- vllm serve ${{ env.VLLM_RANKER_MODEL }} \
      --port 8002 \
      --enforce-eager \
      --max-num-seqs 1 &

    # Wait for the vLLM ranker server to be ready with a timeout of 300 seconds
    timeout=300
    while [ "$timeout" -gt 0 ] && ! curl -sSf http://localhost:8002/health > /dev/null 2>&1; do
      echo "Waiting for vLLM ranker server to start..."
      sleep 10
      # Do NOT use ((timeout-=10)): bash arithmetic commands return exit
      # status 1 when the result is 0, and GitHub Actions runs `run:` with
      # `bash -e {0}`, so the step would abort here on a real timeout
      # before ever printing the message below. $(( )) expansion is safe.
      timeout=$((timeout - 10))
    done

    # -le (not -eq) so any overshoot below zero is still treated as a timeout.
    if [ "$timeout" -le 0 ]; then
      echo "Timed out waiting for vLLM ranker server to start."
      exit 1
    fi

    echo "vLLM ranker server started successfully."
# Run formatting and type checks once per matrix (single Python/OS combo)
# rather than on every matrix entry.
- name: Lint
  if: matrix.python-version == '3.10' && runner.os == 'Linux'
  run: hatch run fmt-check && hatch run test:types
0 commit comments