Commit c3069c8

feat: add vLLM embedders (#3163)

* feat: add vLLM embedders
* improvements
* integration tests on the ci
* fixes
* lower bound pin for more-itertools
* more pins
* rm serde methods

1 parent 1c1c57e · commit c3069c8

16 files changed: 1079 additions & 30 deletions
File tree

.github/workflows/vllm.yml

Lines changed: 32 additions & 5 deletions

```diff
@@ -30,6 +30,7 @@ env:
   PYTHONUNBUFFERED: "1"
   FORCE_COLOR: "1"
   VLLM_MODEL: "Qwen/Qwen3-0.6B"
+  VLLM_EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
   # we only test on Ubuntu to keep vLLM server running simple
   TEST_MATRIX_OS: '["ubuntu-latest"]'
   # vLLM is not compatible with Python 3.14. https://github.com/vllm-project/vllm/issues/34096
@@ -88,12 +89,13 @@ jobs:
           "https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl" \
           --torch-backend cpu

-      - name: Start vLLM server
+      - name: Start vLLM chat server
        env:
          VLLM_TARGET_DEVICE: "cpu"
          VLLM_CPU_KVCACHE_SPACE: "4"
        run: |
          nohup hatch run -- vllm serve ${{ env.VLLM_MODEL }} \
+            --port 8000 \
            --reasoning-parser qwen3 \
            --max-model-len 1024 \
            --enforce-eager \
@@ -102,20 +104,45 @@ jobs:
            --tool-call-parser hermes \
            --max-num-seqs 1 &

-          # Wait for the vLLM server to be ready with a timeout of 300 seconds
+          # Wait for the vLLM chat server to be ready with a timeout of 300 seconds
          timeout=300
          while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:8000/health > /dev/null 2>&1; do
-            echo "Waiting for vLLM server to start..."
+            echo "Waiting for vLLM chat server to start..."
            sleep 10
            ((timeout-=10))
          done

          if [ $timeout -eq 0 ]; then
-            echo "Timed out waiting for vLLM server to start."
+            echo "Timed out waiting for vLLM chat server to start."
            exit 1
          fi

-          echo "vLLM server started successfully."
+          echo "vLLM chat server started successfully."
+
+      - name: Start vLLM embedding server
+        env:
+          VLLM_TARGET_DEVICE: "cpu"
+          VLLM_CPU_KVCACHE_SPACE: "4"
+        run: |
+          nohup hatch run -- vllm serve ${{ env.VLLM_EMBEDDING_MODEL }} \
+            --port 8001 \
+            --enforce-eager \
+            --max-num-seqs 1 &
+
+          # Wait for the vLLM embedding server to be ready with a timeout of 300 seconds
+          timeout=300
+          while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:8001/health > /dev/null 2>&1; do
+            echo "Waiting for vLLM embedding server to start..."
+            sleep 10
+            ((timeout-=10))
+          done
+
+          if [ $timeout -eq 0 ]; then
+            echo "Timed out waiting for vLLM embedding server to start."
+            exit 1
+          fi
+
+          echo "vLLM embedding server started successfully."

      - name: Lint
        if: matrix.python-version == '3.10' && runner.os == 'Linux'
```
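Both server steps rely on the same poll-with-timeout readiness pattern. A minimal Python sketch of that logic, where the `check` callable stands in for the `curl http://localhost:<port>/health` probe (the names and short intervals here are illustrative, not part of the workflow):

```python
import time

def wait_until_ready(check, timeout=300.0, interval=10.0):
    """Poll check() until it returns True or `timeout` seconds elapse."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        if check():
            return True
        time.sleep(interval)
    return False

# Example: a probe that only succeeds on its third call, like a server
# that is still booting when the first health checks arrive.
attempts = {"n": 0}
def fake_health_probe():
    attempts["n"] += 1
    return attempts["n"] >= 3

ready = wait_until_ready(fake_health_probe, timeout=5.0, interval=0.01)
# ready → True (the probe succeeds on its third call, well inside the timeout)
```

Like the shell version, the failure path is a plain boolean: the caller decides whether a timeout means `exit 1`.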

README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -78,7 +78,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
 | [togetherai-haystack](integrations/togetherai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/togetherai-haystack.svg)](https://pypi.org/project/togetherai-haystack) | [![Test / togetherai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai-combined/htmlcov/index.html) |
 | [unstructured-fileconverter-haystack](integrations/unstructured/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured-combined/htmlcov/index.html) |
 | [valkey-haystack](integrations/valkey/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/valkey-haystack.svg)](https://pypi.org/project/valkey-haystack) | [![Test / valkey](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey-combined/htmlcov/index.html) |
-| [vllm-haystack](integrations/vllm/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
+| [vllm-haystack](integrations/vllm/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
 | [watsonx-haystack](integrations/watsonx/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/watsonx-haystack.svg?color=orange)](https://pypi.org/project/watsonx-haystack) | [![Test / watsonx](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx-combined/htmlcov/index.html) |
 | [weave-haystack](integrations/weave/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/weave-haystack.svg)](https://pypi.org/project/weave-haystack) | [![Test / weave](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave-combined/htmlcov/index.html) |
 | [weaviate-haystack](integrations/weaviate/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) | [![Test / weaviate](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate-combined/htmlcov/index.html) |
```

integrations/vllm/README.md

Lines changed: 12 additions & 3 deletions

````diff
@@ -11,10 +11,19 @@

 Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).

-To run integration tests locally, you need to have a running vLLM server. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
+To run integration tests locally, you need two vLLM servers running in parallel: one for the chat generator on port `8000` and one for the embedders on port `8001`. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.

-For example, on macOS, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and run the server with:
+For example, on macOS, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and start the chat generator server with:

 ```bash
-source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
+# chat generator server (port 8000)
+source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
+```
+
+vLLM-metal does not support embedding models. On macOS, you can run the embedding server via the CPU Docker image:
+
+```bash
+# embedders server (port 8001)
+docker run --rm -p 8001:8000 -e VLLM_CPU_OMP_THREADS_BIND=0-3 vllm/vllm-openai-cpu:latest \
+  --model sentence-transformers/all-MiniLM-L6-v2 --enforce-eager
 ```
````

integrations/vllm/pydoc/config_docusaurus.yml

Lines changed: 2 additions & 0 deletions

```diff
@@ -1,6 +1,8 @@
 loaders:
   - modules:
       - haystack_integrations.components.generators.vllm.chat.chat_generator
+      - haystack_integrations.components.embedders.vllm.text_embedder
+      - haystack_integrations.components.embedders.vllm.document_embedder
     search_path: [../src]
 processors:
   - type: filter
```

integrations/vllm/pyproject.toml

Lines changed: 2 additions & 2 deletions

```diff
@@ -22,7 +22,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.23.0", "openai"]
+dependencies = ["haystack-ai>=2.23.0", "openai", "more_itertools>=9.0.0", "tqdm>=4.48.0"]

 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/vllm#readme"
@@ -66,7 +66,7 @@ integration = 'pytest -m "integration" {args:tests}'
 all = 'pytest {args:tests}'
 unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
 integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
-types = "mypy -p haystack_integrations.components.generators.vllm {args}"
+types = "mypy -p haystack_integrations.components.generators.vllm -p haystack_integrations.components.embedders.vllm -p haystack_integrations.common.vllm {args}"

 [tool.mypy]
 install_types = true
```
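The new `more_itertools>=9.0.0` lower bound suggests the document embedder chunks its inputs into batches before calling the embedding endpoint. A hedged, stdlib-only sketch of that batching idea — `batched` here mirrors `more_itertools.batched`, and is not the integration's actual code:

```python
from itertools import islice

def batched(iterable, n):
    """Yield successive lists of up to n items, like more_itertools.batched."""
    it = iter(iterable)
    while batch := list(islice(it, n)):
        yield batch

# Seven documents embedded in batches of three → 3 requests instead of 7.
texts = [f"doc {i}" for i in range(7)]
batches = list(batched(texts, 3))
# → [['doc 0', 'doc 1', 'doc 2'], ['doc 3', 'doc 4', 'doc 5'], ['doc 6']]
```

Batching keeps each request to the server bounded, which matters here since the CI servers run with `--max-num-seqs 1`.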

integrations/vllm/src/haystack_integrations/common/py.typed

Whitespace-only changes.

Lines changed: 3 additions & 0 deletions

```diff
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
```
Lines changed: 38 additions & 0 deletions

```diff
@@ -0,0 +1,38 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+
+from haystack.utils import Secret
+from haystack.utils.http_client import init_http_client
+from openai import AsyncOpenAI, OpenAI
+
+
+def _create_openai_clients(
+    api_key: Secret | None,
+    api_base_url: str,
+    timeout: float | None,
+    max_retries: int | None,
+    http_client_kwargs: dict[str, Any] | None,
+) -> tuple[OpenAI, AsyncOpenAI]:
+    """
+    Build sync and async OpenAI clients pointing at a vLLM server.
+
+    A placeholder api key is used when the user did not supply one and no `VLLM_API_KEY` env var is set, because the
+    OpenAI client requires a non-empty value.
+    `timeout` and `max_retries` are only forwarded when provided: when None, the OpenAI client's own defaults apply.
+    """
+    resolved_api_key = "placeholder-api-key"
+    if api_key is not None and (value := api_key.resolve_value()):
+        resolved_api_key = value
+
+    client_kwargs: dict[str, Any] = {"api_key": resolved_api_key, "base_url": api_base_url}
+    if timeout is not None:
+        client_kwargs["timeout"] = timeout
+    if max_retries is not None:
+        client_kwargs["max_retries"] = max_retries
+
+    sync_client = OpenAI(http_client=init_http_client(http_client_kwargs, async_client=False), **client_kwargs)
+    async_client = AsyncOpenAI(http_client=init_http_client(http_client_kwargs, async_client=True), **client_kwargs)
+    return sync_client, async_client
```
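The key-resolution branch in the helper above can be exercised in isolation. A minimal sketch with a stand-in `FakeSecret` (the real code uses `haystack.utils.Secret`; `resolve_api_key` is an illustrative extraction, not a function in the integration):

```python
def resolve_api_key(api_key):
    """Fall back to a placeholder when no key is supplied or the secret
    resolves to an empty value, since the OpenAI client rejects empty keys."""
    resolved = "placeholder-api-key"
    if api_key is not None and (value := api_key.resolve_value()):
        resolved = value
    return resolved

class FakeSecret:
    """Stand-in for haystack.utils.Secret: resolve_value() may return None."""
    def __init__(self, value):
        self._value = value
    def resolve_value(self):
        return self._value

print(resolve_api_key(None))                  # placeholder-api-key
print(resolve_api_key(FakeSecret(None)))      # placeholder-api-key
print(resolve_api_key(FakeSecret("")))        # placeholder-api-key
print(resolve_api_key(FakeSecret("my-key")))  # my-key
```

The walrus expression covers both "no secret object" and "secret resolves empty" in one condition, which is why a `Secret` whose env var is unset still yields the placeholder.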

integrations/vllm/src/haystack_integrations/components/embedders/py.typed

Whitespace-only changes.

Lines changed: 8 additions & 0 deletions

```diff
@@ -0,0 +1,8 @@
+# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+from .document_embedder import VLLMDocumentEmbedder
+from .text_embedder import VLLMTextEmbedder
+
+__all__ = ["VLLMDocumentEmbedder", "VLLMTextEmbedder"]
```
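The exported pair follows Haystack's usual embedder split: a text embedder that turns one query string into one vector, and a document embedder that writes vectors back onto document objects. A stdlib-only sketch of that contract — `FakeDoc`, `embed`, and both function names are illustrative stand-ins, not the integration's API:

```python
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class FakeDoc:
    """Stand-in for haystack.Document: content plus an embedding slot."""
    content: str
    embedding: Optional[List[float]] = None

def embed(text):
    # Stand-in for a request to the vLLM /v1/embeddings endpoint.
    return [float(len(text)), 0.0]

def embed_text(text):
    """Text-embedder contract: one string in, one vector out."""
    return embed(text)

def embed_documents(docs):
    """Document-embedder contract: vectors are written back onto the docs."""
    for doc in docs:
        doc.embedding = embed(doc.content)
    return docs

query_vec = embed_text("hello")          # [5.0, 0.0]
docs = embed_documents([FakeDoc("hello"), FakeDoc("hi")])
# docs[0].embedding == [5.0, 0.0], docs[1].embedding == [2.0, 0.0]
```

The split matters in pipelines: the text embedder feeds a retriever at query time, while the document embedder runs at indexing time.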
