Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions .github/workflows/vllm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ env:
PYTHONUNBUFFERED: "1"
FORCE_COLOR: "1"
VLLM_MODEL: "Qwen/Qwen3-0.6B"
VLLM_EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
# we only test on Ubuntu to keep vLLM server running simple
TEST_MATRIX_OS: '["ubuntu-latest"]'
# vLLM is not compatible with Python 3.14. https://github.com/vllm-project/vllm/issues/34096
Expand Down Expand Up @@ -88,12 +89,13 @@ jobs:
"https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cpu-cp38-abi3-manylinux_2_35_x86_64.whl" \
--torch-backend cpu

- name: Start vLLM server
- name: Start vLLM chat server
env:
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
run: |
nohup hatch run -- vllm serve ${{ env.VLLM_MODEL }} \
--port 8000 \
--reasoning-parser qwen3 \
--max-model-len 1024 \
--enforce-eager \
Expand All @@ -102,20 +104,45 @@ jobs:
--tool-call-parser hermes \
--max-num-seqs 1 &

# Wait for the vLLM server to be ready with a timeout of 300 seconds
# Wait for the vLLM chat server to be ready with a timeout of 300 seconds
timeout=300
while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:8000/health > /dev/null 2>&1; do
echo "Waiting for vLLM server to start..."
echo "Waiting for vLLM chat server to start..."
sleep 10
((timeout-=10))
done

if [ $timeout -eq 0 ]; then
echo "Timed out waiting for vLLM server to start."
echo "Timed out waiting for vLLM chat server to start."
exit 1
fi

echo "vLLM server started successfully."
echo "vLLM chat server started successfully."

- name: Start vLLM embedding server
env:
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
run: |
nohup hatch run -- vllm serve ${{ env.VLLM_EMBEDDING_MODEL }} \
--port 8001 \
--enforce-eager \
--max-num-seqs 1 &

# Wait for the vLLM embedding server to be ready with a timeout of 300 seconds
timeout=300
while [ $timeout -gt 0 ] && ! curl -sSf http://localhost:8001/health > /dev/null 2>&1; do
echo "Waiting for vLLM embedding server to start..."
sleep 10
((timeout-=10))
done

if [ $timeout -eq 0 ]; then
echo "Timed out waiting for vLLM embedding server to start."
exit 1
fi

echo "vLLM embedding server started successfully."

- name: Lint
if: matrix.python-version == '3.10' && runner.os == 'Linux'
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
| [togetherai-haystack](integrations/togetherai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/togetherai-haystack.svg)](https://pypi.org/project/togetherai-haystack) | [![Test / togetherai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai-combined/htmlcov/index.html) |
| [unstructured-fileconverter-haystack](integrations/unstructured/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured-combined/htmlcov/index.html) |
| [valkey-haystack](integrations/valkey/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/valkey-haystack.svg)](https://pypi.org/project/valkey-haystack) | [![Test / valkey](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey-combined/htmlcov/index.html) |
| [vllm-haystack](integrations/vllm/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
| [vllm-haystack](integrations/vllm/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
| [watsonx-haystack](integrations/watsonx/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/watsonx-haystack.svg?color=orange)](https://pypi.org/project/watsonx-haystack) | [![Test / watsonx](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx-combined/htmlcov/index.html) |
| [weave-haystack](integrations/weave/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/weave-haystack.svg)](https://pypi.org/project/weave-haystack) | [![Test / weave](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave-combined/htmlcov/index.html) |
| [weaviate-haystack](integrations/weaviate/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) | [![Test / weaviate](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate-combined/htmlcov/index.html) |
Expand Down
15 changes: 12 additions & 3 deletions integrations/vllm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,19 @@

Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).

To run integration tests locally, you need to have a running vLLM server. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
To run integration tests locally, you need two vLLM servers running in parallel: one for the chat generator on port `8000` and one for the embedders on port `8001`. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.

For example, on macOs, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and run the server with:
For example, on macOs, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and start the chat generator server with:

```bash
source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
# chat generator server (port 8000)
source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
```

vLLM-metal does not support embedding models. On macOS, you can run the embedding server via CPU Docker image:

```bash
# embedders server (port 8001)
docker run --rm -p 8001:8000 -e VLLM_CPU_OMP_THREADS_BIND=0-3 vllm/vllm-openai-cpu:latest \
--model sentence-transformers/all-MiniLM-L6-v2 --enforce-eager
```
2 changes: 2 additions & 0 deletions integrations/vllm/pydoc/config_docusaurus.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
loaders:
- modules:
- haystack_integrations.components.generators.vllm.chat.chat_generator
- haystack_integrations.components.embedders.vllm.text_embedder
- haystack_integrations.components.embedders.vllm.document_embedder
search_path: [../src]
processors:
- type: filter
Expand Down
4 changes: 2 additions & 2 deletions integrations/vllm/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
]
dependencies = ["haystack-ai>=2.23.0", "openai"]
dependencies = ["haystack-ai>=2.23.0", "openai", "more_itertools>=9.0.0", "tqdm>=4.48.0"]

[project.urls]
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/vllm#readme"
Expand Down Expand Up @@ -66,7 +66,7 @@ integration = 'pytest -m "integration" {args:tests}'
all = 'pytest {args:tests}'
unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
types = "mypy -p haystack_integrations.components.generators.vllm {args}"
types = "mypy -p haystack_integrations.components.generators.vllm -p haystack_integrations.components.embedders.vllm -p haystack_integrations.common.vllm {args}"

[tool.mypy]
install_types = true
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any

from haystack.utils import Secret
from haystack.utils.http_client import init_http_client
from openai import AsyncOpenAI, OpenAI


def _create_openai_clients(
api_key: Secret | None,
api_base_url: str,
timeout: float | None,
max_retries: int | None,
http_client_kwargs: dict[str, Any] | None,
) -> tuple[OpenAI, AsyncOpenAI]:
"""
Build sync and async OpenAI clients pointing at a vLLM server.

A placeholder api key is used when the user did not supply one and no `VLLM_API_KEY` env var is set, because the
OpenAI client requires a non-empty value.
`timeout` and `max_retries` are only forwarded when provided: when None, the OpenAI client's own defaults apply.
"""
resolved_api_key = "placeholder-api-key"
if api_key is not None and (value := api_key.resolve_value()):
resolved_api_key = value

client_kwargs: dict[str, Any] = {"api_key": resolved_api_key, "base_url": api_base_url}
if timeout is not None:
client_kwargs["timeout"] = timeout
if max_retries is not None:
client_kwargs["max_retries"] = max_retries

sync_client = OpenAI(http_client=init_http_client(http_client_kwargs, async_client=False), **client_kwargs)
async_client = AsyncOpenAI(http_client=init_http_client(http_client_kwargs, async_client=True), **client_kwargs)
return sync_client, async_client
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from .document_embedder import VLLMDocumentEmbedder
from .text_embedder import VLLMTextEmbedder

__all__ = ["VLLMDocumentEmbedder", "VLLMTextEmbedder"]
Loading
Loading