Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions .github/workflows/vllm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ env:
FORCE_COLOR: "1"
VLLM_MODEL: "Qwen/Qwen3-0.6B"
VLLM_EMBEDDING_MODEL: "sentence-transformers/all-MiniLM-L6-v2"
VLLM_RANKER_MODEL: "BAAI/bge-reranker-base"
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
# We only test on Ubuntu to keep running the vLLM server simple
TEST_MATRIX_OS: '["ubuntu-latest"]'
# vLLM is not compatible with Python 3.14. https://github.com/vllm-project/vllm/issues/34096
Expand Down Expand Up @@ -90,9 +93,6 @@ jobs:
--torch-backend cpu

- name: Start vLLM chat server
env:
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
run: |
nohup hatch run -- vllm serve ${{ env.VLLM_MODEL }} \
--port 8000 \
Expand Down Expand Up @@ -120,9 +120,6 @@ jobs:
echo "vLLM chat server started successfully."

- name: Start vLLM embedding server
env:
VLLM_TARGET_DEVICE: "cpu"
VLLM_CPU_KVCACHE_SPACE: "4"
run: |
nohup hatch run -- vllm serve ${{ env.VLLM_EMBEDDING_MODEL }} \
--port 8001 \
Expand All @@ -144,6 +141,27 @@ jobs:

echo "vLLM embedding server started successfully."

- name: Start vLLM ranker server
  run: |
    # Launch the ranker server in the background so this step can poll it.
    nohup hatch run -- vllm serve ${{ env.VLLM_RANKER_MODEL }} \
      --port 8002 \
      --enforce-eager \
      --max-num-seqs 1 &

    # Wait for the vLLM ranker server to be ready with a timeout of 300 seconds.
    # The countdown uses $((...)) instead of ((timeout-=10)): the arithmetic
    # command returns a non-zero status when its result is 0, which aborts the
    # step under `bash -e` before the timeout message below can be printed.
    # Health is checked before the timeout guard, so a server that becomes
    # ready during the final sleep is still detected.
    timeout=300
    until curl -sSf http://localhost:8002/health > /dev/null 2>&1; do
      if [ "$timeout" -le 0 ]; then
        echo "Timed out waiting for vLLM ranker server to start."
        exit 1
      fi
      echo "Waiting for vLLM ranker server to start..."
      sleep 10
      timeout=$((timeout - 10))
    done

    echo "vLLM ranker server started successfully."
- name: Lint
if: matrix.python-version == '3.10' && runner.os == 'Linux'
run: hatch run fmt-check && hatch run test:types
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ Please check out our [Contribution Guidelines](CONTRIBUTING.md) for all the deta
| [togetherai-haystack](integrations/togetherai/) | Generator | [![PyPI - Version](https://img.shields.io/pypi/v/togetherai-haystack.svg)](https://pypi.org/project/togetherai-haystack) | [![Test / togetherai](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/togetherai.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-togetherai-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-togetherai-combined/htmlcov/index.html) |
| [unstructured-fileconverter-haystack](integrations/unstructured/) | File converter | [![PyPI - Version](https://img.shields.io/pypi/v/unstructured-fileconverter-haystack.svg)](https://pypi.org/project/unstructured-fileconverter-haystack) | [![Test / unstructured](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/unstructured.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-unstructured-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-unstructured-combined/htmlcov/index.html) |
| [valkey-haystack](integrations/valkey/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/valkey-haystack.svg)](https://pypi.org/project/valkey-haystack) | [![Test / valkey](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/valkey.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-valkey-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-valkey-combined/htmlcov/index.html) |
| [vllm-haystack](integrations/vllm/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
| [vllm-haystack](integrations/vllm/) | Embedder, Generator, Ranker | [![PyPI - Version](https://img.shields.io/pypi/v/vllm-haystack.svg)](https://pypi.org/project/vllm-haystack) | [![Test / vllm](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/vllm.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-vllm-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-vllm-combined/htmlcov/index.html) |
| [watsonx-haystack](integrations/watsonx/) | Embedder, Generator | [![PyPI - Version](https://img.shields.io/pypi/v/watsonx-haystack.svg?color=orange)](https://pypi.org/project/watsonx-haystack) | [![Test / watsonx](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/watsonx.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-watsonx-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-watsonx-combined/htmlcov/index.html) |
| [weave-haystack](integrations/weave/) | Tracer | [![PyPI - Version](https://img.shields.io/pypi/v/weave-haystack.svg)](https://pypi.org/project/weave-haystack) | [![Test / weave](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weave.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weave-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weave-combined/htmlcov/index.html) |
| [weaviate-haystack](integrations/weaviate/) | Document Store | [![PyPI - Version](https://img.shields.io/pypi/v/weaviate-haystack.svg)](https://pypi.org/project/weaviate-haystack) | [![Test / weaviate](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml/badge.svg)](https://github.com/deepset-ai/haystack-core-integrations/actions/workflows/weaviate.yml) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate/htmlcov/index.html) | [![Coverage badge](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations/python-coverage-comment-action-data-weaviate-combined/endpoint.json&label=)](https://htmlpreview.github.io/?https://github.com/deepset-ai/haystack-core-integrations/blob/python-coverage-comment-action-data-weaviate-combined/htmlcov/index.html) |
Expand Down
7 changes: 7 additions & 0 deletions integrations/vllm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,11 @@ vLLM-metal does not support embedding models. On macOS, you can run the embeddin
# embedders server (port 8001)
docker run --rm -p 8001:8000 -e VLLM_CPU_OMP_THREADS_BIND=0-3 vllm/vllm-openai-cpu:latest \
--model sentence-transformers/all-MiniLM-L6-v2 --enforce-eager
```

To run the ranker server, use the CPU Docker image:
```bash
# ranker server (port 8002)
docker run --rm -p 8002:8000 -e VLLM_CPU_OMP_THREADS_BIND=0-3 vllm/vllm-openai-cpu:latest \
--model BAAI/bge-reranker-base --enforce-eager
```
1 change: 1 addition & 0 deletions integrations/vllm/pydoc/config_docusaurus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ loaders:
- haystack_integrations.components.generators.vllm.chat.chat_generator
- haystack_integrations.components.embedders.vllm.text_embedder
- haystack_integrations.components.embedders.vllm.document_embedder
- haystack_integrations.components.rankers.vllm.ranker
search_path: [../src]
processors:
- type: filter
Expand Down
3 changes: 1 addition & 2 deletions integrations/vllm/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ dependencies = [
"pytest-rerunfailures",
"mypy",
"pip",
"Pillow",
]

[tool.hatch.envs.test.scripts]
Expand All @@ -66,7 +65,7 @@ integration = 'pytest -m "integration" {args:tests}'
all = 'pytest {args:tests}'
unit-cov-retry = 'pytest --cov=haystack_integrations --reruns 3 --reruns-delay 30 -x -m "not integration" {args:tests}'
integration-cov-append-retry = 'pytest --cov=haystack_integrations --cov-append --reruns 3 --reruns-delay 30 -x -m "integration" {args:tests}'
types = "mypy -p haystack_integrations.components.generators.vllm -p haystack_integrations.components.embedders.vllm -p haystack_integrations.common.vllm {args}"
types = "mypy -p haystack_integrations.components.generators.vllm -p haystack_integrations.components.embedders.vllm -p haystack_integrations.components.rankers.vllm -p haystack_integrations.common.vllm {args}"

[tool.mypy]
install_types = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def _validate_documents(documents: list[Document]) -> None:
raise TypeError(msg)

@component.output_types(documents=list[Document], meta=dict[str, Any])
def run(self, documents: list[Document]) -> dict[str, Any]:
def run(self, documents: list[Document]) -> dict[str, list[Document] | dict[str, Any]]:
"""
Embed a list of Documents.

Expand All @@ -267,7 +267,7 @@ def run(self, documents: list[Document]) -> dict[str, Any]:
return {"documents": new_documents, "meta": meta}

@component.output_types(documents=list[Document], meta=dict[str, Any])
async def run_async(self, documents: list[Document]) -> dict[str, Any]:
async def run_async(self, documents: list[Document]) -> dict[str, list[Document] | dict[str, Any]]:
"""
Asynchronously embed a list of Documents.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,14 +138,14 @@ def _prepare_input(self, text: str) -> dict[str, Any]:
return kwargs

@staticmethod
def _prepare_output(response: CreateEmbeddingResponse) -> dict[str, Any]:
def _prepare_output(response: CreateEmbeddingResponse) -> dict[str, list[float] | dict[str, Any]]:
return {
"embedding": response.data[0].embedding,
"meta": {"model": response.model, "usage": dict(response.usage)},
}

@component.output_types(embedding=list[float], meta=dict[str, Any])
def run(self, text: str) -> dict[str, Any]:
def run(self, text: str) -> dict[str, list[float] | dict[str, Any]]:
"""
Embed a single string.

Expand All @@ -162,7 +162,7 @@ def run(self, text: str) -> dict[str, Any]:
return self._prepare_output(response)

@component.output_types(embedding=list[float], meta=dict[str, Any])
async def run_async(self, text: str) -> dict[str, Any]:
async def run_async(self, text: str) -> dict[str, list[float] | dict[str, Any]]:
"""
Asynchronously embed a single string.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from .ranker import VLLMRanker

__all__ = ["VLLMRanker"]
Loading
Loading