Skip to content

Commit 5207261

Browse files
committed
integration tests on the ci
1 parent bb20fc7 commit 5207261

6 files changed

Lines changed: 33 additions & 23 deletions

File tree

integrations/vllm/README.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,14 @@
1111

1212
Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
1313

14-
To run integration tests locally, you need to have a running vLLM server. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
14+
To run integration tests locally, you need two vLLM servers running in parallel: one for the chat generator on port `8000` and one for the embedders on port `8001`. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
1515

16-
For example, on macOS, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and run the server with:
16+
For example, on macOS, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and start both servers with:
1717

1818
```bash
19-
source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
19+
# chat generator server (port 8000)
20+
source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
21+
22+
# embedders server (port 8001)
23+
source ~/.venv-vllm-metal/bin/activate && vllm serve sergeyzh/rubert-tiny-turbo --port 8001 --enforce-eager --max-num-seqs 1
2024
```

integrations/vllm/src/haystack_integrations/components/embedders/vllm/document_embedder.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,15 @@ def __init__(
8686
Creates an instance of VLLMDocumentEmbedder.
8787
8888
:param model: The name of the model served by vLLM. Check
89-
[vLLM's documentation](https://docs.vllm.ai/en/stable/models/pooling_models) for more information.
89+
[vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models) for more information.
9090
:param api_key: The vLLM API key. Defaults to the `VLLM_API_KEY` environment variable.
9191
Only required if the vLLM server was started with `--api-key`.
9292
:param api_base_url: The base URL of the vLLM server.
9393
:param prefix: A string to add at the beginning of each text.
9494
:param suffix: A string to add at the end of each text.
9595
:param dimensions: The number of dimensions of the resulting embedding. Only models trained with
9696
Matryoshka Representation Learning support this parameter. See
97-
[vLLMs documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
97+
[vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
9898
for more information.
9999
:param batch_size: Number of documents to encode at once.
100100
:param progress_bar: Whether to show a progress bar.

integrations/vllm/src/haystack_integrations/components/embedders/vllm/text_embedder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def __init__(
7777
:param suffix: A string to add at the end of each text to embed.
7878
:param dimensions: The number of dimensions of the resulting embedding. Only models trained with
7979
Matryoshka Representation Learning support this parameter. See
80-
[vLLMs documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
80+
[vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
8181
for more information.
8282
:param timeout: Timeout in seconds for vLLM client calls. If not set, the OpenAI client default applies.
8383
:param max_retries: Maximum number of retries for failed requests. If not set, the OpenAI client

integrations/vllm/tests/test_document_embedder.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
import os
54
from unittest.mock import AsyncMock, MagicMock
65

76
import pytest
@@ -13,7 +12,8 @@
1312

1413
from haystack_integrations.components.embedders.vllm import VLLMDocumentEmbedder
1514

16-
MODEL = "intfloat/e5-mistral-7b-instruct"
15+
MODEL = "sergeyzh/rubert-tiny-turbo"
16+
API_BASE_URL = "http://localhost:8001/v1"
1717

1818

1919
def _fake_response(embeddings: list[list[float]], prompt_tokens: int = 1, total_tokens: int = 1):
@@ -143,8 +143,18 @@ def test_from_dict(self, monkeypatch):
143143
assert embedder.api_key == Secret.from_env_var("VLLM_API_KEY", strict=False)
144144
assert embedder.model == MODEL
145145
assert embedder.api_base_url == "http://localhost:8000/v1"
146-
assert embedder.batch_size == 32
146+
assert embedder.prefix == ""
147+
assert embedder.suffix == ""
147148
assert embedder.dimensions == 32
149+
assert embedder.batch_size == 32
150+
assert embedder.progress_bar is True
151+
assert embedder.meta_fields_to_embed == []
152+
assert embedder.embedding_separator == "\n"
153+
assert embedder.timeout is None
154+
assert embedder.max_retries is None
155+
assert embedder.http_client_kwargs is None
156+
assert embedder.raise_on_failure is False
157+
assert embedder.extra_parameters is None
148158

149159
def test_prepare_texts_to_embed(self):
150160
embedder = VLLMDocumentEmbedder(
@@ -224,13 +234,9 @@ async def test_run_async(self):
224234

225235
assert [d.embedding for d in result["documents"]] == [[0.5], [0.6]]
226236

227-
@pytest.mark.skipif(
228-
not os.environ.get("VLLM_API_BASE_URL", None),
229-
reason="Export VLLM_API_BASE_URL pointing to a running vLLM embedding server to run this test.",
230-
)
231237
@pytest.mark.integration
232238
def test_run(self):
233-
embedder = VLLMDocumentEmbedder(model=MODEL, api_base_url=os.environ["VLLM_API_BASE_URL"])
239+
embedder = VLLMDocumentEmbedder(model=MODEL, api_base_url=API_BASE_URL)
234240

235241
docs = [
236242
Document(content="I love cheese", meta={"topic": "Cuisine"}),

integrations/vllm/tests/test_text_embedder.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
22
#
33
# SPDX-License-Identifier: Apache-2.0
4-
import os
54
from unittest.mock import AsyncMock, MagicMock
65

76
import pytest
@@ -11,7 +10,8 @@
1110

1211
from haystack_integrations.components.embedders.vllm import VLLMTextEmbedder
1312

14-
MODEL = "intfloat/e5-mistral-7b-instruct"
13+
MODEL = "sergeyzh/rubert-tiny-turbo"
14+
API_BASE_URL = "http://localhost:8001/v1"
1515

1616

1717
def _fake_response(embeddings: list[list[float]], prompt_tokens: int = 5, total_tokens: int = 5):
@@ -122,7 +122,13 @@ def test_from_dict(self, monkeypatch):
122122
assert embedder.api_key == Secret.from_env_var("VLLM_API_KEY", strict=False)
123123
assert embedder.model == MODEL
124124
assert embedder.api_base_url == "http://localhost:8000/v1"
125+
assert embedder.prefix == ""
126+
assert embedder.suffix == ""
125127
assert embedder.dimensions == 32
128+
assert embedder.timeout is None
129+
assert embedder.max_retries is None
130+
assert embedder.http_client_kwargs is None
131+
assert embedder.extra_parameters is None
126132

127133
def test_prepare_input_adds_dimensions_and_extra_body(self):
128134
embedder = VLLMTextEmbedder(
@@ -168,13 +174,9 @@ async def test_run_async(self):
168174
result = await embedder.run_async("world")
169175
assert result["embedding"] == [0.3, 0.4]
170176

171-
@pytest.mark.skipif(
172-
not os.environ.get("VLLM_API_BASE_URL", None),
173-
reason="Export VLLM_API_BASE_URL pointing to a running vLLM embedding server to run this test.",
174-
)
175177
@pytest.mark.integration
176178
def test_run(self):
177-
embedder = VLLMTextEmbedder(model=MODEL, api_base_url=os.environ["VLLM_API_BASE_URL"])
179+
embedder = VLLMTextEmbedder(model=MODEL, api_base_url=API_BASE_URL)
178180
result = embedder.run("The food was delicious")
179181
assert isinstance(result["embedding"], list)
180182
assert all(isinstance(x, float) for x in result["embedding"])

integrations/vllm/tests/test_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99

1010
def test_create_openai_clients_placeholder_when_no_key():
11-
"""When api_key is None or unresolved, a placeholder is used."""
1211
sync_client, async_client = _create_openai_clients(
1312
api_key=None, api_base_url="http://localhost:8000/v1", timeout=None, max_retries=None, http_client_kwargs=None
1413
)
@@ -18,7 +17,6 @@ def test_create_openai_clients_placeholder_when_no_key():
1817

1918

2019
def test_create_openai_clients_uses_resolved_key_and_forwards_options():
21-
"""When api_key resolves, it's used; timeout/max_retries forwarded only when set."""
2220
sync_client, _ = _create_openai_clients(
2321
api_key=Secret.from_token("real-key"),
2422
api_base_url="http://vllm:8000/v1",

0 commit comments

Comments
 (0)