integration tests on the ci

anakin87 · anakin87 · commit 5207261c87aa · 2026-04-14T19:16:45.000+02:00
diff --git a/integrations/vllm/README.md b/integrations/vllm/README.md
@@ -11,10 +11,14 @@
 
 Refer to the general [Contribution Guidelines](https://github.com/deepset-ai/haystack-core-integrations/blob/main/CONTRIBUTING.md).
 
-To run integration tests locally, you need to have a running vLLM server. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
+To run integration tests locally, you need two vLLM servers running in parallel: one for the chat generator on port `8000` and one for the embedders on port `8001`. Refer to the [workflow file](https://github.com/deepset-ai/haystack-core-integrations/blob/main/.github/workflows/vllm.yml) for more details.
 
-For example, on macOs, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and run the server with:
+For example, on macOS, you can install [vLLM-metal](https://github.com/vllm-project/vllm-metal) and start both servers with:
 
 ```bash
-source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager  --enable-auto-tool-choice --tool-call-parser hermes
+# chat generator server (port 8000)
+source ~/.venv-vllm-metal/bin/activate && vllm serve Qwen/Qwen3-0.6B --reasoning-parser qwen3 --max-model-len 1024 --enforce-eager --enable-auto-tool-choice --tool-call-parser hermes
+
+# embedders server (port 8001)
+source ~/.venv-vllm-metal/bin/activate && vllm serve sergeyzh/rubert-tiny-turbo --port 8001 --enforce-eager --max-num-seqs 1
 ```
diff --git a/integrations/vllm/src/haystack_integrations/components/embedders/vllm/document_embedder.py b/integrations/vllm/src/haystack_integrations/components/embedders/vllm/document_embedder.py
@@ -86,15 +86,15 @@ def __init__(
         Creates an instance of VLLMDocumentEmbedder.
 
         :param model: The name of the model served by vLLM. Check
-        [vLLM's documentation](https://docs.vllm.ai/en/stable/models/pooling_models) for more information.
+        [vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models) for more information.
         :param api_key: The vLLM API key. Defaults to the `VLLM_API_KEY` environment variable.
             Only required if the vLLM server was started with `--api-key`.
         :param api_base_url: The base URL of the vLLM server.
         :param prefix: A string to add at the beginning of each text.
         :param suffix: A string to add at the end of each text.
         :param dimensions: The number of dimensions of the resulting embedding. Only models trained with
             Matryoshka Representation Learning support this parameter. See
-            [vLLMs documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
+            [vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
             for more information.
         :param batch_size: Number of documents to encode at once.
         :param progress_bar: Whether to show a progress bar.
diff --git a/integrations/vllm/src/haystack_integrations/components/embedders/vllm/text_embedder.py b/integrations/vllm/src/haystack_integrations/components/embedders/vllm/text_embedder.py
@@ -77,7 +77,7 @@ def __init__(
         :param suffix: A string to add at the end of each text to embed.
         :param dimensions: The number of dimensions of the resulting embedding. Only models trained with
             Matryoshka Representation Learning support this parameter. See
-            [vLLMs documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
+            [vLLM documentation](https://docs.vllm.ai/en/stable/models/pooling_models/embed/#matryoshka-embeddings)
             for more information.
         :param timeout: Timeout in seconds for vLLM client calls. If not set, the OpenAI client default applies.
         :param max_retries: Maximum number of retries for failed requests. If not set, the OpenAI client
diff --git a/integrations/vllm/tests/test_document_embedder.py b/integrations/vllm/tests/test_document_embedder.py
@@ -1,7 +1,6 @@
 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-import os
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
@@ -13,7 +12,8 @@
 
 from haystack_integrations.components.embedders.vllm import VLLMDocumentEmbedder
 
-MODEL = "intfloat/e5-mistral-7b-instruct"
+MODEL = "sergeyzh/rubert-tiny-turbo"
+API_BASE_URL = "http://localhost:8001/v1"
 
 
 def _fake_response(embeddings: list[list[float]], prompt_tokens: int = 1, total_tokens: int = 1):
@@ -143,8 +143,18 @@ def test_from_dict(self, monkeypatch):
         assert embedder.api_key == Secret.from_env_var("VLLM_API_KEY", strict=False)
         assert embedder.model == MODEL
         assert embedder.api_base_url == "http://localhost:8000/v1"
-        assert embedder.batch_size == 32
+        assert embedder.prefix == ""
+        assert embedder.suffix == ""
         assert embedder.dimensions == 32
+        assert embedder.batch_size == 32
+        assert embedder.progress_bar is True
+        assert embedder.meta_fields_to_embed == []
+        assert embedder.embedding_separator == "\n"
+        assert embedder.timeout is None
+        assert embedder.max_retries is None
+        assert embedder.http_client_kwargs is None
+        assert embedder.raise_on_failure is False
+        assert embedder.extra_parameters is None
 
     def test_prepare_texts_to_embed(self):
         embedder = VLLMDocumentEmbedder(
@@ -224,13 +234,9 @@ async def test_run_async(self):
 
         assert [d.embedding for d in result["documents"]] == [[0.5], [0.6]]
 
-    @pytest.mark.skipif(
-        not os.environ.get("VLLM_API_BASE_URL", None),
-        reason="Export VLLM_API_BASE_URL pointing to a running vLLM embedding server to run this test.",
-    )
     @pytest.mark.integration
     def test_run(self):
-        embedder = VLLMDocumentEmbedder(model=MODEL, api_base_url=os.environ["VLLM_API_BASE_URL"])
+        embedder = VLLMDocumentEmbedder(model=MODEL, api_base_url=API_BASE_URL)
 
         docs = [
             Document(content="I love cheese", meta={"topic": "Cuisine"}),
diff --git a/integrations/vllm/tests/test_text_embedder.py b/integrations/vllm/tests/test_text_embedder.py
@@ -1,7 +1,6 @@
 # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
 #
 # SPDX-License-Identifier: Apache-2.0
-import os
 from unittest.mock import AsyncMock, MagicMock
 
 import pytest
@@ -11,7 +10,8 @@
 
 from haystack_integrations.components.embedders.vllm import VLLMTextEmbedder
 
-MODEL = "intfloat/e5-mistral-7b-instruct"
+MODEL = "sergeyzh/rubert-tiny-turbo"
+API_BASE_URL = "http://localhost:8001/v1"
 
 
 def _fake_response(embeddings: list[list[float]], prompt_tokens: int = 5, total_tokens: int = 5):
@@ -122,7 +122,13 @@ def test_from_dict(self, monkeypatch):
         assert embedder.api_key == Secret.from_env_var("VLLM_API_KEY", strict=False)
         assert embedder.model == MODEL
         assert embedder.api_base_url == "http://localhost:8000/v1"
+        assert embedder.prefix == ""
+        assert embedder.suffix == ""
         assert embedder.dimensions == 32
+        assert embedder.timeout is None
+        assert embedder.max_retries is None
+        assert embedder.http_client_kwargs is None
+        assert embedder.extra_parameters is None
 
     def test_prepare_input_adds_dimensions_and_extra_body(self):
         embedder = VLLMTextEmbedder(
@@ -168,13 +174,9 @@ async def test_run_async(self):
         result = await embedder.run_async("world")
         assert result["embedding"] == [0.3, 0.4]
 
-    @pytest.mark.skipif(
-        not os.environ.get("VLLM_API_BASE_URL", None),
-        reason="Export VLLM_API_BASE_URL pointing to a running vLLM embedding server to run this test.",
-    )
     @pytest.mark.integration
     def test_run(self):
-        embedder = VLLMTextEmbedder(model=MODEL, api_base_url=os.environ["VLLM_API_BASE_URL"])
+        embedder = VLLMTextEmbedder(model=MODEL, api_base_url=API_BASE_URL)
         result = embedder.run("The food was delicious")
         assert isinstance(result["embedding"], list)
         assert all(isinstance(x, float) for x in result["embedding"])
diff --git a/integrations/vllm/tests/test_utils.py b/integrations/vllm/tests/test_utils.py
@@ -8,7 +8,6 @@
 
 
 def test_create_openai_clients_placeholder_when_no_key():
-    """When api_key is None or unresolved, a placeholder is used."""
     sync_client, async_client = _create_openai_clients(
         api_key=None, api_base_url="http://localhost:8000/v1", timeout=None, max_retries=None, http_client_kwargs=None
     )
@@ -18,7 +17,6 @@ def test_create_openai_clients_placeholder_when_no_key():
 
 
 def test_create_openai_clients_uses_resolved_key_and_forwards_options():
-    """When api_key resolves, it's used; timeout/max_retries forwarded only when set."""
     sync_client, _ = _create_openai_clients(
         api_key=Secret.from_token("real-key"),
         api_base_url="http://vllm:8000/v1",