diff --git a/integrations/stackit/pyproject.toml b/integrations/stackit/pyproject.toml index d2218ecfc0..0dde17cbbf 100644 --- a/integrations/stackit/pyproject.toml +++ b/integrations/stackit/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai"] +dependencies = ["haystack-ai>=2.13.0"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/stackit#readme" diff --git a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py index ec7171bfe4..a116d92a1f 100644 --- a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py +++ b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/document_embedder.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import List, Optional +from typing import Any, Dict, List, Optional -from haystack import component +from haystack import component, default_to_dict from haystack.components.embedders import OpenAIDocumentEmbedder from haystack.utils.auth import Secret @@ -41,6 +41,10 @@ def __init__( progress_bar: bool = True, meta_fields_to_embed: Optional[List[str]] = None, embedding_separator: str = "\n", + *, + timeout: Optional[float] = None, + max_retries: Optional[int] = None, + http_client_kwargs: Optional[Dict[str, Any]] = None, ): """ Creates a STACKITDocumentEmbedder component. @@ -65,6 +69,15 @@ def __init__( List of meta fields that should be embedded along with the Document text. :param embedding_separator: Separator used to concatenate the meta fields to the Document text. + :param timeout: + Timeout for STACKIT client calls. 
If not set, it defaults to either the `OPENAI_TIMEOUT` environment + variable, or 30 seconds. + :param max_retries: + Maximum number of retries to contact STACKIT after an internal error. + If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or 5. + :param http_client_kwargs: + A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`. + For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client). """ super(STACKITDocumentEmbedder, self).__init__( # noqa: UP008 api_key=api_key, @@ -78,4 +91,32 @@ def __init__( progress_bar=progress_bar, meta_fields_to_embed=meta_fields_to_embed, embedding_separator=embedding_separator, + timeout=timeout, + max_retries=max_retries, + http_client_kwargs=http_client_kwargs, + ) + # Set these attributes explicitly: the parent class only started storing them in Haystack 2.14.0 + self.timeout = timeout + self.max_retries = max_retries + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + :returns: + Dictionary with serialized data. 
+ """ + return default_to_dict( + self, + model=self.model, + api_key=self.api_key.to_dict(), + api_base_url=self.api_base_url, + prefix=self.prefix, + suffix=self.suffix, + batch_size=self.batch_size, + progress_bar=self.progress_bar, + meta_fields_to_embed=self.meta_fields_to_embed, + embedding_separator=self.embedding_separator, + timeout=self.timeout, + max_retries=self.max_retries, + http_client_kwargs=self.http_client_kwargs, ) diff --git a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py index cea5b0f568..52e9581f16 100644 --- a/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py +++ b/integrations/stackit/src/haystack_integrations/components/embedders/stackit/text_embedder.py @@ -1,9 +1,9 @@ # SPDX-FileCopyrightText: 2023-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Optional +from typing import Any, Dict, Optional -from haystack import component +from haystack import component, default_to_dict from haystack.components.embedders import OpenAITextEmbedder from haystack.utils.auth import Secret @@ -30,6 +30,10 @@ def __init__( api_base_url: Optional[str] = "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", prefix: str = "", suffix: str = "", + *, + timeout: Optional[float] = None, + max_retries: Optional[int] = None, + http_client_kwargs: Optional[Dict[str, Any]] = None, ): """ Creates a STACKITTextEmbedder component. @@ -45,6 +49,15 @@ def __init__( A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. + :param timeout: + Timeout for STACKIT client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment + variable, or 30 seconds. + :param max_retries: + Maximum number of retries to contact STACKIT after an internal error. 
+ If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or 5. + :param http_client_kwargs: + A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`. + For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client). """ super(STACKITTextEmbedder, self).__init__( # noqa: UP008 api_key=api_key, @@ -54,4 +67,28 @@ def __init__( organization=None, prefix=prefix, suffix=suffix, + timeout=timeout, + max_retries=max_retries, + http_client_kwargs=http_client_kwargs, + ) + # Set these attributes explicitly: the parent class only started storing them in Haystack 2.14.0 + self.timeout = timeout + self.max_retries = max_retries + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + :returns: + Dictionary with serialized data. + """ + return default_to_dict( + self, + api_key=self.api_key.to_dict(), + model=self.model, + api_base_url=self.api_base_url, + prefix=self.prefix, + suffix=self.suffix, + timeout=self.timeout, + max_retries=self.max_retries, + http_client_kwargs=self.http_client_kwargs, ) diff --git a/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py b/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py index 40eaf5032c..e2ec1bd86d 100644 --- a/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py +++ b/integrations/stackit/src/haystack_integrations/components/generators/stackit/chat/chat_generator.py @@ -1,11 +1,11 @@ # SPDX-FileCopyrightText: 2025-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 -from typing import Any, Callable, Dict, Optional +from typing import Any, Dict, Optional from haystack import component, default_to_dict from haystack.components.generators.chat import OpenAIChatGenerator -from haystack.dataclasses import StreamingChunk +from haystack.dataclasses import StreamingCallbackT from 
haystack.utils import serialize_callable from haystack.utils.auth import Secret @@ -40,9 +40,13 @@ def __init__( self, model: str, api_key: Secret = Secret.from_env_var("STACKIT_API_KEY"), - streaming_callback: Optional[Callable[[StreamingChunk], None]] = None, + streaming_callback: Optional[StreamingCallbackT] = None, api_base_url: Optional[str] = "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", generation_kwargs: Optional[Dict[str, Any]] = None, + *, + timeout: Optional[float] = None, + max_retries: Optional[int] = None, + http_client_kwargs: Optional[Dict[str, Any]] = None, ): """ Creates an instance of STACKITChatGenerator class. @@ -70,6 +74,15 @@ def __init__( events as they become available, with the stream terminated by a data: [DONE] message. - `safe_prompt`: Whether to inject a safety prompt before all conversations. - `random_seed`: The seed to use for random sampling. + :param timeout: + Timeout for STACKIT client calls. If not set, it defaults to either the `OPENAI_TIMEOUT` environment + variable, or 30 seconds. + :param max_retries: + Maximum number of retries to contact STACKIT after an internal error. + If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or 5. + :param http_client_kwargs: + A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`. + For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client). 
""" super(STACKITChatGenerator, self).__init__( # noqa: UP008 model=model, @@ -78,6 +91,9 @@ def __init__( api_base_url=api_base_url, organization=None, generation_kwargs=generation_kwargs, + timeout=timeout, + max_retries=max_retries, + http_client_kwargs=http_client_kwargs, ) def to_dict(self) -> Dict[str, Any]: @@ -100,4 +116,7 @@ def to_dict(self) -> Dict[str, Any]: api_base_url=self.api_base_url, generation_kwargs=self.generation_kwargs, api_key=self.api_key.to_dict(), + timeout=self.timeout, + max_retries=self.max_retries, + http_client_kwargs=self.http_client_kwargs, ) diff --git a/integrations/stackit/tests/test_stackit_chat_generator.py b/integrations/stackit/tests/test_stackit_chat_generator.py index 3409e61857..82cfc12ba9 100644 --- a/integrations/stackit/tests/test_stackit_chat_generator.py +++ b/integrations/stackit/tests/test_stackit_chat_generator.py @@ -93,6 +93,9 @@ def test_to_dict_default(self, monkeypatch): "streaming_callback": None, "api_base_url": "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", "generation_kwargs": {}, + "timeout": None, + "max_retries": None, + "http_client_kwargs": None, } for key, value in expected_params.items(): @@ -106,6 +109,9 @@ def test_to_dict_with_parameters(self, monkeypatch): streaming_callback=print_streaming_chunk, api_base_url="test-base-url", generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"}, + timeout=10.0, + max_retries=2, + http_client_kwargs={"proxy": "https://proxy.example.com:8080"}, ) data = component.to_dict() @@ -120,6 +126,9 @@ def test_to_dict_with_parameters(self, monkeypatch): "api_base_url": "test-base-url", "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk", "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"}, + "timeout": 10.0, + "max_retries": 2, + "http_client_kwargs": {"proxy": "https://proxy.example.com:8080"}, } for key, value in expected_params.items(): diff --git 
a/integrations/stackit/tests/test_stackit_document_embedder.py b/integrations/stackit/tests/test_stackit_document_embedder.py index e719c91177..d649094864 100644 --- a/integrations/stackit/tests/test_stackit_document_embedder.py +++ b/integrations/stackit/tests/test_stackit_document_embedder.py @@ -58,14 +58,14 @@ def test_to_dict(self, monkeypatch): "api_key": {"env_vars": ["STACKIT_API_KEY"], "strict": True, "type": "env_var"}, "model": "intfloat/e5-mistral-7b-instruct", "api_base_url": "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", - "dimensions": None, - "organization": None, "prefix": "", "suffix": "", "batch_size": 32, "progress_bar": True, "meta_fields_to_embed": [], "embedding_separator": "\n", + "timeout": None, + "max_retries": None, "http_client_kwargs": None, }, } @@ -82,6 +82,9 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): progress_bar=False, meta_fields_to_embed=["test_field"], embedding_separator="-", + timeout=10.0, + max_retries=2, + http_client_kwargs={"proxy": "https://proxy.example.com"}, ) component_dict = embedder.to_dict() assert component_dict == { @@ -89,18 +92,51 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "init_parameters": { "api_key": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"}, "model": "intfloat/e5-mistral-7b-instruct", - "dimensions": None, "api_base_url": "https://custom-api-base-url.com", - "organization": None, "prefix": "START", "suffix": "END", "batch_size": 64, "progress_bar": False, "meta_fields_to_embed": ["test_field"], "embedding_separator": "-", + "timeout": 10.0, + "max_retries": 2, + "http_client_kwargs": {"proxy": "https://proxy.example.com"}, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("STACKIT_API_KEY", "test-api-key") + data = { + "type": "haystack_integrations.components.embedders.stackit.document_embedder.STACKITDocumentEmbedder", + "init_parameters": { + "api_key": {"env_vars": ["STACKIT_API_KEY"], 
"strict": True, "type": "env_var"}, + "model": "intfloat/e5-mistral-7b-instruct", + "api_base_url": "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", + "prefix": "", + "suffix": "", + "batch_size": 32, + "progress_bar": True, + "meta_fields_to_embed": [], + "embedding_separator": "\n", + "timeout": None, + "max_retries": None, "http_client_kwargs": None, }, } + embedder = STACKITDocumentEmbedder.from_dict(data) + assert embedder.api_key == Secret.from_env_var(["STACKIT_API_KEY"]) + assert embedder.model == "intfloat/e5-mistral-7b-instruct" + assert embedder.api_base_url == "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1" + assert embedder.prefix == "" + assert embedder.suffix == "" + assert embedder.batch_size == 32 + assert embedder.progress_bar is True + assert embedder.meta_fields_to_embed == [] + assert embedder.embedding_separator == "\n" + assert embedder.timeout is None + assert embedder.max_retries is None + assert embedder.http_client_kwargs is None @pytest.mark.skipif( not os.environ.get("STACKIT_API_KEY", None), diff --git a/integrations/stackit/tests/test_stackit_text_embedder.py b/integrations/stackit/tests/test_stackit_text_embedder.py index 24df9a4517..4e8d3235ea 100644 --- a/integrations/stackit/tests/test_stackit_text_embedder.py +++ b/integrations/stackit/tests/test_stackit_text_embedder.py @@ -44,10 +44,10 @@ def test_to_dict(self, monkeypatch): "api_key": {"env_vars": ["STACKIT_API_KEY"], "strict": True, "type": "env_var"}, "model": "intfloat/e5-mistral-7b-instruct", "api_base_url": "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", - "dimensions": None, - "organization": None, "prefix": "", "suffix": "", + "timeout": None, + "max_retries": None, "http_client_kwargs": None, }, } @@ -60,6 +60,9 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): api_base_url="https://custom-api-base-url.com", prefix="START", suffix="END", + timeout=10.0, + max_retries=2, + 
http_client_kwargs={"proxy": "https://proxy.example.com"}, ) component_dict = embedder.to_dict() assert component_dict == { @@ -68,13 +71,38 @@ def test_to_dict_with_custom_init_parameters(self, monkeypatch): "api_key": {"env_vars": ["ENV_VAR"], "strict": False, "type": "env_var"}, "model": "intfloat/e5-mistral-7b-instruct", "api_base_url": "https://custom-api-base-url.com", - "dimensions": None, - "organization": None, "prefix": "START", "suffix": "END", + "timeout": 10.0, + "max_retries": 2, + "http_client_kwargs": {"proxy": "https://proxy.example.com"}, + }, + } + + def test_from_dict(self, monkeypatch): + monkeypatch.setenv("STACKIT_API_KEY", "test-secret-key") + data = { + "type": "haystack_integrations.components.embedders.stackit.text_embedder.STACKITTextEmbedder", + "init_parameters": { + "api_key": {"env_vars": ["STACKIT_API_KEY"], "strict": True, "type": "env_var"}, + "model": "intfloat/e5-mistral-7b-instruct", + "api_base_url": "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1", + "prefix": "", + "suffix": "", + "timeout": None, + "max_retries": None, "http_client_kwargs": None, }, } + embedder = STACKITTextEmbedder.from_dict(data) + assert embedder.api_key == Secret.from_env_var(["STACKIT_API_KEY"]) + assert embedder.api_base_url == "https://api.openai-compat.model-serving.eu01.onstackit.cloud/v1" + assert embedder.model == "intfloat/e5-mistral-7b-instruct" + assert embedder.prefix == "" + assert embedder.suffix == "" + assert embedder.timeout is None + assert embedder.max_retries is None + assert embedder.http_client_kwargs is None @pytest.mark.skipif( not os.environ.get("STACKIT_API_KEY", None),