Merge branch 'main' into hatch-commands-exp

anakin87 · anakin87 · commit 30fdc8eccd71 · 2025-06-06T08:05:19.000+02:00
diff --git a/.github/workflows/nvidia.yml b/.github/workflows/nvidia.yml
@@ -57,13 +57,13 @@ jobs:
         if: matrix.python-version == '3.9' && runner.os == 'Linux'
         run: hatch run fmt-check && hatch run lint:typing
 
-      - name: Run tests
-        run: hatch run test:cov-retry
-
       - name: Generate docs
         if: matrix.python-version == '3.9' && runner.os == 'Linux'
         run: hatch run docs
 
+      - name: Run tests
+        run: hatch run test:cov-retry
+
       - name: Run unit tests with lowest direct dependencies
         run: |
           hatch run uv pip compile pyproject.toml --resolution lowest-direct --output-file requirements_lowest_direct.txt
diff --git a/integrations/google_genai/CHANGELOG.md b/integrations/google_genai/CHANGELOG.md
@@ -1,5 +1,13 @@
 # Changelog
 
+## [integrations/google_genai-v1.0.1] - 2025-06-05
+
+### 🌀 Miscellaneous
+
+- Style: Update linting to allow function calls in default arguments (#1899)
+- Add examples, set safety_settings
+- Add print in examples
+
 ## [integrations/google_genai-v1.0.0] - 2025-06-02
 
 ### 🚀 Features
diff --git a/integrations/google_genai/examples/chatgenerator_example.py b/integrations/google_genai/examples/chatgenerator_example.py
@@ -0,0 +1,32 @@
+# To run this example, you will need to
+# 1) set `GOOGLE_API_KEY` environment variable
+# 2) install the google_genai_haystack integration: pip install google-genai-haystack
+# Note: if you change the model, update the model-specific inference parameters.
+
+
+from haystack.dataclasses import ChatMessage
+
+from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator
+
+generator = GoogleGenAIChatGenerator(
+    model="gemini-2.0-flash",
+    # model-specific inference parameters
+    generation_kwargs={
+        "temperature": 0.7,
+    },
+)
+
+system_prompt = """
+You are a helpful assistant that helps users learn more about Google Cloud services.
+Your audience is engineers with a decent technical background.
+Be very concise and specific in your answers, keeping them short.
+You may use technical terms, jargon, and abbreviations that are common among practitioners.
+"""
+
+messages = [
+    ChatMessage.from_system(system_prompt),
+    ChatMessage.from_user("Which service should I use to train custom Machine Learning models?"),
+]
+
+results = generator.run(messages)
+print(results["replies"][0].text)
diff --git a/integrations/google_genai/pyproject.toml b/integrations/google_genai/pyproject.toml
@@ -135,6 +135,8 @@ ignore = [
   "ARG001",
   "ARG002",
   "ARG005",
+  # Allow function call argument defaults e.g. `Secret.from_env_var`
+  "B008",
 ]
 unfixable = [
   # Don't touch unused imports
@@ -150,6 +152,8 @@ ban-relative-imports = "parents"
 [tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
 "tests/**/*" = ["PLR2004", "S101", "TID252"]
+# Examples can use print statements
+"examples/**/*" = ["T201"]
 
 [tool.coverage.run]
 source = ["haystack_integrations"]
@@ -180,4 +184,4 @@ markers = [
   "generators: generators tests",
 ]
 log_cli = true
-asyncio_mode = "auto" 
+asyncio_mode = "auto" 
diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/chat_generator.py
@@ -230,7 +230,7 @@ def weather_function(city: str):
     def __init__(
         self,
         *,
-        api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"),  # noqa: B008
+        api_key: Secret = Secret.from_env_var("GOOGLE_API_KEY"),
         model: str = "gemini-2.0-flash",
         generation_kwargs: Optional[Dict[str, Any]] = None,
         safety_settings: Optional[List[Dict[str, Any]]] = None,
@@ -509,6 +509,10 @@ def run(
             if system_instruction:
                 config_params["system_instruction"] = system_instruction
 
+            # Add safety settings if provided
+            if safety_settings:
+                config_params["safety_settings"] = safety_settings
+
             # Add tools if provided
             if tools:
                 config_params["tools"] = _convert_tools_to_google_genai_format(tools)
@@ -593,6 +597,10 @@ async def run_async(
             if system_instruction:
                 config_params["system_instruction"] = system_instruction
 
+            # Add safety settings if provided
+            if safety_settings:
+                config_params["safety_settings"] = safety_settings
+
             # Add tools if provided
             if tools:
                 config_params["tools"] = _convert_tools_to_google_genai_format(tools)
diff --git a/integrations/nvidia/CHANGELOG.md b/integrations/nvidia/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [integrations/nvidia-v0.2.0] - 2025-06-05
+
+### 🚀 Features
+
+- Add NvidiaChatGenerator based on OpenAIChatGenerator (#1776)
+
+
 ## [integrations/nvidia-v0.1.8] - 2025-05-28
 
 ### 🌀 Miscellaneous
diff --git a/integrations/nvidia/pyproject.toml b/integrations/nvidia/pyproject.toml
@@ -23,7 +23,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai", "requests>=2.25.0", "tqdm>=4.21.0"]
+dependencies = ["haystack-ai>=2.13.0", "requests>=2.25.0", "tqdm>=4.21.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme"
@@ -168,6 +168,7 @@ module = [
   "pytest.*",
   "numpy.*",
   "requests_mock.*",
+  "openai.*",
   "pydantic.*",
 ]
 ignore_missing_imports = true
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/__init__.py
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from .chat.chat_generator import NvidiaChatGenerator
 from .generator import NvidiaGenerator
 
-__all__ = ["NvidiaGenerator"]
+__all__ = ["NvidiaChatGenerator", "NvidiaGenerator"]
diff --git a/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py b/integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py
@@ -0,0 +1,133 @@
+# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+from typing import Any, Dict, List, Optional, Union
+
+from haystack import component, default_to_dict, logging
+from haystack.components.generators.chat import OpenAIChatGenerator
+from haystack.dataclasses import StreamingCallbackT
+from haystack.tools import Tool, Toolset, serialize_tools_or_toolset
+from haystack.utils import serialize_callable
+from haystack.utils.auth import Secret
+
+from haystack_integrations.utils.nvidia import DEFAULT_API_URL
+
+logger = logging.getLogger(__name__)
+
+
+@component
+class NvidiaChatGenerator(OpenAIChatGenerator):
+    """
+    Enables text generation using NVIDIA generative models.
+    For supported models, see [NVIDIA Docs](https://build.nvidia.com/models).
+
+    Users can pass any text generation parameters valid for the NVIDIA Chat Completion API
+    directly to this component via the `generation_kwargs` parameter in `__init__` or the `generation_kwargs`
+    parameter in the `run` method.
+
+    This component uses the ChatMessage format for structuring both input and output,
+    ensuring coherent and contextually relevant responses in chat-based text generation scenarios.
+    Details on the ChatMessage format can be found in the
+    [Haystack docs](https://docs.haystack.deepset.ai/docs/data-classes#chatmessage).
+
+    For more details on the parameters supported by the NVIDIA API, refer to the
+    [NVIDIA Docs](https://build.nvidia.com/models).
+
+    Usage example:
+    ```python
+    from haystack_integrations.components.generators.nvidia import NvidiaChatGenerator
+    from haystack.dataclasses import ChatMessage
+
+    messages = [ChatMessage.from_user("What's Natural Language Processing?")]
+
+    client = NvidiaChatGenerator()
+    response = client.run(messages)
+    print(response)
+    ```
+    """
+
+    def __init__(
+        self,
+        *,
+        api_key: Secret = Secret.from_env_var("NVIDIA_API_KEY"),
+        model: str = "meta/llama-3.1-8b-instruct",
+        streaming_callback: Optional[StreamingCallbackT] = None,
+        api_base_url: Optional[str] = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
+        generation_kwargs: Optional[Dict[str, Any]] = None,
+        tools: Optional[Union[List[Tool], Toolset]] = None,
+        timeout: Optional[float] = None,
+        max_retries: Optional[int] = None,
+        http_client_kwargs: Optional[Dict[str, Any]] = None,
+    ):
+        """
+        Creates an instance of NvidiaChatGenerator.
+
+        :param api_key:
+            The NVIDIA API key.
+        :param model:
+            The name of the NVIDIA chat completion model to use.
+        :param streaming_callback:
+            A callback function that is called when a new token is received from the stream.
+            The callback function accepts StreamingChunk as an argument.
+        :param api_base_url:
+            The NVIDIA API Base url.
+        :param generation_kwargs:
+            Other parameters to use for the model. These parameters are all sent directly to
+            the NVIDIA API endpoint. See [NVIDIA API docs](https://docs.nvcf.nvidia.com/ai/generative-models/)
+            for more details.
+            Some of the supported parameters:
+            - `max_tokens`: The maximum number of tokens the output text can have.
+            - `temperature`: What sampling temperature to use. Higher values mean the model will take more risks.
+                Try 0.9 for more creative applications and 0 (argmax sampling) for ones with a well-defined answer.
+            - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
+                considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens
+                comprising the top 10% probability mass are considered.
+            - `stream`: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent
+                events as they become available, with the stream terminated by a data: [DONE] message.
+        :param tools:
+            A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
+            list of `Tool` objects or a `Toolset` instance.
+        :param timeout:
+            The timeout for the NVIDIA API call.
+        :param max_retries:
+            Maximum number of retries to contact NVIDIA after an internal error.
+            If not set, it falls back to the parent's `OPENAI_MAX_RETRIES` environment variable, or to 5.
+        :param http_client_kwargs:
+            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
+            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
+        """
+        super(NvidiaChatGenerator, self).__init__(  # noqa: UP008
+            api_key=api_key,
+            model=model,
+            streaming_callback=streaming_callback,
+            api_base_url=api_base_url,
+            generation_kwargs=generation_kwargs,
+            tools=tools,
+            timeout=timeout,
+            max_retries=max_retries,
+            http_client_kwargs=http_client_kwargs,
+        )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Serialize this component to a dictionary.
+
+        :returns:
+            The serialized component as a dictionary.
+        """
+        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
+
+        return default_to_dict(
+            self,
+            model=self.model,
+            streaming_callback=callback_name,
+            api_base_url=self.api_base_url,
+            generation_kwargs=self.generation_kwargs,
+            api_key=self.api_key.to_dict(),
+            tools=serialize_tools_or_toolset(self.tools),
+            timeout=self.timeout,
+            max_retries=self.max_retries,
+            http_client_kwargs=self.http_client_kwargs,
+        )
diff --git a/integrations/nvidia/tests/test_nvidia_chat_generator.py b/integrations/nvidia/tests/test_nvidia_chat_generator.py

Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,7 @@`
`2`	`2`	`#`
`3`	`3`	`# SPDX-License-Identifier: Apache-2.0`
`4`	`4`
	`5`	`+from .chat.chat_generator import NvidiaChatGenerator`
`5`	`6`	`from .generator import NvidiaGenerator`
`6`	`7`
`7`		`-__all__ = ["NvidiaGenerator"]`
	`8`	`+__all__ = ["NvidiaChatGenerator", "NvidiaGenerator"]`