Skip to content

Commit c8db40a

Browse files
authored
feat: add structured output support in NvidiaChatGenerator (#2405)
* Add structured output * Update ruff version * restore python 3.9 * Fix linting * Fix linting * Restore * Restore * Restore * Add a new test * Update nvidia.yml * Update tools * remove openrouter
1 parent f574f52 commit c8db40a

4 files changed

Lines changed: 117 additions & 5 deletions

File tree

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# SPDX-FileCopyrightText: 2024-present deepset GmbH <info@deepset.ai>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
6+
# This example demonstrates how to use the NvidiaChatGenerator component
7+
# with structured outputs.
8+
# To run this example, you will need to
9+
# set `NVIDIA_API_KEY` environment variable
10+
11+
from haystack.dataclasses import ChatMessage
12+
13+
from haystack_integrations.components.generators.nvidia import NvidiaChatGenerator
14+
15+
json_schema = {
16+
"type": "object",
17+
"properties": {"title": {"type": "string"}, "rating": {"type": "number"}},
18+
"required": ["title", "rating"],
19+
}
20+
chat_messages = [
21+
ChatMessage.from_user(
22+
"""
23+
Return the title and the rating based on the following movie review according
24+
to the provided json schema.
25+
Review: Inception is a really well made film. I rate it four stars out of five."""
26+
)
27+
]
28+
29+
component = NvidiaChatGenerator(
30+
model="meta/llama-3.1-70b-instruct",
31+
generation_kwargs={"extra_body": {"nvext": {"guided_json": json_schema}}},
32+
)
33+
results = component.run(chat_messages)
34+
# print(results)

integrations/nvidia/pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: CPython",
2424
"Programming Language :: Python :: Implementation :: PyPy",
2525
]
26-
dependencies = ["haystack-ai>=2.13.0", "requests>=2.25.0", "tqdm>=4.21.0"]
26+
dependencies = ["haystack-ai>=2.19.0", "requests>=2.25.0", "tqdm>=4.21.0"]
2727

2828
[project.urls]
2929
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/nvidia#readme"
@@ -160,4 +160,4 @@ addopts = "--strict-markers"
160160
markers = [
161161
"integration: integration tests",
162162
]
163-
log_cli = true
163+
log_cli = true

integrations/nvidia/src/haystack_integrations/components/generators/nvidia/chat/chat_generator.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
import os
6-
from typing import Any, Dict, List, Optional, Union
6+
from typing import Any, Dict, Optional
77

88
from haystack import component, default_to_dict, logging
99
from haystack.components.generators.chat import OpenAIChatGenerator
1010
from haystack.dataclasses import StreamingCallbackT
11-
from haystack.tools import Tool, Toolset, serialize_tools_or_toolset
11+
from haystack.tools import ToolsType, serialize_tools_or_toolset
1212
from haystack.utils import serialize_callable
1313
from haystack.utils.auth import Secret
1414

@@ -56,7 +56,7 @@ def __init__(
5656
streaming_callback: Optional[StreamingCallbackT] = None,
5757
api_base_url: Optional[str] = os.getenv("NVIDIA_API_URL", DEFAULT_API_URL),
5858
generation_kwargs: Optional[Dict[str, Any]] = None,
59-
tools: Optional[Union[List[Tool], Toolset]] = None,
59+
tools: Optional[ToolsType] = None,
6060
timeout: Optional[float] = None,
6161
max_retries: Optional[int] = None,
6262
http_client_kwargs: Optional[Dict[str, Any]] = None,
@@ -86,6 +86,22 @@ def __init__(
8686
comprising the top 10% probability mass are considered.
8787
- `stream`: Whether to stream back partial progress. If set, tokens will be sent as data-only server-sent
8888
events as they become available, with the stream terminated by a data: [DONE] message.
89+
- `response_format`: For NVIDIA NIM servers, this parameter has limited support.
90+
- The basic JSON mode with `{"type": "json_object"}` is supported by compatible models, to produce
91+
valid JSON output.
92+
To pass the JSON schema to the model, use the `guided_json` parameter in `extra_body`.
93+
For example:
94+
```python
95+
generation_kwargs={
96+
"extra_body": {
97+
"nvext": {
98+
"guided_json": {
99+
json_schema
100+
}
101+
}
102+
}
103+
```
104+
For more details, see the [NVIDIA NIM documentation](https://docs.nvidia.com/nim/large-language-models/latest/structured-generation.html).
89105
:param tools:
90106
A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
91107
list of `Tool` objects or a `Toolset` instance.

integrations/nvidia/tests/test_nvidia_chat_generator.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import json
56
import os
67
from datetime import datetime
78
from unittest.mock import AsyncMock, patch
@@ -267,6 +268,67 @@ def __call__(self, chunk: StreamingChunk) -> None:
267268
assert callback.counter > 1
268269
assert "Paris" in callback.responses
269270

271+
@pytest.mark.skipif(
272+
not os.environ.get("NVIDIA_API_KEY", None),
273+
reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
274+
)
275+
@pytest.mark.integration
276+
def test_live_run_with_guided_json_schema(self):
277+
json_schema = {
278+
"type": "object",
279+
"properties": {"title": {"type": "string"}, "rating": {"type": "number"}},
280+
"required": ["title", "rating"],
281+
}
282+
chat_messages = [
283+
ChatMessage.from_user(
284+
"""
285+
Return the title and the rating based on the following movie review according
286+
to the provided json schema.
287+
Review: Inception is a really well made film. I rate it four stars out of five."""
288+
)
289+
]
290+
291+
component = NvidiaChatGenerator(
292+
model="meta/llama-3.1-70b-instruct",
293+
generation_kwargs={"extra_body": {"nvext": {"guided_json": json_schema}}},
294+
)
295+
296+
results = component.run(chat_messages)
297+
assert len(results["replies"]) == 1
298+
message = results["replies"][0].text
299+
output = json.loads(message)
300+
assert output["title"] == "Inception"
301+
assert "rating" in output
302+
303+
@pytest.mark.skipif(
304+
not os.environ.get("NVIDIA_API_KEY", None),
305+
reason="Export an env var called NVIDIA_API_KEY containing the NVIDIA API key to run this test.",
306+
)
307+
@pytest.mark.integration
308+
def test_live_run_with_json_object(self):
309+
chat_messages = [
310+
ChatMessage.from_user(
311+
"""
312+
Return the title and the rating based on the following movie review according
313+
to the provided json schema.
314+
Review: Inception is a really well made film. I rate it four stars out of five."""
315+
)
316+
]
317+
318+
component = NvidiaChatGenerator(
319+
model="meta/llama-3.1-70b-instruct",
320+
generation_kwargs={"response_format": {"type": "json_object"}},
321+
)
322+
323+
results = component.run(chat_messages)
324+
assert len(results["replies"]) == 1
325+
message = results["replies"][0].text
326+
output = json.loads(message)
327+
assert "title" in output
328+
assert "rating" in output
329+
assert isinstance(output["rating"], int)
330+
assert "Inception" in output["title"]
331+
270332

271333
class TestNvidiaChatGeneratorAsync:
272334
def test_init_default_async(self, monkeypatch):

0 commit comments

Comments
 (0)