Skip to content

Commit 797ddfc

Browse files
fix: set include_thoughts=False when thinking_budget is 0 (#2853)
* fix: set include_thoughts=False when thinking_budget is 0

  When thinking_budget is set to 0 (disabling thinking), include_thoughts was hardcoded to True, causing a 400 INVALID_ARGUMENT error from the Vertex AI API. Now include_thoughts is set based on whether thinking is actually enabled (thinking_budget != 0).

  Closes deepset-ai/haystack#2845

* fix: support explicit include_thoughts and auto-derive for thinking_level

  - Allow users to explicitly set include_thoughts via generation_kwargs, overriding the auto-derived value for both thinking_budget and thinking_level branches.
  - Auto-derive include_thoughts=False when thinking_level is MINIMAL, matching the existing behavior for thinking_budget=0.
  - Pop include_thoughts from generation_kwargs to prevent it leaking as an unknown kwarg to GenerateContentConfig.
  - Add include_thoughts assertions to thinking_level tests and new tests for explicit user overrides.

* missing imports and fmt
* don't mutate input dict
* keep only _process_thinking_config in utils
* remove unnecessary copy() from tests
* fmt
* handle google-genai versions that don't implement MINIMAL
* bump google-genai to use ThinkingLevel.MINIMAL
* extend tests in test_chat_generator_utils.py
* fmt

---------

Co-authored-by: Julian Risch <julian.risch@deepset.ai>
1 parent 9cd8d6e commit 797ddfc

3 files changed

Lines changed: 94 additions & 21 deletions

File tree

integrations/google_genai/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ classifiers = [
2525
"Programming Language :: Python :: Implementation :: CPython",
2626
"Programming Language :: Python :: Implementation :: PyPy",
2727
]
28-
dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.51.0", "jsonref>=1.0.0"]
28+
dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.56.0", "jsonref>=1.0.0"]
2929

3030
[project.urls]
3131
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/google_genai#readme"

integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,19 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
5858
"""
5959
Process thinking configuration from generation_kwargs.
6060
61+
Does not mutate the input dict; returns a new dict with thinking_config
62+
applied when applicable. Supports explicit ``include_thoughts`` in
63+
generation_kwargs to override the default derived from thinking_budget
64+
or thinking_level.
65+
6166
:param generation_kwargs: The generation configuration dictionary.
62-
:returns: Updated generation_kwargs with thinking_config if applicable.
67+
:returns: A new dict with thinking_config if applicable; caller's dict is unchanged.
6368
"""
69+
generation_kwargs = dict(generation_kwargs)
70+
# Extract include_thoughts from generation_kwargs if explicitly set by the user.
71+
# It is popped here so it never remains in the returned dict as an unknown top-level kwarg.
72+
explicit_include_thoughts = generation_kwargs.pop("include_thoughts", None)
73+
6474
if "thinking_budget" in generation_kwargs:
6575
thinking_budget = generation_kwargs.pop("thinking_budget")
6676

@@ -72,8 +82,14 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
7282
# fall back to default: dynamic thinking budget allocation
7383
thinking_budget = -1
7484

75-
# Create thinking config
76-
thinking_config = types.ThinkingConfig(thinking_budget=thinking_budget, include_thoughts=True)
85+
# Determine include_thoughts: respect explicit user override, otherwise auto-derive
86+
if explicit_include_thoughts is not None:
87+
include_thoughts = explicit_include_thoughts
88+
else:
89+
# When thinking_budget is 0, thinking is disabled so include_thoughts must be False
90+
include_thoughts = thinking_budget != 0
91+
92+
thinking_config = types.ThinkingConfig(thinking_budget=thinking_budget, include_thoughts=include_thoughts)
7793
generation_kwargs["thinking_config"] = thinking_config
7894

7995
if "thinking_level" in generation_kwargs:
@@ -103,8 +119,13 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
103119
# Parse valid string to ThinkingLevel enum
104120
thinking_level = types.ThinkingLevel(thinking_level_upper)
105121

106-
# Create thinking config with level
107-
thinking_config = types.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
122+
# Determine include_thoughts: respect explicit user override, otherwise auto-derive
123+
if explicit_include_thoughts is not None:
124+
include_thoughts = explicit_include_thoughts
125+
else:
126+
include_thoughts = thinking_level != types.ThinkingLevel.MINIMAL
127+
128+
thinking_config = types.ThinkingConfig(thinking_level=thinking_level, include_thoughts=include_thoughts)
108129
generation_kwargs["thinking_config"] = thinking_config
109130

110131
return generation_kwargs

integrations/google_genai/tests/test_chat_generator_utils.py

Lines changed: 67 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,82 +32,134 @@
3232

3333

3434
def test_process_thinking_budget():
35-
"""Test the _process_thinking_config method with different thinking_budget values."""
35+
"""Test the _process_thinking_config function with different thinking_budget values."""
3636

3737
# Test valid thinking_budget values
3838
generation_kwargs = {"thinking_budget": 1024, "temperature": 0.7}
39-
result = _process_thinking_config(generation_kwargs.copy())
39+
result = _process_thinking_config(generation_kwargs)
4040

4141
# thinking_budget should be moved to thinking_config
4242
assert "thinking_budget" not in result
4343
assert "thinking_config" in result
4444
assert result["thinking_config"].thinking_budget == 1024
45+
assert result["thinking_config"].include_thoughts is True
4546
# Other kwargs should be preserved
4647
assert result["temperature"] == 0.7
4748

4849
# Test dynamic allocation (-1)
4950
generation_kwargs = {"thinking_budget": -1}
50-
result = _process_thinking_config(generation_kwargs.copy())
51+
result = _process_thinking_config(generation_kwargs)
5152
assert result["thinking_config"].thinking_budget == -1
53+
assert result["thinking_config"].include_thoughts is True
5254

5355
# Test zero (disable thinking)
5456
generation_kwargs = {"thinking_budget": 0}
55-
result = _process_thinking_config(generation_kwargs.copy())
57+
result = _process_thinking_config(generation_kwargs)
5658
assert result["thinking_config"].thinking_budget == 0
59+
assert result["thinking_config"].include_thoughts is False
5760

5861
# Test large value
5962
generation_kwargs = {"thinking_budget": 24576}
60-
result = _process_thinking_config(generation_kwargs.copy())
63+
result = _process_thinking_config(generation_kwargs)
6164
assert result["thinking_config"].thinking_budget == 24576
65+
assert result["thinking_config"].include_thoughts is True
6266

6367
# Test when thinking_budget is not present
6468
generation_kwargs = {"temperature": 0.5}
65-
result = _process_thinking_config(generation_kwargs.copy())
69+
result = _process_thinking_config(generation_kwargs)
6670
assert result == generation_kwargs # No changes
6771

6872
# Test invalid type (should fall back to dynamic)
6973
generation_kwargs = {"thinking_budget": "invalid", "temperature": 0.5}
70-
result = _process_thinking_config(generation_kwargs.copy())
74+
result = _process_thinking_config(generation_kwargs)
7175
assert result["thinking_config"].thinking_budget == -1 # Dynamic allocation
7276
assert result["temperature"] == 0.5
7377

7478

7579
def test_process_thinking_level():
76-
"""Test the _process_thinking_config method with different thinking_level values."""
80+
"""Test the _process_thinking_config function with different thinking_level values."""
7781

7882
# Test valid thinking_level values
7983
generation_kwargs = {"thinking_level": "high", "temperature": 0.7}
80-
result = _process_thinking_config(generation_kwargs.copy())
84+
result = _process_thinking_config(generation_kwargs)
8185

8286
# thinking_level should be moved to thinking_config
8387
assert "thinking_level" not in result
8488
assert "thinking_config" in result
8589
assert result["thinking_config"].thinking_level == types.ThinkingLevel.HIGH
90+
assert result["thinking_config"].include_thoughts is True
8691
# Other kwargs should be preserved
8792
assert result["temperature"] == 0.7
8893

8994
# Test THINKING_LEVEL_LOW in upper case
9095
generation_kwargs = {"thinking_level": "LOW"}
91-
result = _process_thinking_config(generation_kwargs.copy())
96+
result = _process_thinking_config(generation_kwargs)
9297
assert result["thinking_config"].thinking_level == types.ThinkingLevel.LOW
98+
assert result["thinking_config"].include_thoughts is True
9399

94-
# Test THINKING_LEVEL_UNSPECIFIED
100+
# Test MINIMAL (should disable include_thoughts)
101+
generation_kwargs = {"thinking_level": "MINIMAL"}
102+
result = _process_thinking_config(generation_kwargs)
103+
assert result["thinking_config"].thinking_level == types.ThinkingLevel.MINIMAL
104+
assert result["thinking_config"].include_thoughts is False
105+
106+
# Test THINKING_LEVEL_UNSPECIFIED (invalid value falls back)
95107
generation_kwargs = {"thinking_level": "test"}
96-
result = _process_thinking_config(generation_kwargs.copy())
108+
result = _process_thinking_config(generation_kwargs)
97109
assert result["thinking_config"].thinking_level == types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
110+
assert result["thinking_config"].include_thoughts is True
98111

99112
# Test when thinking_level is not present
100113
generation_kwargs = {"temperature": 0.5}
101-
result = _process_thinking_config(generation_kwargs.copy())
114+
result = _process_thinking_config(generation_kwargs)
102115
assert result == generation_kwargs # No changes
103116

104117
# Test invalid type (should fall back to THINKING_LEVEL_UNSPECIFIED)
105118
generation_kwargs = {"thinking_level": 123, "temperature": 0.5}
106-
result = _process_thinking_config(generation_kwargs.copy())
119+
result = _process_thinking_config(generation_kwargs)
107120
assert result["thinking_config"].thinking_level == types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
121+
assert result["thinking_config"].include_thoughts is True
108122
assert result["temperature"] == 0.5
109123

110124

125+
def test_process_thinking_config_explicit_include_thoughts():
126+
"""Test that explicit include_thoughts in generation_kwargs overrides the auto-derived value."""
127+
# thinking_budget=0 normally means include_thoughts=False, but user explicitly sets True
128+
generation_kwargs = {"thinking_budget": 0, "include_thoughts": True}
129+
result = _process_thinking_config(generation_kwargs)
130+
assert result["thinking_config"].thinking_budget == 0
131+
assert result["thinking_config"].include_thoughts is True
132+
assert "include_thoughts" not in result # should be popped from top-level kwargs
133+
134+
# thinking_budget=1024 normally means include_thoughts=True, but user explicitly sets False
135+
generation_kwargs = {"thinking_budget": 1024, "include_thoughts": False}
136+
result = _process_thinking_config(generation_kwargs)
137+
assert result["thinking_config"].thinking_budget == 1024
138+
assert result["thinking_config"].include_thoughts is False
139+
assert "include_thoughts" not in result
140+
141+
# thinking_level="high" normally means include_thoughts=True, but user explicitly sets False
142+
generation_kwargs = {"thinking_level": "high", "include_thoughts": False}
143+
result = _process_thinking_config(generation_kwargs)
144+
assert result["thinking_config"].thinking_level == types.ThinkingLevel.HIGH
145+
assert result["thinking_config"].include_thoughts is False
146+
assert "include_thoughts" not in result
147+
148+
# thinking_level="minimal" normally means include_thoughts=False, but user explicitly sets True
149+
generation_kwargs = {"thinking_level": "minimal", "include_thoughts": True}
150+
result = _process_thinking_config(generation_kwargs)
151+
assert result["thinking_config"].thinking_level == types.ThinkingLevel.MINIMAL
152+
assert result["thinking_config"].include_thoughts is True
153+
assert "include_thoughts" not in result
154+
155+
# include_thoughts alone (no thinking_budget or thinking_level) should just be popped and ignored
156+
generation_kwargs = {"include_thoughts": True, "temperature": 0.5}
157+
result = _process_thinking_config(generation_kwargs)
158+
assert "include_thoughts" not in result
159+
assert "thinking_config" not in result
160+
assert result == {"temperature": 0.5}
161+
162+
111163
class TestStreamingChunkConversion:
112164
def test_convert_google_chunk_to_streaming_chunk_text_only(self, monkeypatch):
113165
monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key")
@@ -438,7 +490,7 @@ def test_convert_google_chunk_to_streaming_chunk_real_example(self, monkeypatch)
438490
assert streaming_chunk.tool_calls[5].index == 5
439491

440492
def test_aggregate_streaming_chunks_with_reasoning(self):
441-
"""Test the _aggregate_streaming_chunks_with_reasoning method for reasoning content aggregation."""
493+
"""Test the _aggregate_streaming_chunks_with_reasoning function for reasoning content aggregation."""
442494

443495
# Create mock streaming chunks with reasoning content
444496
chunk1 = Mock()

0 commit comments

Comments
 (0)