fix: set include_thoughts=False when thinking_budget is 0 (#2853)

yaowubarbara · julian-risch · web-flow · commit 797ddfc0b01d · 2026-02-27T16:56:23.000+01:00
* fix: set include_thoughts=False when thinking_budget is 0 When thinking_budget is set to 0 (disabling thinking), include_thoughts was hardcoded to True, causing a 400 INVALID_ARGUMENT error from the Vertex AI API. Now include_thoughts is set based on whether thinking is actually enabled (thinking_budget != 0). Closes deepset-ai/haystack#2845 * fix: support explicit include_thoughts and auto-derive for thinking_level - Allow users to explicitly set include_thoughts via generation_kwargs, overriding the auto-derived value for both thinking_budget and thinking_level branches. - Auto-derive include_thoughts=False when thinking_level is MINIMAL, matching the existing behavior for thinking_budget=0. - Pop include_thoughts from generation_kwargs to prevent it leaking as an unknown kwarg to GenerateContentConfig. - Add include_thoughts assertions to thinking_level tests and new tests for explicit user overrides. * missing imports and fmt * don't mutate input dict * keep only _process_thinking_config in utils * remove unnecessary copy() from tests * fmt * handle google-genai versions that don't implement MINIMAL * bump google-genai to use ThinkingLevel.MINIMAL * extend tests in test_chat_generator_utils.py * fmt --------- Co-authored-by: Julian Risch <julian.risch@deepset.ai>
diff --git a/integrations/google_genai/pyproject.toml b/integrations/google_genai/pyproject.toml
@@ -25,7 +25,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.51.0", "jsonref>=1.0.0"]
+dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.56.0", "jsonref>=1.0.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/google_genai#readme"
diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py
@@ -58,9 +58,19 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
     """
     Process thinking configuration from generation_kwargs.
 
+    Does not mutate the input dict; returns a new dict with thinking_config
+    applied when applicable. Supports explicit ``include_thoughts`` in
+    generation_kwargs to override the default derived from thinking_budget
+    or thinking_level.
+
     :param generation_kwargs: The generation configuration dictionary.
-    :returns: Updated generation_kwargs with thinking_config if applicable.
+    :returns: A new dict with thinking_config if applicable; caller's dict is unchanged.
     """
+    generation_kwargs = dict(generation_kwargs)
+    # Extract include_thoughts from generation_kwargs if explicitly set by the user.
+    # This must be popped before creating ThinkingConfig so it doesn't leak as an unknown kwarg.
+    explicit_include_thoughts = generation_kwargs.pop("include_thoughts", None)
+
     if "thinking_budget" in generation_kwargs:
         thinking_budget = generation_kwargs.pop("thinking_budget")
 
@@ -72,8 +82,14 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
             # fall back to default: dynamic thinking budget allocation
             thinking_budget = -1
 
-        # Create thinking config
-        thinking_config = types.ThinkingConfig(thinking_budget=thinking_budget, include_thoughts=True)
+        # Determine include_thoughts: respect explicit user override, otherwise auto-derive
+        if explicit_include_thoughts is not None:
+            include_thoughts = explicit_include_thoughts
+        else:
+            # When thinking_budget is 0, thinking is disabled so include_thoughts must be False
+            include_thoughts = thinking_budget != 0
+
+        thinking_config = types.ThinkingConfig(thinking_budget=thinking_budget, include_thoughts=include_thoughts)
         generation_kwargs["thinking_config"] = thinking_config
 
     if "thinking_level" in generation_kwargs:
@@ -103,8 +119,13 @@ def _process_thinking_config(generation_kwargs: dict[str, Any]) -> dict[str, Any
                 # Parse valid string to ThinkingLevel enum
                 thinking_level = types.ThinkingLevel(thinking_level_upper)
 
-        # Create thinking config with level
-        thinking_config = types.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
+        # Determine include_thoughts: respect explicit user override, otherwise auto-derive
+        if explicit_include_thoughts is not None:
+            include_thoughts = explicit_include_thoughts
+        else:
+            include_thoughts = thinking_level != types.ThinkingLevel.MINIMAL
+
+        thinking_config = types.ThinkingConfig(thinking_level=thinking_level, include_thoughts=include_thoughts)
         generation_kwargs["thinking_config"] = thinking_config
 
     return generation_kwargs
diff --git a/integrations/google_genai/tests/test_chat_generator_utils.py b/integrations/google_genai/tests/test_chat_generator_utils.py
@@ -32,82 +32,134 @@
 
 
 def test_process_thinking_budget():
-    """Test the _process_thinking_config method with different thinking_budget values."""
+    """Test the _process_thinking_config function with different thinking_budget values."""
 
     # Test valid thinking_budget values
     generation_kwargs = {"thinking_budget": 1024, "temperature": 0.7}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
 
     # thinking_budget should be moved to thinking_config
     assert "thinking_budget" not in result
     assert "thinking_config" in result
     assert result["thinking_config"].thinking_budget == 1024
+    assert result["thinking_config"].include_thoughts is True
     # Other kwargs should be preserved
     assert result["temperature"] == 0.7
 
     # Test dynamic allocation (-1)
     generation_kwargs = {"thinking_budget": -1}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_budget == -1
+    assert result["thinking_config"].include_thoughts is True
 
     # Test zero (disable thinking)
     generation_kwargs = {"thinking_budget": 0}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_budget == 0
+    assert result["thinking_config"].include_thoughts is False
 
     # Test large value
     generation_kwargs = {"thinking_budget": 24576}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_budget == 24576
+    assert result["thinking_config"].include_thoughts is True
 
     # Test when thinking_budget is not present
     generation_kwargs = {"temperature": 0.5}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result == generation_kwargs  # No changes
 
     # Test invalid type (should fall back to dynamic)
     generation_kwargs = {"thinking_budget": "invalid", "temperature": 0.5}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_budget == -1  # Dynamic allocation
     assert result["temperature"] == 0.5
 
 
 def test_process_thinking_level():
-    """Test the _process_thinking_config method with different thinking_level values."""
+    """Test the _process_thinking_config function with different thinking_level values."""
 
     # Test valid thinking_level values
     generation_kwargs = {"thinking_level": "high", "temperature": 0.7}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
 
     # thinking_level should be moved to thinking_config
     assert "thinking_level" not in result
     assert "thinking_config" in result
     assert result["thinking_config"].thinking_level == types.ThinkingLevel.HIGH
+    assert result["thinking_config"].include_thoughts is True
     # Other kwargs should be preserved
     assert result["temperature"] == 0.7
 
     # Test THINKING_LEVEL_LOW in upper case
     generation_kwargs = {"thinking_level": "LOW"}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_level == types.ThinkingLevel.LOW
+    assert result["thinking_config"].include_thoughts is True
 
-    # Test THINKING_LEVEL_UNSPECIFIED
+    # Test MINIMAL (should disable include_thoughts)
+    generation_kwargs = {"thinking_level": "MINIMAL"}
+    result = _process_thinking_config(generation_kwargs)
+    assert result["thinking_config"].thinking_level == types.ThinkingLevel.MINIMAL
+    assert result["thinking_config"].include_thoughts is False
+
+    # Test THINKING_LEVEL_UNSPECIFIED (invalid value falls back)
     generation_kwargs = {"thinking_level": "test"}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_level == types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
+    assert result["thinking_config"].include_thoughts is True
 
     # Test when thinking_level is not present
     generation_kwargs = {"temperature": 0.5}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result == generation_kwargs  # No changes
 
     # Test invalid type (should fall back to THINKING_LEVEL_UNSPECIFIED)
     generation_kwargs = {"thinking_level": 123, "temperature": 0.5}
-    result = _process_thinking_config(generation_kwargs.copy())
+    result = _process_thinking_config(generation_kwargs)
     assert result["thinking_config"].thinking_level == types.ThinkingLevel.THINKING_LEVEL_UNSPECIFIED
+    assert result["thinking_config"].include_thoughts is True
     assert result["temperature"] == 0.5
 
 
+def test_process_thinking_config_explicit_include_thoughts():
+    """Test that explicit include_thoughts in generation_kwargs overrides the auto-derived value."""
+    # thinking_budget=0 normally means include_thoughts=False, but user explicitly sets True
+    generation_kwargs = {"thinking_budget": 0, "include_thoughts": True}
+    result = _process_thinking_config(generation_kwargs)
+    assert result["thinking_config"].thinking_budget == 0
+    assert result["thinking_config"].include_thoughts is True
+    assert "include_thoughts" not in result  # should be popped from top-level kwargs
+
+    # thinking_budget=1024 normally means include_thoughts=True, but user explicitly sets False
+    generation_kwargs = {"thinking_budget": 1024, "include_thoughts": False}
+    result = _process_thinking_config(generation_kwargs)
+    assert result["thinking_config"].thinking_budget == 1024
+    assert result["thinking_config"].include_thoughts is False
+    assert "include_thoughts" not in result
+
+    # thinking_level="high" normally means include_thoughts=True, but user explicitly sets False
+    generation_kwargs = {"thinking_level": "high", "include_thoughts": False}
+    result = _process_thinking_config(generation_kwargs)
+    assert result["thinking_config"].thinking_level == types.ThinkingLevel.HIGH
+    assert result["thinking_config"].include_thoughts is False
+    assert "include_thoughts" not in result
+
+    # thinking_level="minimal" normally means include_thoughts=False, but user explicitly sets True
+    generation_kwargs = {"thinking_level": "minimal", "include_thoughts": True}
+    result = _process_thinking_config(generation_kwargs)
+    assert result["thinking_config"].thinking_level == types.ThinkingLevel.MINIMAL
+    assert result["thinking_config"].include_thoughts is True
+    assert "include_thoughts" not in result
+
+    # include_thoughts alone (no thinking_budget or thinking_level) should just be popped and ignored
+    generation_kwargs = {"include_thoughts": True, "temperature": 0.5}
+    result = _process_thinking_config(generation_kwargs)
+    assert "include_thoughts" not in result
+    assert "thinking_config" not in result
+    assert result == {"temperature": 0.5}
+
+
 class TestStreamingChunkConversion:
     def test_convert_google_chunk_to_streaming_chunk_text_only(self, monkeypatch):
         monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key")
@@ -438,7 +490,7 @@ def test_convert_google_chunk_to_streaming_chunk_real_example(self, monkeypatch)
         assert streaming_chunk.tool_calls[5].index == 5
 
     def test_aggregate_streaming_chunks_with_reasoning(self):
-        """Test the _aggregate_streaming_chunks_with_reasoning method for reasoning content aggregation."""
+        """Test the _aggregate_streaming_chunks_with_reasoning function for reasoning content aggregation."""
 
         # Create mock streaming chunks with reasoning content
         chunk1 = Mock()

Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ classifiers = [`
`25`	`25`	`"Programming Language :: Python :: Implementation :: CPython",`
`26`	`26`	`"Programming Language :: Python :: Implementation :: PyPy",`
`27`	`27`	`]`
`28`		`-dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.51.0", "jsonref>=1.0.0"]`
	`28`	`+dependencies = ["haystack-ai>=2.24.1", "google-genai[aiohttp]>=1.56.0", "jsonref>=1.0.0"]`
`29`	`29`
`30`	`30`	`[project.urls]`
`31`	`31`	`Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/google_genai#readme"`