deepset-ai · anakin87 · Mar 2, 2026 · Mar 1, 2026 · Mar 2, 2026 · Mar 2, 2026
@@ -555,7 +555,7 @@ def _convert_google_chunk_to_streaming_chunk(
     content = ""
     tool_calls: list[ToolCallDelta] = []
     finish_reason = None
-    reasoning_deltas: list[dict[str, str]] = []
+    reasoning_deltas: list[str] = []
     thought_signature_deltas: list[dict[str, Any]] = []  # Track thought signatures in streaming
 
     if chunk.candidates:
@@ -606,39 +606,39 @@ def _convert_google_chunk_to_streaming_chunk(
 
             # Handle thought parts for Gemini 2.5 series
             elif hasattr(part, "thought") and part.thought:
-                thought_delta = {
-                    "type": "reasoning",
-                    "content": part.text if part.text else "",
-                }
-                reasoning_deltas.append(thought_delta)
+                reasoning_deltas.append(part.text if part.text else "")
+
+    # Combine reasoning deltas into a single ReasoningContent
+    reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas)) if reasoning_deltas else None
 
     # start is only used by print_streaming_chunk. We try to make a reasonable assumption here but it should not be
     # a problem if we change it in the future.
     start = index == 0 or len(tool_calls) > 0
 
-    # Create meta with reasoning deltas and thought signatures if available
+    # Create meta with thought signatures if available
     meta: dict[str, Any] = {
         "received_at": datetime.now(timezone.utc).isoformat(),
         "model": model,
         "usage": usage,
     }
 
-    # Add reasoning deltas to meta if available
-    if reasoning_deltas:
-        meta["reasoning_deltas"] = reasoning_deltas
-
     # Add thought signature deltas to meta if available (for multi-turn context)
     if thought_signature_deltas:
         meta["thought_signature_deltas"] = thought_signature_deltas
 
+    # StreamingChunk allows only one of content/tool_calls/reasoning to be set.
+    # Determine the effective content: tool_calls and reasoning take priority.
+    effective_content = "" if tool_calls or reasoning else content
+
     return StreamingChunk(
-        content="" if tool_calls else content,  # prioritize tool calls over content when both are present
+        content=effective_content,
         tool_calls=tool_calls,
         component_info=component_info,
         index=index,
         start=start,
         finish_reason=FINISH_REASON_MAPPING.get(finish_reason or ""),
         meta=meta,
+        reasoning=reasoning,
     )
 
 
@@ -662,13 +662,9 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) ->
     thoughts_token_count = None
 
     for chunk in chunks:
-        # Extract reasoning deltas
-        if chunk.meta and "reasoning_deltas" in chunk.meta:
-            reasoning_deltas = chunk.meta["reasoning_deltas"]
-            if isinstance(reasoning_deltas, list):
-                for delta in reasoning_deltas:
-                    if delta.get("type") == "reasoning":
-                        reasoning_text_parts.append(delta.get("content", ""))
+        # Extract reasoning from the StreamingChunk.reasoning field
+        if chunk.reasoning and chunk.reasoning.reasoning_text:
+            reasoning_text_parts.append(chunk.reasoning.reasoning_text)
 
         # Extract thought signature deltas (for multi-turn context preservation)
         if chunk.meta and "thought_signature_deltas" in chunk.meta:

@@ -489,6 +489,38 @@ def test_convert_google_chunk_to_streaming_chunk_real_example(self, monkeypatch)
         assert streaming_chunk.tool_calls[5].id is None
         assert streaming_chunk.tool_calls[5].index == 5
 
+    def test_convert_google_chunk_to_streaming_chunk_with_thought(self, monkeypatch):
+        """Test that a thought-only chunk populates StreamingChunk.reasoning and adds no reasoning_deltas meta key."""
+        monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key")
+        component = GoogleGenAIChatGenerator()
+        component_info = ComponentInfo.from_component(component)
+
+        # Build a mock chunk whose single part is flagged as a thought and carries no function call
+        mock_thought_part = Mock()
+        mock_thought_part.text = "Let me think about this..."
+        mock_thought_part.thought = True
+        mock_thought_part.function_call = None
+
+        mock_content = Mock()
+        mock_content.parts = [mock_thought_part]
+        mock_candidate = Mock()
+        mock_candidate.content = mock_content
+        mock_candidate.finish_reason = None
+
+        mock_chunk = Mock()
+        mock_chunk.candidates = [mock_candidate]
+        mock_chunk.usage_metadata = None
+
+        streaming_chunk = _convert_google_chunk_to_streaming_chunk(
+            chunk=mock_chunk, index=0, component_info=component_info, model="gemini-2.5-flash"
+        )
+
+        # The thought text must surface via the reasoning field; meta has no reasoning_deltas and content stays empty
+        assert streaming_chunk.reasoning is not None
+        assert streaming_chunk.reasoning.reasoning_text == "Let me think about this..."
+        assert "reasoning_deltas" not in streaming_chunk.meta
+        assert streaming_chunk.content == ""
+
     def test_aggregate_streaming_chunks_with_reasoning(self):
         """Test the _aggregate_streaming_chunks_with_reasoning function for reasoning content aggregation."""
 
@@ -515,9 +547,6 @@ def test_aggregate_streaming_chunks_with_reasoning(self):
         }
         final_chunk.reasoning = ReasoningContent(reasoning_text="I should greet the user politely")
 
-        # Add reasoning deltas to the final chunk meta (this is how the real method works)
-        final_chunk.meta["reasoning_deltas"] = [{"type": "reasoning", "content": "I should greet the user politely"}]
-
         # Test aggregation
         result = _aggregate_streaming_chunks_with_reasoning([chunk1, chunk2, final_chunk])