diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index e2996212cd..96a4b49c99 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -555,7 +555,7 @@ def _convert_google_chunk_to_streaming_chunk( content = "" tool_calls: list[ToolCallDelta] = [] finish_reason = None - reasoning_deltas: list[dict[str, str]] = [] + reasoning_deltas: list[str] = [] thought_signature_deltas: list[dict[str, Any]] = [] # Track thought signatures in streaming if chunk.candidates: @@ -606,39 +606,39 @@ def _convert_google_chunk_to_streaming_chunk( # Handle thought parts for Gemini 2.5 series elif hasattr(part, "thought") and part.thought: - thought_delta = { - "type": "reasoning", - "content": part.text if part.text else "", - } - reasoning_deltas.append(thought_delta) + reasoning_deltas.append(part.text if part.text else "") + + # Combine reasoning deltas into a single ReasoningContent + reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas)) if reasoning_deltas else None # start is only used by print_streaming_chunk. We try to make a reasonable assumption here but it should not be # a problem if we change it in the future. start = index == 0 or len(tool_calls) > 0 - # Create meta with reasoning deltas and thought signatures if available meta: dict[str, Any] = { "received_at": datetime.now(timezone.utc).isoformat(), "model": model, "usage": usage, } - # Add reasoning deltas to meta if available - if reasoning_deltas: - meta["reasoning_deltas"] = reasoning_deltas - - # Add thought signature deltas to meta if available (for multi-turn context) + # Thought signatures can appear in both reasoning and non-reasoning response parts, + # so we always store them in meta for consistency. if thought_signature_deltas: meta["thought_signature_deltas"] = thought_signature_deltas + # StreamingChunk allows only one of content/tool_calls/reasoning to be set. + # Determine the effective content: tool_calls and reasoning take priority. + effective_content = "" if tool_calls or reasoning else content + return StreamingChunk( - content="" if tool_calls else content, # prioritize tool calls over content when both are present + content=effective_content, tool_calls=tool_calls, component_info=component_info, index=index, start=start, finish_reason=FINISH_REASON_MAPPING.get(finish_reason or ""), meta=meta, + reasoning=reasoning, ) @@ -662,13 +662,9 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) -> thoughts_token_count = None for chunk in chunks: - # Extract reasoning deltas - if chunk.meta and "reasoning_deltas" in chunk.meta: - reasoning_deltas = chunk.meta["reasoning_deltas"] - if isinstance(reasoning_deltas, list): - for delta in reasoning_deltas: - if delta.get("type") == "reasoning": - reasoning_text_parts.append(delta.get("content", "")) + # Extract reasoning from the StreamingChunk.reasoning field + if chunk.reasoning and chunk.reasoning.reasoning_text: + reasoning_text_parts.append(chunk.reasoning.reasoning_text) # Extract thought signature deltas (for multi-turn context preservation) if chunk.meta and "thought_signature_deltas" in chunk.meta: diff --git a/integrations/google_genai/tests/test_chat_generator_utils.py b/integrations/google_genai/tests/test_chat_generator_utils.py index 3ac529b0d1..4513bb54b5 100644 --- a/integrations/google_genai/tests/test_chat_generator_utils.py +++ b/integrations/google_genai/tests/test_chat_generator_utils.py @@ -489,6 +489,49 @@ def test_convert_google_chunk_to_streaming_chunk_real_example(self, monkeypatch) assert streaming_chunk.tool_calls[5].id is None assert streaming_chunk.tool_calls[5].index == 5 + def test_convert_google_chunk_to_streaming_chunk_with_thought(self, monkeypatch): + """Test that thought parts populate StreamingChunk.reasoning instead of meta.""" + monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key") + component = GoogleGenAIChatGenerator() + component_info = ComponentInfo.from_component(component) + + # Build a chunk with a thought part using actual Google API objects + thought_part = types.Part(text="Let me think about this...", thought=True, function_call=None) + content = types.Content(role="model", parts=[thought_part]) + candidate = types.Candidate( + content=content, + finish_reason=None, + index=None, + safety_ratings=None, + citation_metadata=None, + grounding_metadata=None, + finish_message=None, + token_count=None, + logprobs_result=None, + avg_logprobs=None, + url_context_metadata=None, + ) + chunk = types.GenerateContentResponse( + candidates=[candidate], + usage_metadata=None, + model_version="gemini-2.5-flash", + response_id=None, + create_time=None, + prompt_feedback=None, + automatic_function_calling_history=None, + parsed=None, + ) + + streaming_chunk = _convert_google_chunk_to_streaming_chunk( + chunk=chunk, index=0, component_info=component_info, model="gemini-2.5-flash" + ) + + # Reasoning should be in the reasoning field, not in meta + assert streaming_chunk.reasoning is not None + assert streaming_chunk.reasoning.reasoning_text == "Let me think about this..." + assert "reasoning_deltas" not in streaming_chunk.meta + assert streaming_chunk.content == "" + def test_aggregate_streaming_chunks_with_reasoning(self): """Test the _aggregate_streaming_chunks_with_reasoning function for reasoning content aggregation.""" @@ -515,9 +558,6 @@ def test_aggregate_streaming_chunks_with_reasoning(self): } final_chunk.reasoning = ReasoningContent(reasoning_text="I should greet the user politely") - # Add reasoning deltas to the final chunk meta (this is how the real method works) - final_chunk.meta["reasoning_deltas"] = [{"type": "reasoning", "content": "I should greet the user politely"}] - # Test aggregation result = _aggregate_streaming_chunks_with_reasoning([chunk1, chunk2, final_chunk])