From 70af8fa0772b11f5148e65ba750ac356bf6ea4aa Mon Sep 17 00:00:00 2001 From: Br1an67 <932039080@qq.com> Date: Mon, 2 Mar 2026 01:03:07 +0800 Subject: [PATCH 1/4] feat: use reasoning field in StreamingChunk for Google GenAI Populate StreamingChunk.reasoning with ReasoningContent instead of storing reasoning deltas as dicts in meta. Update aggregation to read from chunk.reasoning instead of chunk.meta["reasoning_deltas"]. --- .../generators/google_genai/chat/utils.py | 34 ++++++++---------- .../tests/test_chat_generator_utils.py | 35 +++++++++++++++++-- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index e2996212cd..002d981643 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -555,7 +555,7 @@ def _convert_google_chunk_to_streaming_chunk( content = "" tool_calls: list[ToolCallDelta] = [] finish_reason = None - reasoning_deltas: list[dict[str, str]] = [] + reasoning_deltas: list[str] = [] thought_signature_deltas: list[dict[str, Any]] = [] # Track thought signatures in streaming if chunk.candidates: @@ -606,39 +606,39 @@ def _convert_google_chunk_to_streaming_chunk( # Handle thought parts for Gemini 2.5 series elif hasattr(part, "thought") and part.thought: - thought_delta = { - "type": "reasoning", - "content": part.text if part.text else "", - } - reasoning_deltas.append(thought_delta) + reasoning_deltas.append(part.text if part.text else "") + + # Combine reasoning deltas into a single ReasoningContent + reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas)) if reasoning_deltas else None # start is only used by print_streaming_chunk. We try to make a reasonable assumption here but it should not be # a problem if we change it in the future. start = index == 0 or len(tool_calls) > 0 - # Create meta with reasoning deltas and thought signatures if available + # Create meta with thought signatures if available meta: dict[str, Any] = { "received_at": datetime.now(timezone.utc).isoformat(), "model": model, "usage": usage, } - # Add reasoning deltas to meta if available - if reasoning_deltas: - meta["reasoning_deltas"] = reasoning_deltas - # Add thought signature deltas to meta if available (for multi-turn context) if thought_signature_deltas: meta["thought_signature_deltas"] = thought_signature_deltas + # StreamingChunk allows only one of content/tool_calls/reasoning to be set. + # Determine the effective content: tool_calls and reasoning take priority. + effective_content = "" if tool_calls or reasoning else content + return StreamingChunk( - content="" if tool_calls else content, # prioritize tool calls over content when both are present + content=effective_content, tool_calls=tool_calls, component_info=component_info, index=index, start=start, finish_reason=FINISH_REASON_MAPPING.get(finish_reason or ""), meta=meta, + reasoning=reasoning, ) @@ -662,13 +662,9 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) -> thoughts_token_count = None for chunk in chunks: - # Extract reasoning deltas - if chunk.meta and "reasoning_deltas" in chunk.meta: - reasoning_deltas = chunk.meta["reasoning_deltas"] - if isinstance(reasoning_deltas, list): - for delta in reasoning_deltas: - if delta.get("type") == "reasoning": - reasoning_text_parts.append(delta.get("content", "")) + # Extract reasoning from the StreamingChunk.reasoning field + if chunk.reasoning and chunk.reasoning.reasoning_text: + reasoning_text_parts.append(chunk.reasoning.reasoning_text) # Extract thought signature deltas (for multi-turn context preservation) if chunk.meta and "thought_signature_deltas" in chunk.meta: diff --git a/integrations/google_genai/tests/test_chat_generator_utils.py b/integrations/google_genai/tests/test_chat_generator_utils.py index 3ac529b0d1..2852e88cda 100644 --- a/integrations/google_genai/tests/test_chat_generator_utils.py +++ b/integrations/google_genai/tests/test_chat_generator_utils.py @@ -489,6 +489,38 @@ def test_convert_google_chunk_to_streaming_chunk_real_example(self, monkeypatch) assert streaming_chunk.tool_calls[5].id is None assert streaming_chunk.tool_calls[5].index == 5 + def test_convert_google_chunk_to_streaming_chunk_with_thought(self, monkeypatch): + """Test that thought parts populate StreamingChunk.reasoning instead of meta.""" + monkeypatch.setenv("GOOGLE_API_KEY", "test-api-key") + component = GoogleGenAIChatGenerator() + component_info = ComponentInfo.from_component(component) + + # Simulate a chunk with a thought part (reasoning-only chunk) + mock_thought_part = Mock() + mock_thought_part.text = "Let me think about this..." + mock_thought_part.thought = True + mock_thought_part.function_call = None + + mock_content = Mock() + mock_content.parts = [mock_thought_part] + mock_candidate = Mock() + mock_candidate.content = mock_content + mock_candidate.finish_reason = None + + mock_chunk = Mock() + mock_chunk.candidates = [mock_candidate] + mock_chunk.usage_metadata = None + + streaming_chunk = _convert_google_chunk_to_streaming_chunk( + chunk=mock_chunk, index=0, component_info=component_info, model="gemini-2.5-flash" + ) + + # Reasoning should be in the reasoning field, not in meta + assert streaming_chunk.reasoning is not None + assert streaming_chunk.reasoning.reasoning_text == "Let me think about this..." + assert "reasoning_deltas" not in streaming_chunk.meta + assert streaming_chunk.content == "" + def test_aggregate_streaming_chunks_with_reasoning(self): """Test the _aggregate_streaming_chunks_with_reasoning function for reasoning content aggregation.""" @@ -515,9 +547,6 @@ def test_aggregate_streaming_chunks_with_reasoning(self): } final_chunk.reasoning = ReasoningContent(reasoning_text="I should greet the user politely") - # Add reasoning deltas to the final chunk meta (this is how the real method works) - final_chunk.meta["reasoning_deltas"] = [{"type": "reasoning", "content": "I should greet the user politely"}] - # Test aggregation result = _aggregate_streaming_chunks_with_reasoning([chunk1, chunk2, final_chunk]) From 648bfd3881c1869ed4c52464861e673d4d15dbf2 Mon Sep 17 00:00:00 2001 From: Br1an67 <932039080@qq.com> Date: Tue, 3 Mar 2026 00:18:42 +0800 Subject: [PATCH 2/4] refactor: move thought_signature_deltas from meta to ReasoningContent.extra Store thought_signature_deltas in ReasoningContent.extra instead of StreamingChunk.meta when reasoning content is present, grouping all reasoning-related info into ReasoningContent. For text/tool-call chunks (where StreamingChunk mutual exclusivity prevents setting both content and reasoning), signatures remain in meta. The aggregation logic reads from both sources. Consistent with the Anthropic approach in PR #2849. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../generators/google_genai/chat/utils.py | 35 +++++++++++++------ 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index 002d981643..4659aedbab 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -608,22 +608,31 @@ def _convert_google_chunk_to_streaming_chunk( elif hasattr(part, "thought") and part.thought: reasoning_deltas.append(part.text if part.text else "") - # Combine reasoning deltas into a single ReasoningContent - reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas)) if reasoning_deltas else None + # Combine reasoning deltas and thought signatures into a single ReasoningContent. + # Thought signature deltas are stored in ReasoningContent.extra when reasoning content + # is present (consistent with the Anthropic approach in PR #2849). + # When there's no reasoning content, signatures go in meta to avoid the StreamingChunk + # mutual exclusivity constraint (content and reasoning cannot both be set). + if reasoning_deltas: + reasoning_extra: dict[str, Any] = {} + if thought_signature_deltas: + reasoning_extra["thought_signature_deltas"] = thought_signature_deltas + reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas), extra=reasoning_extra) + else: + reasoning = None # start is only used by print_streaming_chunk. We try to make a reasonable assumption here but it should not be # a problem if we change it in the future. start = index == 0 or len(tool_calls) > 0 - # Create meta with thought signatures if available meta: dict[str, Any] = { "received_at": datetime.now(timezone.utc).isoformat(), "model": model, "usage": usage, } - # Add thought signature deltas to meta if available (for multi-turn context) - if thought_signature_deltas: + # Thought signatures go in meta when there's no reasoning content (e.g. text-only or tool-call chunks) + if thought_signature_deltas and not reasoning_deltas: meta["thought_signature_deltas"] = thought_signature_deltas # StreamingChunk allows only one of content/tool_calls/reasoning to be set. @@ -666,13 +675,17 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) -> if chunk.reasoning and chunk.reasoning.reasoning_text: reasoning_text_parts.append(chunk.reasoning.reasoning_text) - # Extract thought signature deltas (for multi-turn context preservation) - if chunk.meta and "thought_signature_deltas" in chunk.meta: + # Extract thought signature deltas from reasoning.extra or meta + # (signatures are in reasoning.extra for reasoning chunks, in meta for text/tool-call chunks) + signature_deltas = None + if chunk.reasoning and chunk.reasoning.extra.get("thought_signature_deltas"): + signature_deltas = chunk.reasoning.extra["thought_signature_deltas"] + elif chunk.meta and "thought_signature_deltas" in chunk.meta: signature_deltas = chunk.meta["thought_signature_deltas"] - if isinstance(signature_deltas, list): - # Aggregate thought signatures - they should come from the final chunks - # We'll keep the last set of signatures as they represent the complete state - thought_signatures = signature_deltas + if signature_deltas and isinstance(signature_deltas, list): + # Aggregate thought signatures - they should come from the final chunks + # We'll keep the last set of signatures as they represent the complete state + thought_signatures = signature_deltas # Extract thinking token usage (from the last chunk that has it) if chunk.meta and "usage" in chunk.meta: From 510a3363589daf049c4ddf52f62bd683931c4ed0 Mon Sep 17 00:00:00 2001 From: Br1an67 <932039080@qq.com> Date: Tue, 3 Mar 2026 00:18:54 +0800 Subject: [PATCH 3/4] test: use real Google API objects in thought chunk test Replace Mock objects with actual types.Part, types.Content, types.Candidate and types.GenerateContentResponse in the test_convert_google_chunk_to_streaming_chunk_with_thought test, following the pattern established in the existing real_example test. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../tests/test_chat_generator_utils.py | 43 ++++++++++++------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/integrations/google_genai/tests/test_chat_generator_utils.py b/integrations/google_genai/tests/test_chat_generator_utils.py index 2852e88cda..4513bb54b5 100644 --- a/integrations/google_genai/tests/test_chat_generator_utils.py +++ b/integrations/google_genai/tests/test_chat_generator_utils.py @@ -495,24 +495,35 @@ def test_convert_google_chunk_to_streaming_chunk_with_thought(self, monkeypatch) component = GoogleGenAIChatGenerator() component_info = ComponentInfo.from_component(component) - # Simulate a chunk with a thought part (reasoning-only chunk) - mock_thought_part = Mock() - mock_thought_part.text = "Let me think about this..." - mock_thought_part.thought = True - mock_thought_part.function_call = None - - mock_content = Mock() - mock_content.parts = [mock_thought_part] - mock_candidate = Mock() - mock_candidate.content = mock_content - mock_candidate.finish_reason = None - - mock_chunk = Mock() - mock_chunk.candidates = [mock_candidate] - mock_chunk.usage_metadata = None + # Build a chunk with a thought part using actual Google API objects + thought_part = types.Part(text="Let me think about this...", thought=True, function_call=None) + content = types.Content(role="model", parts=[thought_part]) + candidate = types.Candidate( + content=content, + finish_reason=None, + index=None, + safety_ratings=None, + citation_metadata=None, + grounding_metadata=None, + finish_message=None, + token_count=None, + logprobs_result=None, + avg_logprobs=None, + url_context_metadata=None, + ) + chunk = types.GenerateContentResponse( + candidates=[candidate], + usage_metadata=None, + model_version="gemini-2.5-flash", + response_id=None, + create_time=None, + prompt_feedback=None, + automatic_function_calling_history=None, + parsed=None, + ) streaming_chunk = _convert_google_chunk_to_streaming_chunk( - chunk=mock_chunk, index=0, component_info=component_info, model="gemini-2.5-flash" + chunk=chunk, index=0, component_info=component_info, model="gemini-2.5-flash" ) # Reasoning should be in the reasoning field, not in meta From bd3d479c2ac5352c592a6eb5025c73001a270ef3 Mon Sep 17 00:00:00 2001 From: Br1an67 <932039080@qq.com> Date: Tue, 3 Mar 2026 00:44:54 +0800 Subject: [PATCH 4/4] refactor: always store thought_signature_deltas in meta Thought signatures can appear in both reasoning and non-reasoning response parts, so storing them consistently in meta is simpler than splitting between ReasoningContent.extra and meta. --- .../generators/google_genai/chat/utils.py | 35 ++++++------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py index 4659aedbab..96a4b49c99 100644 --- a/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py +++ b/integrations/google_genai/src/haystack_integrations/components/generators/google_genai/chat/utils.py @@ -608,18 +608,8 @@ def _convert_google_chunk_to_streaming_chunk( elif hasattr(part, "thought") and part.thought: reasoning_deltas.append(part.text if part.text else "") - # Combine reasoning deltas and thought signatures into a single ReasoningContent. - # Thought signature deltas are stored in ReasoningContent.extra when reasoning content - # is present (consistent with the Anthropic approach in PR #2849). - # When there's no reasoning content, signatures go in meta to avoid the StreamingChunk - # mutual exclusivity constraint (content and reasoning cannot both be set). - if reasoning_deltas: - reasoning_extra: dict[str, Any] = {} - if thought_signature_deltas: - reasoning_extra["thought_signature_deltas"] = thought_signature_deltas - reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas), extra=reasoning_extra) - else: - reasoning = None + # Combine reasoning deltas into a single ReasoningContent + reasoning = ReasoningContent(reasoning_text="".join(reasoning_deltas)) if reasoning_deltas else None # start is only used by print_streaming_chunk. We try to make a reasonable assumption here but it should not be # a problem if we change it in the future. @@ -631,8 +621,9 @@ def _convert_google_chunk_to_streaming_chunk( "usage": usage, } - # Thought signatures go in meta when there's no reasoning content (e.g. text-only or tool-call chunks) - if thought_signature_deltas and not reasoning_deltas: + # Thought signatures can appear in both reasoning and non-reasoning response parts, + # so we always store them in meta for consistency. + if thought_signature_deltas: meta["thought_signature_deltas"] = thought_signature_deltas # StreamingChunk allows only one of content/tool_calls/reasoning to be set. @@ -675,17 +666,13 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) -> if chunk.reasoning and chunk.reasoning.reasoning_text: reasoning_text_parts.append(chunk.reasoning.reasoning_text) - # Extract thought signature deltas from reasoning.extra or meta - # (signatures are in reasoning.extra for reasoning chunks, in meta for text/tool-call chunks) - signature_deltas = None - if chunk.reasoning and chunk.reasoning.extra.get("thought_signature_deltas"): - signature_deltas = chunk.reasoning.extra["thought_signature_deltas"] - elif chunk.meta and "thought_signature_deltas" in chunk.meta: + # Extract thought signature deltas (for multi-turn context preservation) + if chunk.meta and "thought_signature_deltas" in chunk.meta: signature_deltas = chunk.meta["thought_signature_deltas"] - if signature_deltas and isinstance(signature_deltas, list): - # Aggregate thought signatures - they should come from the final chunks - # We'll keep the last set of signatures as they represent the complete state - thought_signatures = signature_deltas + if isinstance(signature_deltas, list): + # Aggregate thought signatures - they should come from the final chunks + # We'll keep the last set of signatures as they represent the complete state + thought_signatures = signature_deltas # Extract thinking token usage (from the last chunk that has it) if chunk.meta and "usage" in chunk.meta: