fix(streaming): Ensure final partial=False frame is always yielded

wyf7107 · copybara-github · commit 8befdb88be8f · 2026-06-08T15:59:13.000-07:00
Previously, StreamingResponseAggregator.close() returned None when it had not accumulated any text or parts, such as for safety blocks or pure function calls. As a result, clients (e.g., Vertex AI Reasoning Engine) hung indefinitely waiting for a partial=False termination frame, and loops broke prematurely.

This fix ensures close() always returns a final LlmResponse(partial=False) as long as a response exists, carrying over any error_code, error_message, and usage_metadata, regardless of whether PROGRESSIVE_SSE_STREAMING is enabled.

Added parameterized unit tests to verify the behavior in both streaming modes.

Fixes #3754

Co-authored-by: Yifan Wang <wanyif@google.com>
PiperOrigin-RevId: 927411984
diff --git a/src/google/adk/utils/streaming_utils.py b/src/google/adk/utils/streaming_utils.py
@@ -349,61 +349,61 @@ def close(self) -> Optional[LlmResponse]:
     Returns:
       The aggregated LlmResponse.
     """
+    if not self._response:
+      return None
+
+    candidate = (
+        self._response.candidates[0] if self._response.candidates else None
+    )
+
+    finish_reason = self._finish_reason
+    if not finish_reason and candidate:
+      finish_reason = candidate.finish_reason
+
+    error_code = None
+    error_message = None
+    if finish_reason and finish_reason != types.FinishReason.STOP:
+      error_code = finish_reason
+      error_message = candidate.finish_message if candidate else None
+    elif not candidate and self._response.prompt_feedback:
+      error_code = self._response.prompt_feedback.block_reason
+      error_message = self._response.prompt_feedback.block_reason_message
+
     # ========== Progressive SSE Streaming (new feature) ==========
     if is_feature_enabled(FeatureName.PROGRESSIVE_SSE_STREAMING):
-      # Always generate final aggregated response in progressive mode
-      if self._response and self._response.candidates:
-        # Flush any remaining buffers to complete the sequence
-        self._flush_text_buffer_to_sequence()
-        self._flush_function_call_to_sequence()
-
-        # Use the parts sequence which preserves original ordering
-        final_parts = self._parts_sequence
-
-        if final_parts:
-          candidate = self._response.candidates[0]
-          finish_reason = self._finish_reason or candidate.finish_reason
-
-          return LlmResponse(
-              content=types.ModelContent(parts=final_parts),
-              grounding_metadata=self._grounding_metadata,
-              citation_metadata=self._citation_metadata,
-              error_code=None
-              if finish_reason == types.FinishReason.STOP
-              else finish_reason,
-              error_message=None
-              if finish_reason == types.FinishReason.STOP
-              else candidate.finish_message,
-              usage_metadata=self._usage_metadata,
-              finish_reason=finish_reason,
-              partial=False,
-          )
-
-        return None
+      self._flush_text_buffer_to_sequence()
+      self._flush_function_call_to_sequence()
+
+      final_parts = self._parts_sequence
+      content = types.ModelContent(parts=final_parts) if final_parts else None
 
-    # ========== Non-Progressive SSE Streaming (old behavior) ==========
-    if (
-        (self._text or self._thought_text)
-        and self._response
-        and self._response.candidates
-    ):
-      parts = []
-      if self._thought_text:
-        parts.append(types.Part(text=self._thought_text, thought=True))
-      if self._text:
-        parts.append(types.Part.from_text(text=self._text))
-      candidate = self._response.candidates[0]
       return LlmResponse(
-          content=types.ModelContent(parts=parts),
+          content=content,
           grounding_metadata=self._grounding_metadata,
           citation_metadata=self._citation_metadata,
-          error_code=None
-          if candidate.finish_reason == types.FinishReason.STOP
-          else candidate.finish_reason,
-          error_message=None
-          if candidate.finish_reason == types.FinishReason.STOP
-          else candidate.finish_message,
+          error_code=error_code,
+          error_message=error_message,
           usage_metadata=self._usage_metadata,
+          finish_reason=finish_reason,
+          partial=False,
       )
 
-    return None
+    # ========== Non-Progressive SSE Streaming (old behavior) ==========
+    parts = []
+    if self._thought_text:
+      parts.append(types.Part(text=self._thought_text, thought=True))
+    if self._text:
+      parts.append(types.Part.from_text(text=self._text))
+    content = types.ModelContent(parts=parts) if parts else None
+
+    return LlmResponse(
+        content=content,
+        grounding_metadata=self._grounding_metadata,
+        citation_metadata=self._citation_metadata,
+        error_code=error_code,
+        error_message=error_message,
+        usage_metadata=self._usage_metadata,
+        finish_reason=finish_reason,
+        partial=False,
+    )
+
diff --git a/tests/unittests/utils/test_streaming_utils.py b/tests/unittests/utils/test_streaming_utils.py
@@ -184,25 +184,106 @@ async def test_close_with_error(self):
     assert closed_response.error_message == "Recitation error"
 
   @pytest.mark.asyncio
-  async def test_process_response_with_none_content(self):
-    """Test that StreamingResponseAggregator handles content=None."""
-    aggregator = streaming_utils.StreamingResponseAggregator()
-    response = types.GenerateContentResponse(
-        candidates=[
-            types.Candidate(
-                content=types.Content(parts=[]),
-                finish_reason=types.FinishReason.STOP,
-            )
-        ]
-    )
-    results = []
-    async for r in aggregator.process_response(response):
-      results.append(r)
-    assert len(results) == 1
-    assert results[0].content is not None
+  @pytest.mark.parametrize("use_progressive_sse", [True, False])
+  async def test_empty_content_produces_empty_final_frame(
+      self, use_progressive_sse
+  ):
+    """A candidate with an empty parts list produces an empty final frame."""
+    with temporary_feature_override(
+        FeatureName.PROGRESSIVE_SSE_STREAMING, use_progressive_sse
+    ):
+      aggregator = streaming_utils.StreamingResponseAggregator()
+      response = types.GenerateContentResponse(
+          candidates=[
+              types.Candidate(
+                  content=types.Content(parts=[]),
+                  finish_reason=types.FinishReason.STOP,
+              )
+          ]
+      )
+      results = []
+      async for r in aggregator.process_response(response):
+        results.append(r)
+      closed_response = aggregator.close()
+
+      assert len(results) == 1
+      assert results[0].content is not None
+      assert closed_response is not None
+      assert closed_response.partial is False
+      assert closed_response.content is None
+      assert closed_response.finish_reason == types.FinishReason.STOP
+
+  @pytest.mark.asyncio
+  @pytest.mark.parametrize("use_progressive_sse", [True, False])
+  async def test_prompt_feedback_block_returns_error_frame(
+      self, use_progressive_sse
+  ):
+    """A prompt-level safety block produces a final frame with the error code."""
+    with temporary_feature_override(
+        FeatureName.PROGRESSIVE_SSE_STREAMING, use_progressive_sse
+    ):
+      aggregator = streaming_utils.StreamingResponseAggregator()
+      response = types.GenerateContentResponse(
+          prompt_feedback=types.GenerateContentResponsePromptFeedback(
+              block_reason=types.BlockedReason.SAFETY,
+              block_reason_message="Blocked by safety",
+          )
+      )
+      results = []
+      async for r in aggregator.process_response(response):
+        results.append(r)
+      closed_response = aggregator.close()
+
+      assert len(results) == 1
+      assert closed_response is not None
+      assert closed_response.partial is False
+      assert closed_response.error_code == types.BlockedReason.SAFETY
+      assert closed_response.error_message == "Blocked by safety"
+      assert closed_response.content is None
+
+  @pytest.mark.asyncio
+  @pytest.mark.parametrize("use_progressive_sse", [True, False])
+  async def test_pure_function_call_behavior_differs_by_mode(
+      self, use_progressive_sse
+  ):
+    """A pure function call yields the part in progressive mode and an empty frame otherwise."""
+    with temporary_feature_override(
+        FeatureName.PROGRESSIVE_SSE_STREAMING, use_progressive_sse
+    ):
+      aggregator = streaming_utils.StreamingResponseAggregator()
+      response = types.GenerateContentResponse(
+          candidates=[
+              types.Candidate(
+                  content=types.Content(
+                      parts=[
+                          types.Part(
+                              function_call=types.FunctionCall(
+                                  name="my_tool",
+                                  args={"x": 1},
+                              )
+                          )
+                      ]
+                  ),
+                  finish_reason=types.FinishReason.STOP,
+              )
+          ]
+      )
+
+      results = []
+      async for r in aggregator.process_response(response):
+        results.append(r)
+      closed_response = aggregator.close()
+
+      assert closed_response is not None
+      assert closed_response.partial is False
+
+      if use_progressive_sse:
+        assert closed_response.content is not None
+        assert len(closed_response.content.parts) == 1
+        assert closed_response.content.parts[0].function_call.name == "my_tool"
+      else:
+        assert closed_response.content is None
 
-    closed_response = aggregator.close()
-    assert closed_response is None
 
   @pytest.mark.asyncio
   @pytest.mark.parametrize(