fix: include intermediate subagent final response events in evaluation intermediate data

google-genai-bot · copybara-github · commit f8a6bd7fc0ca · 2026-03-31T13:55:33.000-07:00
PiperOrigin-RevId: 891846041
diff --git a/src/google/adk/evaluation/evaluation_generator.py b/src/google/adk/evaluation/evaluation_generator.py
@@ -280,6 +280,7 @@ def convert_events_to_eval_invocations(
     invocations = []
     for invocation_id, events in events_by_invocation_id.items():
       final_response = None
+      final_event = None
       user_content = Content(parts=[])
       invocation_timestamp = 0
       app_details = None
@@ -304,15 +305,17 @@ def convert_events_to_eval_invocations(
         if event.content and event.content.parts:
           if event.is_final_response():
             final_response = event.content
-          else:
-            for p in event.content.parts:
-              if p.function_call or p.function_response or p.text:
-                events_to_add.append(event)
-                break
+            final_event = event
+
+          for p in event.content.parts:
+            if p.function_call or p.function_response or p.text:
+              events_to_add.append(event)
+              break
 
       invocation_events = [
           InvocationEvent(author=e.author, content=e.content)
           for e in events_to_add
+          if e is not final_event
       ]
       invocations.append(
           Invocation(
diff --git a/tests/unittests/evaluation/test_evaluation_generator.py b/tests/unittests/evaluation/test_evaluation_generator.py
@@ -204,6 +204,28 @@ def test_multi_agent(
     assert events[2].author == "sub_agent_1"
     assert events[3].author == "sub_agent_2"
 
+  def test_convert_multi_agent_final_responses(
+      self,
+  ):
+    """Tests that only the last final response is excluded from intermediate data."""
+    events = [
+        _build_event("user", [types.Part(text="Hello")], "inv1"),
+        _build_event("agent1", [types.Part(text="First response")], "inv1"),
+        _build_event("agent2", [types.Part(text="Second response")], "inv1"),
+    ]
+
+    invocations = EvaluationGenerator.convert_events_to_eval_invocations(events)
+
+    assert len(invocations) == 1
+    invocation = invocations[0]
+    assert invocation.final_response.parts[0].text == "Second response"
+
+    intermediate_events = invocation.intermediate_data.invocation_events
+    # agent1's final response is kept; only the last one (agent2's) is excluded.
+    assert len(intermediate_events) == 1
+    assert intermediate_events[0].author == "agent1"
+    assert intermediate_events[0].content.parts[0].text == "First response"
+
 
 class TestGetAppDetailsByInvocationId:
   """Test cases for EvaluationGenerator._get_app_details_by_invocation_id method."""