
Commit e25e725

feat(live): Refactor live SequentialAgent workflow and live event handling for reliability and cleaner session history
1 parent 16a15c8 commit e25e725

4 files changed: 122 additions & 62 deletions


src/google/adk/agents/sequential_agent.py

Lines changed: 11 additions & 24 deletions
@@ -14,6 +14,7 @@
 
 """Sequential agent implementation."""
 
+
 from __future__ import annotations
 
 from typing import AsyncGenerator
@@ -29,6 +30,8 @@
 from .sequential_agent_config import SequentialAgentConfig
 
 
+
+
 class SequentialAgent(BaseAgent):
   """A shell agent that runs its sub-agents in sequence."""
 
@@ -46,37 +49,21 @@ async def _run_live_impl(
   ) -> AsyncGenerator[Event, None]:
     """Implementation for live SequentialAgent.
 
-    Compared to the non-live case, live agents process a continuous stream of audio
-    or video, so there is no way to tell if it's finished and should pass
-    to the next agent or not. So we introduce a task_completed() function so the
-    model can call this function to signal that it's finished the task and we
-    can move on to the next agent.
+    In a live run, this agent executes its sub-agents one by one. It relies
+    on the `generation_complete` event from the underlying model to determine
+    when a sub-agent has finished its turn. Once a sub-agent's `run_live`
+    stream concludes (triggered by the `generation_complete` event), the
+    `SequentialAgent` will proceed to execute the next sub-agent in the
+    sequence.
 
     Args:
       ctx: The invocation context of the agent.
     """
-    # There is no way to know if it's using live during init phase so we have to init it here
-    for sub_agent in self.sub_agents:
-      # add tool
-      def task_completed():
-        """
-        Signals that the model has successfully completed the user's question
-        or task.
-        """
-        return 'Task completion signaled.'
-
-      if isinstance(sub_agent, LlmAgent):
-        # Use function name to dedupe.
-        if task_completed.__name__ not in sub_agent.tools:
-          sub_agent.tools.append(task_completed)
-          sub_agent.instruction += f"""If you finished the user's request
-          according to its description, call the {task_completed.__name__} function
-          to exit so the next agents can take over. When calling this function,
-          do not generate any text other than the function call."""
-
     for sub_agent in self.sub_agents:
       async for event in sub_agent.run_live(ctx):
         yield event
+        if event.generation_complete:
+          break
 
   @classmethod
   @override
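
The docstring above describes the whole handoff mechanism: drain each sub-agent's live stream and move on as soon as an event carries `generation_complete`, instead of injecting a synthetic task_completed tool into every LlmAgent. A minimal, self-contained sketch of that consume-until-complete loop, using hypothetical FakeEvent/FakeAgent stand-ins rather than the real ADK Event and BaseAgent classes:

import asyncio
from dataclasses import dataclass
from typing import AsyncGenerator, List


@dataclass
class FakeEvent:
  author: str
  text: str
  generation_complete: bool = False


class FakeAgent:
  """Toy stand-in for a live sub-agent that streams a few events."""

  def __init__(self, name: str, replies: List[str]):
    self.name = name
    self.replies = replies

  async def run_live(self) -> AsyncGenerator[FakeEvent, None]:
    for reply in self.replies:
      yield FakeEvent(author=self.name, text=reply)
    # Mark the end of this agent's turn, like generation_complete above.
    yield FakeEvent(author=self.name, text='', generation_complete=True)


async def run_sequentially(agents: List[FakeAgent]) -> None:
  # Same control flow as the new _run_live_impl: drain each sub-agent's
  # stream and break once generation_complete is observed.
  for agent in agents:
    async for event in agent.run_live():
      print(agent.name, repr(event.text), event.generation_complete)
      if event.generation_complete:
        break


asyncio.run(run_sequentially([
    FakeAgent('writer', ['drafting...']),
    FakeAgent('critic', ['reviewing...']),
]))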

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 55 additions & 34 deletions
@@ -109,26 +109,45 @@ async def run_live(
         await llm_connection.send_history(llm_request.contents)
         trace_send_data(invocation_context, event_id, llm_request.contents)
 
-      send_task = asyncio.create_task(
-          self._send_to_model(llm_connection, invocation_context)
-      )
+      event_queue = asyncio.Queue()
 
-      try:
-        async for event in self._receive_from_model(
-            llm_connection,
-            event_id,
-            invocation_context,
-            llm_request,
+      async def send_handler():
+        """Handles sending user input and generating user text events."""
+        async for event in self._send_to_model(
+            llm_connection, invocation_context
         ):
-          # Empty event means the queue is closed.
-          if not event:
+          await event_queue.put(event)
+
+      async def receive_handler():
+        """Handles receiving model output and generating model events."""
+        try:
+          async for event in self._receive_from_model(
+              llm_connection, event_id, invocation_context, llm_request
+          ):
+            await event_queue.put(event)
+        finally:
+          # Signal that the receiving process is complete.
+          await event_queue.put(None)
+
+      send_task = asyncio.create_task(send_handler())
+      receive_task = asyncio.create_task(receive_handler())
+      tasks = {send_task, receive_task}
+
+      try:
+        while True:
+          # Consume events from the unified queue.
+          event = await event_queue.get()
+          if event is None:  # End of stream signal
             break
+
           logger.debug('Receive new event: %s', event)
           yield event
-          # send back the function response
+
+          # Forward function responses back to the model.
           if event.get_function_responses():
             logger.debug('Sending back last function response event: %s', event)
             invocation_context.live_request_queue.send_content(event.content)
+
           if (
               event.content
               and event.content.parts
@@ -140,33 +159,19 @@ async def run_live(
             # cancel the tasks that belongs to the closed connection.
             send_task.cancel()
             await llm_connection.close()
-          if (
-              event.content
-              and event.content.parts
-              and event.content.parts[0].function_response
-              and event.content.parts[0].function_response.name
-              == 'task_completed'
-          ):
-            # this is used for sequential agent to signal the end of the agent.
-            await asyncio.sleep(1)
-            # cancel the tasks that belongs to the closed connection.
-            send_task.cancel()
-            return
       finally:
-        # Clean up
-        if not send_task.done():
-          send_task.cancel()
-          try:
-            await send_task
-          except asyncio.CancelledError:
-            pass
+        # Clean up all running tasks.
+        for task in tasks:
+          if not task.done():
+            task.cancel()
+        await asyncio.gather(*tasks, return_exceptions=True)
 
   async def _send_to_model(
       self,
       llm_connection: BaseLlmConnection,
       invocation_context: InvocationContext,
-  ):
-    """Sends data to model."""
+  ) -> AsyncGenerator[Event, None]:
+    """Sends data to model and yields user events for text messages."""
     while True:
       live_request_queue = invocation_context.live_request_queue
       try:
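
This is the core of the run_live refactor: instead of one receive loop plus a background send task, both directions are wrapped in handler coroutines that feed a single asyncio.Queue, a None sentinel from the receive side ends the consumer loop, and cleanup cancels whatever is still running and awaits it with return_exceptions=True. A standalone sketch of that fan-in pattern with toy producers (none of the names below are ADK API):

import asyncio


async def fan_in_demo() -> None:
  queue: asyncio.Queue = asyncio.Queue()

  async def producer(name: str, count: int, closes_stream: bool) -> None:
    for i in range(count):
      await queue.put(f'{name}-{i}')
      await asyncio.sleep(0)  # yield control, as real I/O would
    if closes_stream:
      # Only one producer owns the end-of-stream signal, mirroring the
      # None put in receive_handler's finally block.
      await queue.put(None)

  tasks = {
      asyncio.create_task(producer('send', 2, closes_stream=False)),
      asyncio.create_task(producer('recv', 3, closes_stream=True)),
  }
  try:
    while True:
      item = await queue.get()
      if item is None:  # end-of-stream sentinel
        break
      print('consumed', item)
  finally:
    # Cancel whatever is still running and wait for everything to settle.
    for task in tasks:
      if not task.done():
        task.cancel()
    await asyncio.gather(*tasks, return_exceptions=True)


asyncio.run(fan_in_demo())
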
@@ -212,7 +217,23 @@ async def _send_to_model(
           )
           await llm_connection.send_realtime(live_request.blob)
 
-        if live_request.content:
+        # If the request is a user-sent text message, create and yield an event
+        # so it can be saved to the session history.
+        if (
+            live_request.content
+            and live_request.content.parts
+            and live_request.content.parts[0].text
+        ):
+          user_event = Event(
+              invocation_id=invocation_context.invocation_id,
+              author='user',
+              content=live_request.content,
+          )
+          yield user_event
+          await llm_connection.send_content(live_request.content)
+        elif live_request.content:
+          # Handle other content types, like function responses, without creating
+          # a user event.
           await llm_connection.send_content(live_request.content)
 
   async def _receive_from_model(
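
The second half of this file's change turns _send_to_model from a plain coroutine into an async generator: blobs and non-text content are still forwarded silently, but a text message from the live request queue is first wrapped in a user-authored event and yielded, so the caller can persist it to session history before sending it on to the model. A simplified sketch of that forward-and-yield shape, with toy request and event types that are not the ADK classes:

import asyncio
from dataclasses import dataclass
from typing import AsyncGenerator, List, Optional


@dataclass
class ToyRequest:
  text: Optional[str] = None
  blob: Optional[bytes] = None


@dataclass
class ToyUserEvent:
  author: str
  text: str


async def send_to_model(
    requests: List[ToyRequest], sent_log: List[str]
) -> AsyncGenerator[ToyUserEvent, None]:
  for request in requests:
    if request.blob is not None:
      # Realtime data: forward only, no session-history event.
      sent_log.append(f'blob({len(request.blob)} bytes)')
    elif request.text is not None:
      # Text message: yield a user event first so it can be saved, then forward.
      yield ToyUserEvent(author='user', text=request.text)
      sent_log.append(f'text({request.text!r})')


async def main() -> None:
  sent_log: List[str] = []
  requests = [ToyRequest(blob=b'\x00\x01'), ToyRequest(text='hello')]
  async for event in send_to_model(requests, sent_log):
    print('history event:', event)
  print('sent to model:', sent_log)


asyncio.run(main())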

src/google/adk/models/gemini_llm_connection.py

Lines changed: 49 additions & 4 deletions
@@ -140,10 +140,37 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
     Yields:
       LlmResponse: The model response.
     """
-
     text = ''
+    user_text = ''
     async for message in self._gemini_session.receive():
       logger.debug('Got LLM Live message: %s', message)
+
+      model_turn_has_content = False
+      if message.server_content and message.server_content.model_turn:
+        content = message.server_content.model_turn
+        if content and content.parts:
+          model_turn_has_content = any(
+              p.text or p.inline_data for p in content.parts
+          )
+
+      model_is_replying = (
+          message.tool_call
+          or (
+              message.server_content
+              and message.server_content.output_transcription
+          )
+          or model_turn_has_content
+      )
+
+      if user_text and model_is_replying:
+        yield LlmResponse(
+            content=types.Content(
+                role='user',
+                parts=[types.Part.from_text(text=user_text)],
+            )
+        )
+        user_text = ''
+
       if message.server_content:
         content = message.server_content.model_turn
         if content and content.parts:
@@ -153,6 +180,8 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
           if content.parts[0].text:
             text += content.parts[0].text
             llm_response.partial = True
+          if content.parts[0].inline_data:
+            llm_response.partial = True
           # don't yield the merged text event when receiving audio data
           elif text and not content.parts[0].inline_data:
             yield self.__build_full_text_response(text)
@@ -162,14 +191,15 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
             message.server_content.input_transcription
             and message.server_content.input_transcription.text
         ):
-          user_text = message.server_content.input_transcription.text
+          user_text_fragment = message.server_content.input_transcription.text
+          user_text += user_text_fragment
           parts = [
               types.Part.from_text(
-                  text=user_text,
+                  text=user_text_fragment,
               )
           ]
           llm_response = LlmResponse(
-              content=types.Content(role='user', parts=parts)
+              content=types.Content(role='user', parts=parts), partial=True
           )
           yield llm_response
         if (
@@ -202,6 +232,21 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
               turn_complete=True, interrupted=message.server_content.interrupted
           )
           break
+        if message.server_content.generation_complete:
+          if text:
+            yield self.__build_full_text_response(text)
+            text = ''
+          # yield LlmResponse(generation_complete=True, partial=True)
+          yield LlmResponse(
+              content=types.Content(
+                  role='model',
+                  parts=[
+                      types.Part.from_text(text='[SYSTEM] Hand off to second agent')
+                  ],
+              ),
+              generation_complete=True,
+              partial=True,
+          )
         # in case of empty content or parts, we sill surface it
         # in case it's an interrupted message, we merge the previous partial
         # text. Other we don't merge. because content can be none when model
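
Two related changes in receive(): input-transcription fragments are still yielded immediately, but now as partial user responses while also being accumulated into user_text, and the buffered text is flushed as one complete user message as soon as the model starts replying (a tool call, output transcription, or model-turn content); generation_complete from the server is additionally surfaced as its own LlmResponse so workflow agents can react to it. A tiny sketch of the buffer-then-flush idea on plain dicts, with no Gemini session involved:

from typing import Dict, List, Tuple


def merge_transcription(messages: List[Dict[str, str]]) -> List[Tuple[str, str, bool]]:
  """Returns (role, text, is_partial) tuples in the order they would be yielded."""
  out: List[Tuple[str, str, bool]] = []
  user_text = ''
  for message in messages:
    model_is_replying = bool(message.get('model_text'))
    if user_text and model_is_replying:
      # Flush the merged user turn as one complete event before the model's reply.
      out.append(('user', user_text, False))
      user_text = ''
    fragment = message.get('input_transcription')
    if fragment:
      user_text += fragment
      out.append(('user', fragment, True))  # partial fragment, like partial=True
    model_text = message.get('model_text')
    if model_text:
      out.append(('model', model_text, True))
  return out


for role, text, is_partial in merge_transcription([
    {'input_transcription': 'turn on '},
    {'input_transcription': 'the lights'},
    {'model_text': 'Okay, turning on the lights.'},
]):
  print(role, repr(text), 'partial' if is_partial else 'complete')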

src/google/adk/models/llm_response.py

Lines changed: 7 additions & 0 deletions
@@ -35,6 +35,8 @@ class LlmResponse(BaseModel):
       stream. Only used for streaming mode and when the content is plain text.
     turn_complete: Indicates whether the response from the model is complete.
       Only used for streaming mode.
+    generation_complete: Indicates that the model has finished generating content.
+      Only used for streaming mode.
     error_code: Error code if the response is an error. Code varies by model.
     error_message: Error message if the response is an error.
     interrupted: Flag indicating that LLM was interrupted when generating the
@@ -67,6 +69,11 @@ class LlmResponse(BaseModel):
   Only used for streaming mode.
   """
 
+  generation_complete: Optional[bool] = None
+  """Indicates that the model has finished generating content.
+  Only used for streaming mode.
+  """
+
   error_code: Optional[str] = None
   """Error code if the response is an error. Code varies by model."""
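
generation_complete joins the existing optional streaming flags (partial, turn_complete, interrupted), defaulting to None so non-streaming responses are unaffected; the SequentialAgent loop above is its first consumer. A minimal pydantic sketch of the same optional-flag shape, using a cut-down stand-in rather than the real LlmResponse model:

from typing import Optional

from pydantic import BaseModel


class MiniResponse(BaseModel):
  """Cut-down stand-in showing the optional streaming flags."""

  partial: Optional[bool] = None
  turn_complete: Optional[bool] = None
  generation_complete: Optional[bool] = None


final_chunk = MiniResponse(generation_complete=True, partial=True)
print(final_chunk.generation_complete)     # True
print(MiniResponse().generation_complete)  # None, flag is unset outside streaming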
