Skip to content

Commit 7fa13ed

Browse files
wuliang229 and copybara-github
authored and committed
fix(live): Buffer tool calls and emit them together upon turn completion
The `receive` method now accumulates function calls from multiple `LiveServerMessage` instances. These accumulated tool calls are yielded as a single `LlmResponse` containing all function call parts only when a turn_complete message is received. Without the change, tool_1's response is sent to the model as soon as it is generated, triggering a second call for tool_2. Upon receiving two consecutive responses for tool_2, the model utters the same message twice. Fixes issue #4902 Co-authored-by: Liang Wu <wuliang@google.com> PiperOrigin-RevId: 893197482
1 parent 642d337 commit 7fa13ed

File tree

5 files changed

+227
-18
lines changed

5 files changed

+227
-18
lines changed
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Simple Live (Bidi-Streaming) Agent with Parallel Tools
2+
This project provides a basic example of a live, [bidirectional streaming](https://google.github.io/adk-docs/streaming/) agent that demonstrates parallel tool execution.
3+
4+
## Getting Started
5+
6+
Follow these steps to get the agent up and running:
7+
8+
1. **Start the ADK Web Server**
9+
Open your terminal, navigate to the root directory that contains the
10+
`live_bidi_streaming_parallel_tools_agent` folder, and execute the following
11+
command:
12+
```bash
13+
adk web
14+
```
15+
16+
2. **Access the ADK Web UI**
17+
Once the server is running, open your web browser and navigate to the URL
18+
provided in the terminal (it will typically be `http://localhost:8000`).
19+
20+
3. **Select the Agent**
21+
In the top-left corner of the ADK Web UI, use the dropdown menu to select
22+
this agent (`live_bidi_streaming_parallel_tools_agent`).
23+
24+
4. **Start Streaming**
25+
Click on the **Audio** icon located near the chat input
26+
box to begin the streaming session.
27+
28+
5. **Interact with the Agent**
29+
You can now begin talking to the agent, and it will respond in real-time.
30+
Try asking it to perform multiple actions at once, for example: "Turn on the
31+
lights and the TV at the same time." The agent will be able to invoke both
32+
`turn_on_lights` and `turn_on_tv` tools in parallel.
33+
34+
## Usage Notes
35+
36+
* You only need to click the **Audio** button once to initiate the
37+
stream. The current version does not support stopping and restarting the stream
38+
by clicking the button again during a session.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from . import agent
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
from google.adk.agents.llm_agent import Agent
17+
18+
19+
def turn_on_lights():
    """Turn on the lights."""
    # Log the invocation so parallel tool calls are visible in the console.
    print("turn_on_lights")
    result = {"status": "OK"}
    return result
23+
24+
25+
def turn_on_tv():
    """Turn on the tv."""
    # Log the invocation so parallel tool calls are visible in the console.
    print("turn_on_tv")
    result = {"status": "OK"}
    return result
29+
30+
31+
# Live (bidi-streaming) demo agent exposing two simple home-automation tools
# so that parallel tool calls can be exercised in a single turn.
root_agent = Agent(
    name="Home_helper",
    model="gemini-live-2.5-flash-native-audio",
    instruction="Be polite and answer all user's questions.",
    tools=[turn_on_lights, turn_on_tv],
)

src/google/adk/models/gemini_llm_connection.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
203203
"""
204204

205205
text = ''
206+
tool_call_parts = []
206207
async with Aclosing(self._gemini_session.receive()) as agen:
207208
# TODO(b/440101573): Reuse StreamingResponseAggregator to accumulate
208209
# partial content and emit responses as needed.
@@ -332,6 +333,13 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
332333
if text:
333334
yield self.__build_full_text_response(text)
334335
text = ''
336+
if tool_call_parts:
337+
logger.debug('Returning aggregated tool_call_parts')
338+
yield LlmResponse(
339+
content=types.Content(role='model', parts=tool_call_parts),
340+
model_version=self._model_version,
341+
)
342+
tool_call_parts = []
335343
yield LlmResponse(
336344
turn_complete=True,
337345
interrupted=message.server_content.interrupted,
@@ -353,17 +361,14 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
353361
model_version=self._model_version,
354362
)
355363
if message.tool_call:
364+
logger.debug('Received tool call: %s', message.tool_call)
356365
if text:
357366
yield self.__build_full_text_response(text)
358367
text = ''
359-
parts = [
368+
tool_call_parts.extend([
360369
types.Part(function_call=function_call)
361370
for function_call in message.tool_call.function_calls
362-
]
363-
yield LlmResponse(
364-
content=types.Content(role='model', parts=parts),
365-
model_version=self._model_version,
366-
)
371+
])
367372
if message.session_resumption_update:
368373
logger.debug('Received session resumption message: %s', message)
369374
yield (
@@ -372,6 +377,12 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
372377
model_version=self._model_version,
373378
)
374379
)
380+
if tool_call_parts:
381+
logger.debug('Exited loop with pending tool_call_parts')
382+
yield LlmResponse(
383+
content=types.Content(role='model', parts=tool_call_parts),
384+
model_version=self._model_version,
385+
)
375386

376387
async def close(self):
377388
"""Closes the llm server connection."""

tests/unittests/models/test_gemini_llm_connection.py

Lines changed: 121 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -933,33 +933,142 @@ async def test_receive_tool_call_and_grounding_metadata_with_native_audio(
933933
mock_metadata_msg.tool_call = None
934934
mock_metadata_msg.session_resumption_update = None
935935

936+
# 3. Message with turn_complete
937+
mock_turn_complete_content = mock.create_autospec(
938+
types.LiveServerContent, instance=True
939+
)
940+
mock_turn_complete_content.model_turn = None
941+
mock_turn_complete_content.grounding_metadata = None
942+
mock_turn_complete_content.turn_complete = True
943+
mock_turn_complete_content.interrupted = False
944+
mock_turn_complete_content.input_transcription = None
945+
mock_turn_complete_content.output_transcription = None
946+
947+
mock_turn_complete_msg = mock.create_autospec(
948+
types.LiveServerMessage, instance=True
949+
)
950+
mock_turn_complete_msg.usage_metadata = None
951+
mock_turn_complete_msg.server_content = mock_turn_complete_content
952+
mock_turn_complete_msg.tool_call = None
953+
mock_turn_complete_msg.session_resumption_update = None
954+
936955
async def mock_receive_generator():
937956
yield mock_tool_call_msg
938957
yield mock_metadata_msg
958+
yield mock_turn_complete_msg
939959

940960
receive_mock = mock.Mock(return_value=mock_receive_generator())
941961
mock_gemini_session.receive = receive_mock
942962

943963
responses = [resp async for resp in connection.receive()]
944964

945-
assert len(responses) == 2
965+
assert len(responses) == 3
946966

947-
# First response: the tool call
967+
# First response: the audio content and grounding metadata
968+
assert responses[0].grounding_metadata == grounding_metadata
969+
assert responses[0].content == mock_content
948970
assert responses[0].content is not None
949971
assert responses[0].content.parts is not None
950-
assert responses[0].content.parts[0].function_call is not None
972+
assert responses[0].content.parts[0].inline_data == audio_blob
973+
974+
# Second response: the tool call, buffered until turn_complete
975+
assert responses[1].content is not None
976+
assert responses[1].content.parts is not None
977+
assert responses[1].content.parts[0].function_call is not None
951978
assert (
952-
responses[0].content.parts[0].function_call.name
979+
responses[1].content.parts[0].function_call.name
953980
== 'enterprise_web_search'
954981
)
955-
assert responses[0].content.parts[0].function_call.args == {
982+
assert responses[1].content.parts[0].function_call.args == {
956983
'query': 'Google stock price today'
957984
}
958-
assert responses[0].grounding_metadata is None
985+
assert responses[1].grounding_metadata is None
959986

960-
# Second response: the audio content and grounding metadata
961-
assert responses[1].grounding_metadata == grounding_metadata
962-
assert responses[1].content == mock_content
963-
assert responses[1].content is not None
964-
assert responses[1].content.parts is not None
965-
assert responses[1].content.parts[0].inline_data == audio_blob
987+
# Third response: the turn_complete
988+
assert responses[2].turn_complete is True
989+
990+
991+
@pytest.mark.asyncio
async def test_receive_multiple_tool_calls_buffered_until_turn_complete(
    gemini_connection, mock_gemini_session
):
  """Test receive buffers multiple tool call messages until turn complete.

  Two separate LiveServerMessage tool calls must be aggregated into a single
  LlmResponse (with one part per function call) that is emitted only when the
  turn_complete message arrives — not one LlmResponse per tool call message.
  """
  # First tool call message: no server content or usage metadata, only a
  # single function call named 'tool_1'.
  mock_tool_call_msg1 = mock.create_autospec(
      types.LiveServerMessage, instance=True
  )
  mock_tool_call_msg1.usage_metadata = None
  mock_tool_call_msg1.server_content = None
  mock_tool_call_msg1.session_resumption_update = None

  function_call1 = types.FunctionCall(
      name='tool_1',
      args={'arg': 'value1'},
  )
  mock_tool_call1 = mock.create_autospec(
      types.LiveServerToolCall, instance=True
  )
  mock_tool_call1.function_calls = [function_call1]
  mock_tool_call_msg1.tool_call = mock_tool_call1

  # Second tool call message, carrying 'tool_2' — arrives before the turn
  # completes, so it must be buffered together with 'tool_1'.
  mock_tool_call_msg2 = mock.create_autospec(
      types.LiveServerMessage, instance=True
  )
  mock_tool_call_msg2.usage_metadata = None
  mock_tool_call_msg2.server_content = None
  mock_tool_call_msg2.session_resumption_update = None

  function_call2 = types.FunctionCall(
      name='tool_2',
      args={'arg': 'value2'},
  )
  mock_tool_call2 = mock.create_autospec(
      types.LiveServerToolCall, instance=True
  )
  mock_tool_call2.function_calls = [function_call2]
  mock_tool_call_msg2.tool_call = mock_tool_call2

  # Turn complete message: signals end of turn with no model content, which
  # is what should trigger emission of the buffered tool calls.
  mock_turn_complete_content = mock.create_autospec(
      types.LiveServerContent, instance=True
  )
  mock_turn_complete_content.model_turn = None
  mock_turn_complete_content.grounding_metadata = None
  mock_turn_complete_content.turn_complete = True
  mock_turn_complete_content.interrupted = False
  mock_turn_complete_content.input_transcription = None
  mock_turn_complete_content.output_transcription = None

  mock_turn_complete_msg = mock.create_autospec(
      types.LiveServerMessage, instance=True
  )
  mock_turn_complete_msg.usage_metadata = None
  mock_turn_complete_msg.server_content = mock_turn_complete_content
  mock_turn_complete_msg.tool_call = None
  mock_turn_complete_msg.session_resumption_update = None

  # Stream the three messages in order: tool_1, tool_2, then turn_complete.
  async def mock_receive_generator():
    yield mock_tool_call_msg1
    yield mock_tool_call_msg2
    yield mock_turn_complete_msg

  receive_mock = mock.Mock(return_value=mock_receive_generator())
  mock_gemini_session.receive = receive_mock

  responses = [resp async for resp in gemini_connection.receive()]

  # Expected: One LlmResponse with both tool calls, then one with turn_complete
  assert len(responses) == 2

  # First response: single LlmResponse carrying both function calls
  assert responses[0].content is not None
  parts = responses[0].content.parts
  assert len(parts) == 2
  assert parts[0].function_call.name == 'tool_1'
  assert parts[0].function_call.args == {'arg': 'value1'}
  assert parts[1].function_call.name == 'tool_2'
  assert parts[1].function_call.args == {'arg': 'value2'}

  # Second response: turn_complete True
  assert responses[1].turn_complete is True

0 commit comments

Comments
 (0)