Skip to content

Commit 52872b9

Browse files
committed
fix(live): yield tool_call events immediately to prevent Gemini 3.1 deadlock
GeminiLlmConnection.receive() accumulated tool_call messages in a tool_call_parts buffer and only yielded them when server_content.turn_complete arrived. This pattern deadlocks run_live() on gemini-3.1-flash-live-preview, which does NOT send turn_complete until AFTER it receives the tool response: ADK waits for turn_complete → 3.1 waits for tool_response → tool never dispatched → WebSocket times out (1000 close) ~15s later This was the pre-1.28 behavior; the accumulation pattern was introduced recently and broke 3.1 Live compatibility. Fix: yield tool_call messages as LlmResponse events immediately, one per incoming message. 2.5 and other models are unaffected — the flow layer handles yielded function_call content identically regardless of whether it arrives before or after turn_complete. Confirmed in production: after this fix, audio returns ~1150ms after tool response on 3.1 (matching the direct SDK timing). Without the fix, every tool call hangs until the WebSocket times out. Tests updated: - test_receive_multiple_tool_call_messages_yielded_immediately (renamed from ...buffered_until_turn_complete) — asserts each tool_call yields its own LlmResponse - test_receive_tool_call_and_grounding_metadata_with_native_audio — updated expected response ordering (tool_call now yields before the subsequent audio/grounding message instead of after it) - test_receive_tool_call_yielded_without_turn_complete (new) — regression test for the 3.1 deadlock: asserts tool_call yields even when no turn_complete is ever sent
1 parent 1af1b4a commit 52872b9

2 files changed

Lines changed: 95 additions & 45 deletions

File tree

src/google/adk/models/gemini_llm_connection.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,6 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
187187
"""
188188

189189
text = ''
190-
tool_call_parts = []
191190
async with Aclosing(self._gemini_session.receive()) as agen:
192191
# TODO(b/440101573): Reuse StreamingResponseAggregator to accumulate
193192
# partial content and emit responses as needed.
@@ -327,14 +326,6 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
327326
if text:
328327
yield self.__build_full_text_response(text)
329328
text = ''
330-
if tool_call_parts:
331-
logger.debug('Returning aggregated tool_call_parts')
332-
yield LlmResponse(
333-
content=types.Content(role='model', parts=tool_call_parts),
334-
model_version=self._model_version,
335-
live_session_id=live_session_id,
336-
)
337-
tool_call_parts = []
338329
yield LlmResponse(
339330
turn_complete=True,
340331
interrupted=message.server_content.interrupted,
@@ -362,10 +353,21 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
362353
if text:
363354
yield self.__build_full_text_response(text)
364355
text = ''
365-
tool_call_parts.extend([
356+
# Yield tool calls immediately. gemini-3.1-flash-live-preview does
357+
# not send turn_complete until AFTER it receives the tool response,
358+
# so buffering tool calls until turn_complete deadlocks run_live()
359+
# on that model. Earlier versions of this method (<= ADK 1.27)
360+
# yielded immediately as well; the accumulation pattern introduced
361+
# in 1.28 broke 3.1 Live compatibility.
362+
parts = [
366363
types.Part(function_call=function_call)
367364
for function_call in message.tool_call.function_calls
368-
])
365+
]
366+
yield LlmResponse(
367+
content=types.Content(role='model', parts=parts),
368+
model_version=self._model_version,
369+
live_session_id=live_session_id,
370+
)
369371
if message.session_resumption_update:
370372
logger.debug('Received session resumption message: %s', message)
371373
yield (
@@ -383,14 +385,6 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
383385
live_session_id=live_session_id,
384386
)
385387

386-
if tool_call_parts:
387-
logger.debug('Exited loop with pending tool_call_parts')
388-
yield LlmResponse(
389-
content=types.Content(role='model', parts=tool_call_parts),
390-
model_version=self._model_version,
391-
live_session_id=self._gemini_session.session_id,
392-
)
393-
394388
async def close(self):
395389
"""Closes the llm server connection."""
396390

tests/unittests/models/test_gemini_llm_connection.py

Lines changed: 82 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,35 +1023,40 @@ async def mock_receive_generator():
10231023

10241024
assert len(responses) == 3
10251025

1026-
# First response: the audio content and grounding metadata
1027-
assert responses[0].grounding_metadata == grounding_metadata
1028-
assert responses[0].content == mock_content
1026+
# First response: the tool call — yielded immediately on arrival
1027+
# (no longer buffered until turn_complete, which would deadlock on 3.1)
10291028
assert responses[0].content is not None
10301029
assert responses[0].content.parts is not None
1031-
assert responses[0].content.parts[0].inline_data == audio_blob
1032-
1033-
# Second response: the tool call, buffered until turn_complete
1034-
assert responses[1].content is not None
1035-
assert responses[1].content.parts is not None
1036-
assert responses[1].content.parts[0].function_call is not None
1030+
assert responses[0].content.parts[0].function_call is not None
10371031
assert (
1038-
responses[1].content.parts[0].function_call.name
1032+
responses[0].content.parts[0].function_call.name
10391033
== 'enterprise_web_search'
10401034
)
1041-
assert responses[1].content.parts[0].function_call.args == {
1035+
assert responses[0].content.parts[0].function_call.args == {
10421036
'query': 'Google stock price today'
10431037
}
1044-
assert responses[1].grounding_metadata is None
1038+
1039+
# Second response: the audio content and grounding metadata
1040+
assert responses[1].grounding_metadata == grounding_metadata
1041+
assert responses[1].content == mock_content
1042+
assert responses[1].content is not None
1043+
assert responses[1].content.parts is not None
1044+
assert responses[1].content.parts[0].inline_data == audio_blob
10451045

10461046
# Third response: the turn_complete
10471047
assert responses[2].turn_complete is True
10481048

10491049

10501050
@pytest.mark.asyncio
1051-
async def test_receive_multiple_tool_calls_buffered_until_turn_complete(
1051+
async def test_receive_multiple_tool_call_messages_yielded_immediately(
10521052
gemini_connection, mock_gemini_session
10531053
):
1054-
"""Test receive buffers multiple tool call messages until turn complete."""
1054+
"""Test receive yields each tool_call message immediately (no buffering).
1055+
1056+
Tool calls MUST be yielded the moment they arrive, not accumulated until
1057+
turn_complete. gemini-3.1-flash-live-preview does not send turn_complete
1058+
until AFTER it receives the tool response — buffering causes a deadlock.
1059+
"""
10551060
# First tool call message
10561061
mock_tool_call_msg1 = mock.create_autospec(
10571062
types.LiveServerMessage, instance=True
@@ -1120,20 +1125,71 @@ async def mock_receive_generator():
11201125

11211126
responses = [resp async for resp in gemini_connection.receive()]
11221127

1123-
# Expected: One LlmResponse with both tool calls, then one with turn_complete
1124-
assert len(responses) == 2
1128+
# Expected: one response per tool_call message, then one for turn_complete.
1129+
assert len(responses) == 3
11251130

1126-
# First response: single LlmResponse carrying both function calls
1131+
# First response: tool_1 yielded immediately (not waiting for turn_complete)
1132+
assert responses[0].content is not None
1133+
assert len(responses[0].content.parts) == 1
1134+
assert responses[0].content.parts[0].function_call.name == 'tool_1'
1135+
assert responses[0].content.parts[0].function_call.args == {'arg': 'value1'}
1136+
1137+
# Second response: tool_2 yielded immediately
1138+
assert responses[1].content is not None
1139+
assert len(responses[1].content.parts) == 1
1140+
assert responses[1].content.parts[0].function_call.name == 'tool_2'
1141+
assert responses[1].content.parts[0].function_call.args == {'arg': 'value2'}
1142+
1143+
# Third response: turn_complete True
1144+
assert responses[2].turn_complete is True
1145+
1146+
1147+
@pytest.mark.asyncio
1148+
async def test_receive_tool_call_yielded_without_turn_complete(
1149+
gemini_connection, mock_gemini_session
1150+
):
1151+
"""Regression test for the Gemini 3.1 Flash Live deadlock.
1152+
1153+
Scenario: model sends a tool_call message but NOT turn_complete (as
1154+
gemini-3.1-flash-live-preview does — it waits for the tool response
1155+
before sending turn_complete). receive() must yield the tool_call so
1156+
the flow layer can execute the tool and send the response back.
1157+
1158+
Before the fix: receive() buffered the tool_call internally and only
1159+
yielded on turn_complete, causing run_live() to hang indefinitely on
1160+
3.1 models (tool never dispatched -> response never sent -> server
1161+
never completes the turn -> WebSocket eventually times out).
1162+
"""
1163+
function_call = types.FunctionCall(name='get_weather', args={'city': 'Paris'})
1164+
mock_tool_call = mock.create_autospec(
1165+
types.LiveServerToolCall, instance=True
1166+
)
1167+
mock_tool_call.function_calls = [function_call]
1168+
1169+
mock_msg = mock.create_autospec(types.LiveServerMessage, instance=True)
1170+
mock_msg.usage_metadata = None
1171+
mock_msg.server_content = None
1172+
mock_msg.tool_call = mock_tool_call
1173+
mock_msg.session_resumption_update = None
1174+
mock_msg.go_away = None
1175+
1176+
async def mock_receive_generator():
1177+
yield mock_msg
1178+
# NOTE: deliberately no turn_complete — mimics Gemini 3.1 Live behavior.
1179+
# The generator simply exhausts after the tool_call message.
1180+
1181+
receive_mock = mock.Mock(return_value=mock_receive_generator())
1182+
mock_gemini_session.receive = receive_mock
1183+
1184+
responses = [resp async for resp in gemini_connection.receive()]
1185+
1186+
# Must yield the tool_call even without turn_complete.
1187+
assert len(responses) == 1
11271188
assert responses[0].content is not None
1128-
parts = responses[0].content.parts
1129-
assert len(parts) == 2
1130-
assert parts[0].function_call.name == 'tool_1'
1131-
assert parts[0].function_call.args == {'arg': 'value1'}
1132-
assert parts[1].function_call.name == 'tool_2'
1133-
assert parts[1].function_call.args == {'arg': 'value2'}
1134-
1135-
# Second response: turn_complete True
1136-
assert responses[1].turn_complete is True
1189+
assert len(responses[0].content.parts) == 1
1190+
assert responses[0].content.parts[0].function_call.name == 'get_weather'
1191+
assert responses[0].content.parts[0].function_call.args == {'city': 'Paris'}
1192+
assert responses[0].model_version == MODEL_VERSION
11371193

11381194

11391195
@pytest.mark.asyncio

0 commit comments

Comments
 (0)