fix: Fix Vertex AI Live API session replay on reconnect / modality switch

wukath · wukath · commit 0aac4409dee2 · 2026-06-25T11:29:27.000-07:00
Previously, seeding Gemini 3.1 Live history on Vertex AI collapsed the turns into a single user turn and hardcoded turn_complete=True, causing the model to immediately trigger a tool call upon reconnect. Since history_config is now supported on Vertex AI, we can remove the workaround and determine turn_complete from the role of the last turn (preventing model execution when the history ends in a model turn).

Change-Id: I624a7adc8acdd9a88f0fc17e08051ac8a5b6f9bd
diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
@@ -83,30 +83,10 @@ async def send_history(self, history: list[types.Content]):
     ]
 
     if contents:
-      # Gemini Enterprise Agent Platform does not support history_config in the SDK.
-      # To initialize a live session with prior history without hitting a 1007
-      # protocol error (invalid role mid-session), we consolidate previous multi-turn
-      # interactions into a unified contextual preamble on a single user role turn.
-      if (
-          self._is_gemini_3_1_flash_live
-          and self._api_backend != GoogleLLMVariant.GEMINI_API
-      ):
-        collapsed_text = 'Previous conversation history:\n'
-        for c in contents:
-          text_parts = ''.join(p.text for p in c.parts if p.text)
-          collapsed_text += f'[{c.role}]: {text_parts}\n'
-        contents = [
-            types.Content(
-                role='user', parts=[types.Part.from_text(text=collapsed_text)]
-            )
-        ]
-
       logger.debug('Sending history to live connection: %s', contents)
       await self._gemini_session.send_client_content(
           turns=contents,
-          turn_complete=True
-          if self._is_gemini_3_1_flash_live
-          else (contents[-1].role == 'user'),
+          turn_complete=history[-1].role == 'user',
       )
     else:
       logger.info('no content is sent')
diff --git a/tests/unittests/models/test_gemini_llm_connection.py b/tests/unittests/models/test_gemini_llm_connection.py
@@ -1507,31 +1507,42 @@ async def mock_receive_generator():
 
 @pytest.mark.asyncio
 async def test_send_history_gemini_31_turn_complete(mock_gemini_session):
-  """Verify Gemini 3.1 Live history seeding explicitly appends turn_complete=True."""
+  """Verify Gemini 3.1 Live history seeding turn_complete logic based on last turn role."""
   from google.adk.models.google_llm import GoogleLLMVariant
 
   conn = GeminiLlmConnection(
       mock_gemini_session,
       api_backend=GoogleLLMVariant.GEMINI_API,
       model_version='gemini-3.1-flash-live-preview',
   )
-  mock_gemini_session.send_client_content = mock.AsyncMock()
 
-  mock_contents = [
+  # Case 1: Last turn is from model (turn_complete should be False)
+  mock_gemini_session.send_client_content = mock.AsyncMock()
+  mock_contents_model_last = [
       types.Content(role='user', parts=[types.Part.from_text(text='hi')]),
       types.Content(role='model', parts=[types.Part.from_text(text='hello')]),
   ]
-  await conn.send_history(mock_contents)
+  await conn.send_history(mock_contents_model_last)
+  mock_gemini_session.send_client_content.assert_called_once_with(
+      turns=mock_contents_model_last,
+      turn_complete=False,
+  )
 
+  # Case 2: Last turn is from user (turn_complete should be True)
+  mock_gemini_session.send_client_content = mock.AsyncMock()
+  mock_contents_user_last = [
+      types.Content(role='user', parts=[types.Part.from_text(text='hi')]),
+  ]
+  await conn.send_history(mock_contents_user_last)
   mock_gemini_session.send_client_content.assert_called_once_with(
-      turns=mock_contents,
+      turns=mock_contents_user_last,
       turn_complete=True,
   )
 
 
 @pytest.mark.asyncio
-async def test_send_history_collapse_vertex_ai(mock_gemini_session):
-  """Verify history prompt collapse when seeding Gemini 3.1 Live on Vertex AI backend."""
+async def test_send_history_no_collapse_vertex_ai(mock_gemini_session):
+  """Verify history is not collapsed when seeding Gemini 3.1 Live on Vertex AI backend."""
   from google.adk.models.google_llm import GoogleLLMVariant
 
   conn = GeminiLlmConnection(
@@ -1547,18 +1558,9 @@ async def test_send_history_collapse_vertex_ai(mock_gemini_session):
   ]
   await conn.send_history(mock_contents)
 
-  assert mock_gemini_session.send_client_content.call_count == 1
-  called_turns = mock_gemini_session.send_client_content.call_args.kwargs[
-      'turns'
-  ]
-  assert len(called_turns) == 1
-  assert called_turns[0].role == 'user'
-  assert 'Previous conversation history:' in called_turns[0].parts[0].text
-  assert '[user]: hi' in called_turns[0].parts[0].text
-  assert '[model]: hello' in called_turns[0].parts[0].text
-  assert (
-      mock_gemini_session.send_client_content.call_args.kwargs['turn_complete']
-      is True
+  mock_gemini_session.send_client_content.assert_called_once_with(
+      turns=mock_contents,
+      turn_complete=False,
   )