fix(models): Default grounding metadata for Gemini 3.1 live (#6018)

wukath · web-flow · commit fafafb38e102 · 2026-06-08T16:19:04.000-07:00
diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
@@ -50,6 +50,9 @@ def __init__(
     self._output_transcription_text: str = ''
     self._api_backend = api_backend
     self._model_version = model_version
+    self._is_gemini_3_1_flash_live = model_name_utils.is_gemini_3_1_flash_live(
+        model_version
+    )
 
   async def send_history(self, history: list[types.Content]):
     """Sends the conversation history to the gemini model.
@@ -80,14 +83,14 @@ async def send_history(self, history: list[types.Content]):
     ]
 
     if contents:
-      is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
-          self._model_version
-      )
       # Gemini Enterprise Agent Platform does not support history_config in the SDK.
       # To initialize a live session with prior history without hitting a 1007
       # protocol error (invalid role mid-session), we consolidate previous multi-turn
       # interactions into a unified contextual preamble on a single user role turn.
-      if is_gemini_31 and self._api_backend != GoogleLLMVariant.GEMINI_API:
+      if (
+          self._is_gemini_3_1_flash_live
+          and self._api_backend != GoogleLLMVariant.GEMINI_API
+      ):
         collapsed_text = 'Previous conversation history:\n'
         for c in contents:
           text_parts = ''.join(p.text for p in c.parts if p.text)
@@ -101,7 +104,9 @@ async def send_history(self, history: list[types.Content]):
       logger.debug('Sending history to live connection: %s', contents)
       await self._gemini_session.send_client_content(
           turns=contents,
-          turn_complete=True if is_gemini_31 else (contents[-1].role == 'user'),
+          turn_complete=True
+          if self._is_gemini_3_1_flash_live
+          else (contents[-1].role == 'user'),
       )
     else:
       logger.info('no content is sent')
@@ -126,10 +131,11 @@ async def send_content(self, content: types.Content):
       )
     else:
       logger.debug('Sending LLM new content %s', content)
-      is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
-          self._model_version
-      )
-      if is_gemini_31 and len(content.parts) == 1 and content.parts[0].text:
+      if (
+          self._is_gemini_3_1_flash_live
+          and len(content.parts) == 1
+          and content.parts[0].text
+      ):
         logger.debug('Using send_realtime_input for Gemini 3.1 text input')
         await self._gemini_session.send_realtime_input(
             text=content.parts[0].text
@@ -151,10 +157,7 @@ async def send_realtime(self, input: RealtimeInput):
     if isinstance(input, types.Blob):
       # The blob is binary and is very large. So let's not log it.
       logger.debug('Sending LLM Blob.')
-      is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
-          self._model_version
-      )
-      if is_gemini_31:
+      if self._is_gemini_3_1_flash_live:
         if input.mime_type and input.mime_type.startswith('audio/'):
           await self._gemini_session.send_realtime_input(audio=input)
         elif input.mime_type and input.mime_type.startswith('image/'):
@@ -196,10 +199,15 @@ def __build_full_text_response(
     Returns:
       An LlmResponse containing the full text.
     """
+    part = types.Part.from_text(text=text)
+    if is_thought:
+      part.thought = True
+    if grounding_metadata is None and self._is_gemini_3_1_flash_live:
+      grounding_metadata = types.GroundingMetadata()
     return LlmResponse(
         content=types.Content(
             role='model',
-            parts=[types.Part.from_text(text=text)],
+            parts=[part],
         ),
         grounding_metadata=grounding_metadata,
         partial=False,
@@ -214,6 +222,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
     """
 
     text = ''
+    is_thought = False
     tool_call_parts = []
     pending_grounding_metadata = None
     async with Aclosing(self._gemini_session.receive()) as agen:
@@ -265,9 +274,12 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
             # grounding_metadata is yielded again at turn_complete,
             # so avoid duplicating it here if turn_complete is true.
             if not message.server_content.turn_complete:
-              llm_response.grounding_metadata = (
-                  message.server_content.grounding_metadata
-              )
+              if message.server_content.grounding_metadata is not None:
+                llm_response.grounding_metadata = (
+                    message.server_content.grounding_metadata
+                )
+              elif self._is_gemini_3_1_flash_live:
+                llm_response.grounding_metadata = types.GroundingMetadata()
             has_inline_data = any(p.inline_data for p in content.parts)
             for part in content.parts:
               if part.text:
@@ -394,7 +406,12 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                 turn_complete=True,
                 interrupted=message.server_content.interrupted,
                 grounding_metadata=message.server_content.grounding_metadata
-                or g_metadata_to_yield,
+                or g_metadata_to_yield
+                or (
+                    types.GroundingMetadata()
+                    if self._is_gemini_3_1_flash_live
+                    else None
+                ),
                 model_version=self._model_version,
                 live_session_id=live_session_id,
                 turn_complete_reason=getattr(
@@ -430,10 +447,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
           # deadlocking the conversation. Other models (e.g. 2.5-pro,
           # native-audio) send turn_complete after tool calls, so buffer
           # and merge them into a single response at turn_complete.
-          if (
-              model_name_utils.is_gemini_3_1_flash_live(self._model_version)
-              and tool_call_parts
-          ):
+          if self._is_gemini_3_1_flash_live and tool_call_parts:
             logger.debug(
                 'Yielding tool_call_parts immediately for Gemini 3.1 live tool'
                 ' call'
@@ -442,6 +456,7 @@ async def receive(self) -> AsyncGenerator[LlmResponse, None]:
                 content=types.Content(role='model', parts=tool_call_parts),
                 model_version=self._model_version,
                 live_session_id=live_session_id,
+                grounding_metadata=types.GroundingMetadata(),
             )
             tool_call_parts = []
         if message.session_resumption_update:
diff --git a/src/google/adk/utils/model_name_utils.py b/src/google/adk/utils/model_name_utils.py
@@ -172,4 +172,5 @@ def is_gemini_3_1_flash_live(model_string: Optional[str]) -> bool:
   """
   if not model_string:
     return False
-  return model_string.startswith('gemini-3.1-flash-live')
+  model_name = extract_model_name(model_string)
+  return model_name.startswith('gemini-3.1-flash-live')
diff --git a/tests/unittests/models/test_gemini_llm_connection.py b/tests/unittests/models/test_gemini_llm_connection.py
@@ -1560,3 +1560,119 @@ async def test_send_history_collapse_vertex_ai(mock_gemini_session):
       mock_gemini_session.send_client_content.call_args.kwargs['turn_complete']
       is True
   )
+
+
+@pytest.mark.asyncio
+async def test_receive_grounding_metadata_default_gemini_3_1(
+    mock_gemini_session,
+):
+  """Verify grounding_metadata defaults to empty GroundingMetadata for Gemini 3.1."""
+  conn = GeminiLlmConnection(
+      mock_gemini_session,
+      model_version='gemini-3.1-flash-live-preview',
+  )
+
+  def make_msg(text=None, tc=False, tool_call=None):
+    msg = mock.create_autospec(types.LiveServerMessage, instance=True)
+    msg.usage_metadata = None
+    msg.tool_call = tool_call
+    msg.session_resumption_update = None
+    msg.go_away = None
+    msg.server_content = mock.Mock()
+    msg.server_content.interrupted = False
+    msg.server_content.input_transcription = None
+    msg.server_content.output_transcription = None
+    msg.server_content.generation_complete = False
+    msg.server_content.turn_complete = tc
+    msg.server_content.grounding_metadata = None
+    msg.server_content.model_turn = (
+        types.Content(role='model', parts=[types.Part.from_text(text=text)])
+        if text
+        else None
+    )
+    return msg
+
+  # 1. Content event
+  msg1 = make_msg(text='hello')
+  # 2. Tool call event (yields immediately for Gemini 3.1)
+  function_call = types.FunctionCall(name='foo', args={})
+  tool_call = mock.create_autospec(types.LiveServerToolCall, instance=True)
+  tool_call.function_calls = [function_call]
+  msg2 = make_msg(tool_call=tool_call)
+  # 3. Turn complete event
+  msg3 = make_msg(tc=True)
+
+  async def mock_receive_generator():
+    yield msg1
+    yield msg2
+    yield msg3
+
+  mock_gemini_session.receive = mock.Mock(return_value=mock_receive_generator())
+  responses = [resp async for resp in conn.receive()]
+  # Expected (each response carries a default, empty GroundingMetadata):
+  # responses[0] -> partial content response for msg1
+  # responses[1] -> full text response for msg1
+  # responses[2] -> tool call response for msg2
+  # responses[3] -> turn_complete response for msg3
+  assert len(responses) == 4
+  assert responses[0].content.parts[0].text == 'hello'
+  assert isinstance(responses[0].grounding_metadata, types.GroundingMetadata)
+  assert responses[0].grounding_metadata.web_search_queries is None
+  assert responses[0].partial is True
+  assert responses[1].content.parts[0].text == 'hello'
+  assert isinstance(responses[1].grounding_metadata, types.GroundingMetadata)
+  assert responses[1].partial is False
+  assert responses[2].content.parts[0].function_call.name == 'foo'
+  assert isinstance(responses[2].grounding_metadata, types.GroundingMetadata)
+  assert responses[3].turn_complete is True
+  assert isinstance(responses[3].grounding_metadata, types.GroundingMetadata)
+
+
+@pytest.mark.asyncio
+async def test_receive_grounding_metadata_default_non_gemini_3_1(
+    mock_gemini_session,
+):
+  """Verify grounding_metadata stays None for non-Gemini 3.1 models."""
+  conn = GeminiLlmConnection(
+      mock_gemini_session,
+      model_version='gemini-2.5-flash-live',
+  )
+
+  def make_msg(text=None, tc=False):
+    msg = mock.create_autospec(types.LiveServerMessage, instance=True)
+    msg.usage_metadata = None
+    msg.tool_call = None
+    msg.session_resumption_update = None
+    msg.go_away = None
+    msg.server_content = mock.Mock()
+    msg.server_content.interrupted = False
+    msg.server_content.input_transcription = None
+    msg.server_content.output_transcription = None
+    msg.server_content.generation_complete = False
+    msg.server_content.turn_complete = tc
+    msg.server_content.grounding_metadata = None
+    msg.server_content.model_turn = (
+        types.Content(role='model', parts=[types.Part.from_text(text=text)])
+        if text
+        else None
+    )
+    return msg
+
+  msg1 = make_msg(text='hello')
+  msg2 = make_msg(tc=True)
+
+  async def mock_receive_generator():
+    yield msg1
+    yield msg2
+
+  mock_gemini_session.receive = mock.Mock(return_value=mock_receive_generator())
+  responses = [resp async for resp in conn.receive()]
+  assert len(responses) == 3
+  assert responses[0].content.parts[0].text == 'hello'
+  assert responses[0].grounding_metadata is None
+  assert responses[0].partial is True
+  assert responses[1].content.parts[0].text == 'hello'
+  assert responses[1].grounding_metadata is None
+  assert responses[1].partial is False
+  assert responses[2].turn_complete is True
+  assert responses[2].grounding_metadata is None
diff --git a/tests/unittests/utils/test_model_name_utils.py b/tests/unittests/utils/test_model_name_utils.py
@@ -16,6 +16,7 @@
 
 from google.adk.utils.model_name_utils import extract_model_name
 from google.adk.utils.model_name_utils import is_gemini_1_model
+from google.adk.utils.model_name_utils import is_gemini_3_1_flash_live
 from google.adk.utils.model_name_utils import is_gemini_eap_or_2_or_above
 from google.adk.utils.model_name_utils import is_gemini_model
 from google.adk.utils.model_name_utils import is_gemini_model_id_check_disabled
@@ -338,3 +339,28 @@ def test_default_is_disabled(self, monkeypatch):
   def test_true_enables_check_bypass(self, monkeypatch):
     monkeypatch.setenv('ADK_DISABLE_GEMINI_MODEL_ID_CHECK', 'true')
     assert is_gemini_model_id_check_disabled() is True
+
+
+class TestIsGemini31FlashLive:
+  """Test the is_gemini_3_1_flash_live function."""
+
+  def test_is_gemini_3_1_flash_live_simple_name(self):
+    """Test with simple model name format."""
+    assert is_gemini_3_1_flash_live('gemini-3.1-flash-live') is True
+    assert is_gemini_3_1_flash_live('gemini-3.1-flash-live-preview') is True
+    assert is_gemini_3_1_flash_live('gemini-3.1-pro-live') is False
+    assert is_gemini_3_1_flash_live('gemini-2.5-flash-live') is False
+
+  def test_is_gemini_3_1_flash_live_path_based_name(self):
+    """Test with path-based format (Vertex AI etc.)."""
+    vertex_path = 'projects/123/locations/us-central1/publishers/google/models/gemini-3.1-flash-live'
+    assert is_gemini_3_1_flash_live(vertex_path) is True
+    vertex_path_preview = 'projects/123/locations/us-central1/publishers/google/models/gemini-3.1-flash-live-preview'
+    assert is_gemini_3_1_flash_live(vertex_path_preview) is True
+    non_live_path = 'projects/123/locations/us-central1/publishers/google/models/gemini-3.1-flash'
+    assert is_gemini_3_1_flash_live(non_live_path) is False
+
+  def test_is_gemini_3_1_flash_live_edge_cases(self):
+    """Test that None and empty-string inputs return False."""
+    assert is_gemini_3_1_flash_live(None) is False
+    assert is_gemini_3_1_flash_live('') is False