feat(live): support live for gemini-3.1-flash-live-preview model

wuliang229 · copybara-github · commit 8082893619bb · 2026-04-01T16:40:02.000-07:00
This change updates the method used for sending text, audio, and video data to the model.

Closes issue #5018
Co-authored-by: Liang Wu <wuliang@google.com>
PiperOrigin-RevId: 893174037
diff --git a/src/google/adk/models/gemini_llm_connection.py b/src/google/adk/models/gemini_llm_connection.py
@@ -20,6 +20,7 @@
 
 from google.genai import types
 
+from ..utils import model_name_utils
 from ..utils.content_utils import filter_audio_parts
 from ..utils.context_utils import Aclosing
 from ..utils.variant_utils import GoogleLLMVariant
@@ -99,7 +100,6 @@ async def send_content(self, content: types.Content):
     Args:
       content: The content to send to the model.
     """
-
     assert content.parts
     if content.parts[0].function_response:
       # All parts have to be function responses.
@@ -112,12 +112,30 @@ async def send_content(self, content: types.Content):
       )
     else:
       logger.debug('Sending LLM new content %s', content)
-      await self._gemini_session.send(
-          input=types.LiveClientContent(
-              turns=[content],
-              turn_complete=True,
-          )
+      is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
+          self._model_version
       )
+      is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API
+
+      # As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
+      # Vertex AI.
+      if (
+          is_gemini_31
+          and is_gemini_api
+          and len(content.parts) == 1
+          and content.parts[0].text
+      ):
+        logger.debug('Using send_realtime_input for Gemini 3.1 text input')
+        await self._gemini_session.send_realtime_input(
+            text=content.parts[0].text
+        )
+      else:
+        await self._gemini_session.send(
+            input=types.LiveClientContent(
+                turns=[content],
+                turn_complete=True,
+            )
+        )
 
   async def send_realtime(self, input: RealtimeInput):
     """Sends a chunk of audio or a frame of video to the model in realtime.
@@ -128,7 +146,26 @@ async def send_realtime(self, input: RealtimeInput):
     if isinstance(input, types.Blob):
       # The blob is binary and is very large. So let's not log it.
       logger.debug('Sending LLM Blob.')
-      await self._gemini_session.send_realtime_input(media=input)
+      is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
+          self._model_version
+      )
+      is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API
+
+      # As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
+      # Vertex AI.
+      if is_gemini_31 and is_gemini_api:
+        if input.mime_type and input.mime_type.startswith('audio/'):
+          await self._gemini_session.send_realtime_input(audio=input)
+        elif input.mime_type and input.mime_type.startswith('image/'):
+          await self._gemini_session.send_realtime_input(video=input)
+        else:
+          logger.warning(
+              'Blob not sent. Unknown or empty mime type for'
+              ' send_realtime_input: %s',
+              input.mime_type,
+          )
+      else:
+        await self._gemini_session.send_realtime_input(media=input)
 
     elif isinstance(input, types.ActivityStart):
       logger.debug('Sending LLM activity start signal.')
diff --git a/src/google/adk/utils/model_name_utils.py b/src/google/adk/utils/model_name_utils.py
@@ -125,3 +125,21 @@ def is_gemini_2_or_above(model_string: Optional[str]) -> bool:
     return False
 
   return parsed_version.major >= 2
+
+
+def is_gemini_3_1_flash_live(model_string: Optional[str]) -> bool:
+  """Report whether `model_string` names the Gemini 3.1 Flash Live model.
+
+  The comparison is a case-sensitive exact match against the single
+  live (bidi streaming) preview model name; no prefix or path parsing is done.
+
+  Args:
+    model_string: The model name to test. May be None or empty.
+
+  Returns:
+    True only when `model_string` is exactly 'gemini-3.1-flash-live-preview';
+    False for None, an empty string, or any other value.
+  """
+  # None and '' can never equal the target name, so a plain equality test
+  # covers the missing-name case without a separate guard.
+  return model_string == 'gemini-3.1-flash-live-preview'