Skip to content

Commit 8082893

Browse files
wuliang229 authored and copybara-github committed
feat(live): support live for gemini-3.1-flash-live-preview model
This change updates the method used for sending text, audio and video data to the model.

Closes issue #5018

Co-authored-by: Liang Wu <wuliang@google.com>
PiperOrigin-RevId: 893174037
1 parent 6ee0362 commit 8082893

File tree

2 files changed

+62
-7
lines changed

2 files changed

+62
-7
lines changed

src/google/adk/models/gemini_llm_connection.py

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from google.genai import types
2222

23+
from ..utils import model_name_utils
2324
from ..utils.content_utils import filter_audio_parts
2425
from ..utils.context_utils import Aclosing
2526
from ..utils.variant_utils import GoogleLLMVariant
@@ -99,7 +100,6 @@ async def send_content(self, content: types.Content):
99100
Args:
100101
content: The content to send to the model.
101102
"""
102-
103103
assert content.parts
104104
if content.parts[0].function_response:
105105
# All parts have to be function responses.
@@ -112,12 +112,30 @@ async def send_content(self, content: types.Content):
112112
)
113113
else:
114114
logger.debug('Sending LLM new content %s', content)
115-
await self._gemini_session.send(
116-
input=types.LiveClientContent(
117-
turns=[content],
118-
turn_complete=True,
119-
)
115+
is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
116+
self._model_version
120117
)
118+
is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API
119+
120+
# As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
121+
# Vertex AI.
122+
if (
123+
is_gemini_31
124+
and is_gemini_api
125+
and len(content.parts) == 1
126+
and content.parts[0].text
127+
):
128+
logger.debug('Using send_realtime_input for Gemini 3.1 text input')
129+
await self._gemini_session.send_realtime_input(
130+
text=content.parts[0].text
131+
)
132+
else:
133+
await self._gemini_session.send(
134+
input=types.LiveClientContent(
135+
turns=[content],
136+
turn_complete=True,
137+
)
138+
)
121139

122140
async def send_realtime(self, input: RealtimeInput):
123141
"""Sends a chunk of audio or a frame of video to the model in realtime.
@@ -128,7 +146,26 @@ async def send_realtime(self, input: RealtimeInput):
128146
if isinstance(input, types.Blob):
129147
# The blob is binary and is very large. So let's not log it.
130148
logger.debug('Sending LLM Blob.')
131-
await self._gemini_session.send_realtime_input(media=input)
149+
is_gemini_31 = model_name_utils.is_gemini_3_1_flash_live(
150+
self._model_version
151+
)
152+
is_gemini_api = self._api_backend == GoogleLLMVariant.GEMINI_API
153+
154+
# As of now, Gemini 3.1 Flash Live is only available in Gemini API, not
155+
# Vertex AI.
156+
if is_gemini_31 and is_gemini_api:
157+
if input.mime_type and input.mime_type.startswith('audio/'):
158+
await self._gemini_session.send_realtime_input(audio=input)
159+
elif input.mime_type and input.mime_type.startswith('image/'):
160+
await self._gemini_session.send_realtime_input(video=input)
161+
else:
162+
logger.warning(
163+
'Blob not sent. Unknown or empty mime type for'
164+
' send_realtime_input: %s',
165+
input.mime_type,
166+
)
167+
else:
168+
await self._gemini_session.send_realtime_input(media=input)
132169

133170
elif isinstance(input, types.ActivityStart):
134171
logger.debug('Sending LLM activity start signal.')

src/google/adk/utils/model_name_utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,21 @@ def is_gemini_2_or_above(model_string: Optional[str]) -> bool:
125125
return False
126126

127127
return parsed_version.major >= 2
128+
129+
130+
def is_gemini_3_1_flash_live(model_string: Optional[str]) -> bool:
  """Tell whether a model name denotes the Gemini 3.1 Flash Live model.

  The comparison is a strict, case-sensitive equality test against the single
  live bidi-streaming model name; no prefix or version parsing is attempted.

  Args:
    model_string: The model name to test. May be None or empty.

  Returns:
    True only when the name is exactly 'gemini-3.1-flash-live-preview';
    False for every other value, including None and the empty string.
  """
  # None and '' never compare equal to the literal, so a separate falsy guard
  # is not required to get a False result for them.
  return model_string == 'gemini-3.1-flash-live-preview'

0 commit comments

Comments
 (0)