Skip to content

Commit b2eefbe

Browse files
authored
fix(google): reject tool calls when tool_choice="none" in realtime (#6166)
1 parent 6b803ed commit b2eefbe

2 files changed

Lines changed: 101 additions & 20 deletions

File tree

livekit-plugins/livekit-plugins-google/livekit/plugins/google/realtime/realtime_api.py

Lines changed: 73 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from livekit.plugins.google.realtime.api_proto import ClientEvents, LiveAPIModels, Voice
2929

3030
from ..log import logger
31-
from ..utils import create_tools_config, get_tool_results_for_realtime
31+
from ..utils import create_function_response, create_tools_config, get_tool_results_for_realtime
3232
from ..version import __version__
3333

3434
INPUT_AUDIO_SAMPLE_RATE = 16000
@@ -44,6 +44,9 @@
4444

4545
lk_google_debug = int(os.getenv("LK_GOOGLE_DEBUG", 0))
4646

47+
# stop rejecting tool calls after this many in a row to avoid a loop (tool_choice="none")
48+
MAX_TOOL_CALL_REJECTIONS = 3
49+
4750
# Known VertexAI models for the Live API
4851
# See: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/live-api
4952
KNOWN_VERTEXAI_MODELS: frozenset[str] = frozenset(
@@ -148,6 +151,7 @@ class _RealtimeOptions:
148151
api_version: NotGivenOr[str] = NOT_GIVEN
149152
tool_behavior: NotGivenOr[types.Behavior] = NOT_GIVEN
150153
tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN
154+
tool_choice: NotGivenOr[llm.ToolChoice | None] = NOT_GIVEN
151155
thinking_config: NotGivenOr[types.ThinkingConfig] = NOT_GIVEN
152156
session_resumption: NotGivenOr[types.SessionResumptionConfig] = NOT_GIVEN
153157
credentials: google.auth.credentials.Credentials | None = None
@@ -488,6 +492,10 @@ def __init__(self, realtime_model: RealtimeModel) -> None:
488492
self._session_should_close = asyncio.Event()
489493
self._response_created_futures: dict[str, asyncio.Future[llm.GenerationCreatedEvent]] = {}
490494
self._pending_generation_fut: asyncio.Future[llm.GenerationCreatedEvent] | None = None
495+
# number of tool-call messages rejected in the current tool_choice="none" turn; non-zero also
496+
# means we're draining that turn's trailing events (which have no generation to attach
497+
# to). reset when the next generation starts.
498+
self._rejected_tool_calls = 0
491499

492500
self._session_resumption_handle: str | None = (
493501
self._opts.session_resumption.handle
@@ -557,7 +565,19 @@ def update_options(
557565
# no need to restart
558566

559567
if is_given(tool_choice):
560-
logger.warning("tool_choice is not supported by the Google Realtime API.")
568+
# no per-response tool_choice on Gemini; "none" is emulated by rejecting any tool
569+
# call emitted during the turn (see _reject_tool_calls).
570+
self._opts.tool_choice = tool_choice
571+
if tool_choice == "none":
572+
logger.warning(
573+
"the Google Realtime API has no tool_choice='none'; tool calls emitted "
574+
"this turn will be rejected so the model replies directly."
575+
)
576+
elif tool_choice not in (None, "auto"):
577+
logger.warning(
578+
f"tool_choice='{tool_choice}' is not supported by the Google Realtime API, "
579+
"falling back to 'auto'."
580+
)
561581

562582
if should_restart:
563583
self._mark_restart_needed()
@@ -1045,6 +1065,13 @@ async def _recv_task(self, session: AsyncSession) -> None:
10451065
part["inline_data"] = "<audio>"
10461066
logger.debug("<<< received response", extra={"response": resp_copy})
10471067

1068+
if response.tool_call and self._opts.tool_choice == "none":
1069+
# reject without opening a generation, so the pending generate_reply
1070+
# stays bound to the model's eventual reply and tools stay suppressed
1071+
# for the whole turn.
1072+
self._reject_tool_calls(response.tool_call.function_calls or [])
1073+
continue
1074+
10481075
if not self._current_generation or self._current_generation._done:
10491076
if (sc := response.server_content) and sc.interrupted:
10501077
# two cases an interrupted event is sent without an active generation
@@ -1163,6 +1190,7 @@ def _build_connect_config(self) -> types.LiveConnectConfig:
11631190
return conf
11641191

11651192
def _start_new_generation(self) -> None:
1193+
self._rejected_tool_calls = 0
11661194
if self._current_generation and not self._current_generation._done:
11671195
logger.warning("starting new generation while another is active. Finalizing previous.")
11681196
self._mark_current_generation_done()
@@ -1214,7 +1242,13 @@ def _start_new_generation(self) -> None:
12141242
def _handle_server_content(self, server_content: types.LiveServerContent) -> None:
12151243
current_gen = self._current_generation
12161244
if not current_gen:
1217-
logger.warning("received server content but no active generation.")
1245+
if self._rejected_tool_calls:
1246+
logger.debug(
1247+
"ignoring server content from a rejected tool call turn",
1248+
extra={"server_content": server_content.model_dump_json(exclude_none=True)},
1249+
)
1250+
else:
1251+
logger.warning("received server content but no active generation.")
12181252
return
12191253

12201254
if model_turn := server_content.model_turn:
@@ -1332,6 +1366,38 @@ def _handle_input_speech_stopped(self) -> None:
13321366
llm.InputSpeechStoppedEvent(user_transcription_enabled=False),
13331367
)
13341368

1369+
def _reject_tool_calls(self, function_calls: list[types.FunctionCall]) -> None:
    """Refuse tool calls the model emitted while ``tool_choice="none"``.

    Each non-empty batch bumps ``self._rejected_tool_calls``. While that
    counter is within ``MAX_TOOL_CALL_REJECTIONS``, every call in the batch is
    answered with an error function response telling the model to reply to the
    user directly. Once the limit is exceeded nothing is sent back any more,
    and an error is logged only for the first batch past the limit.

    Args:
        function_calls: The function calls carried by one tool-call message.
            An empty list is a no-op and does not touch the counter.
    """
    if not function_calls:
        return

    self._rejected_tool_calls += 1
    attempt = self._rejected_tool_calls
    log_extra = {"functions": [call.name for call in function_calls]}

    if attempt <= MAX_TOOL_CALL_REJECTIONS:
        logger.warning("rejecting tool call requested while tool_choice='none'", extra=log_extra)

        rejections = []
        for call in function_calls:
            # the rejection is reported as an errored tool output so it goes through the
            # same conversion helper as regular tool results
            refusal = llm.FunctionCallOutput(
                name=call.name or "",
                call_id=call.id or "",
                output="Tool calls are disabled for this turn, respond to the user directly.",
                is_error=True,
            )
            rejections.append(
                create_function_response(
                    refusal,
                    vertexai=self._opts.vertexai,
                    tool_response_scheduling=self._opts.tool_response_scheduling,
                )
            )

        self._send_client_event(types.LiveClientToolResponse(function_responses=rejections))
        return

    # past the limit: stop responding to break the loop; the user can still interrupt by voice
    if attempt == MAX_TOOL_CALL_REJECTIONS + 1:
        # log once, on the first batch over the limit, rather than on every further batch
        logger.error(
            "model keeps calling tools despite tool_choice='none'; "
            f"stopping after {MAX_TOOL_CALL_REJECTIONS} rejections to avoid a loop",
            extra=log_extra,
        )
1400+
13351401
def _handle_tool_calls(self, tool_call: types.LiveServerToolCall) -> None:
13361402
if not self._current_generation:
13371403
logger.warning("received tool call but no active generation.")
@@ -1361,7 +1427,10 @@ def _handle_tool_call_cancellation(
13611427
def _handle_usage_metadata(self, usage_metadata: types.UsageMetadata) -> None:
13621428
current_gen = self._current_generation
13631429
if not current_gen:
1364-
logger.warning("no active generation to report metrics for")
1430+
if self._rejected_tool_calls:
1431+
logger.debug("ignoring usage metadata from a rejected tool call turn")
1432+
else:
1433+
logger.warning("no active generation to report metrics for")
13651434
return
13661435

13671436
ttft = (

livekit-plugins/livekit-plugins-google/livekit/plugins/google/utils.py

Lines changed: 28 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,28 +48,40 @@ def create_tools_config(
4848
return gemini_tools
4949

5050

51+
def create_function_response(
    output: llm.FunctionCallOutput,
    *,
    vertexai: bool = False,
    tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN,
) -> types.FunctionResponse:
    """Build a ``types.FunctionResponse`` from a function call output.

    Args:
        output: The function call output to convert. Its ``output`` value is
            placed under the ``"error"`` key when ``is_error`` is set, and
            under the ``"output"`` key otherwise.
        vertexai: When true, the call id is left off the response.
        tool_response_scheduling: Optional scheduling value copied onto the
            response when given.

    Returns:
        The populated function response.
    """
    payload_key = "error" if output.is_error else "output"
    function_response = types.FunctionResponse(
        name=output.name,
        response={payload_key: output.output},
    )

    if is_given(tool_response_scheduling):
        # vertexai currently doesn't support the scheduling parameter, gemini api defaults to idle
        # it's the user's responsibility to avoid this parameter when using vertexai
        function_response.scheduling = tool_response_scheduling

    if not vertexai:
        # vertexai does not support id in FunctionResponse
        # see: https://github.com/googleapis/python-genai/blob/85e00bc/google/genai/_live_converters.py#L1435
        function_response.id = output.call_id

    return function_response
70+
71+
5172
def get_tool_results_for_realtime(
5273
chat_ctx: llm.ChatContext,
5374
*,
5475
vertexai: bool = False,
5576
tool_response_scheduling: NotGivenOr[types.FunctionResponseScheduling] = NOT_GIVEN,
5677
) -> types.LiveClientToolResponse | None:
57-
function_responses: list[types.FunctionResponse] = []
58-
for msg in chat_ctx.items:
59-
if msg.type == "function_call_output":
60-
res = types.FunctionResponse(
61-
name=msg.name,
62-
response={"output": msg.output},
63-
)
64-
if is_given(tool_response_scheduling):
65-
# vertexai currently doesn't support the scheduling parameter, gemini api defaults to idle
66-
# it's the user's responsibility to avoid this parameter when using vertexai
67-
res.scheduling = tool_response_scheduling
68-
if not vertexai:
69-
# vertexai does not support id in FunctionResponse
70-
# see: https://github.com/googleapis/python-genai/blob/85e00bc/google/genai/_live_converters.py#L1435
71-
res.id = msg.call_id
72-
function_responses.append(res)
78+
function_responses = [
79+
create_function_response(
80+
msg, vertexai=vertexai, tool_response_scheduling=tool_response_scheduling
81+
)
82+
for msg in chat_ctx.items
83+
if msg.type == "function_call_output"
84+
]
7385
return (
7486
types.LiveClientToolResponse(function_responses=function_responses)
7587
if function_responses

0 commit comments

Comments
 (0)