Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions livekit-agents/livekit/agents/llm/_provider_format/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ def to_chat_ctx(
"args": json.loads(msg.arguments or "{}"),
}
}
# Inject thought_signature if available (Gemini 3 multi-turn function calling)
if thought_signatures and (sig := thought_signatures.get(msg.call_id)):
fc_part["thought_signature"] = sig
# Gemini 2.5+ requires a thought_signature for every multi-turn
# function_call part. When a previous tool call came from another
# provider (for example through FallbackAdapter), we do not have a
# real signature, so use Google's documented validator-bypass sentinel.
if thought_signatures is not None:
fc_part["thought_signature"] = thought_signatures.get(
msg.call_id, b"skip_thought_signature_validator"
)
Comment on lines +72 to +75

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Sentinel value relies on undocumented Google API behavior

The sentinel b"skip_thought_signature_validator" at livekit-agents/livekit/agents/llm/_provider_format/google.py:72 is described in the comment as "Google's documented validator-bypass sentinel." This is a critical dependency on an external API behavior — if Google removes or changes this sentinel in a future API version, all FallbackAdapter multi-turn function calls to Gemini 2.5+ would start failing. It would be worth confirming this sentinel is part of a stable, documented API contract rather than an internal implementation detail that could change without notice.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's pretty stable IMO (6+ months and counting) and I tested locally.

Comment on lines +72 to +75

@devin-ai-integration devin-ai-integration Bot Jun 18, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Behavioral change: function calls without signatures now get sentinel instead of no signature

The old formatter code at livekit-agents/livekit/agents/llm/_provider_format/google.py:66-68 (base) used if thought_signatures and (sig := thought_signatures.get(msg.call_id)): — this meant empty dicts were falsy (no injection), and missing call_ids got no signature. The new code at lines 70-73 uses if thought_signatures is not None: with a sentinel default. This is a significant behavioral change: every function_call part in a Gemini 2.5+ request will now carry either a real signature or b"skip_thought_signature_validator". This is the intended fix, but it changes the wire format for all multi-turn requests, not just the FallbackAdapter case. Confirm that Google's API accepts the sentinel for function calls that previously had valid signatures in prior turns (i.e., ensure mixing real signatures and sentinels in the same request is valid).

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good feedback, and it is the main concern. I think the change is safe, given that the older models have been retired.

parts.append(fc_part)
elif msg.type == "function_call_output":
response = {"output": msg.output} if not msg.is_error else {"error": msg.output}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,10 +440,12 @@ async def _run(self) -> None:
request_id = utils.shortuuid()

try:
# Pass thought_signatures for Gemini 2.5+ multi-turn function calling
thought_sigs = (
self._llm._thought_signatures if _requires_thought_signatures(self._model) else None
)
# Pass thought_signatures for Gemini 2.5+ multi-turn function calling.
# New LLM instances may have no stored signatures yet; use an empty
# mapping so formatter can add the Gemini 2.5+ fallback sentinel.
thought_sigs = None
if _requires_thought_signatures(self._model):
thought_sigs = getattr(self._llm, "_thought_signatures", None) or {}
turns_dict, extra_data = self._chat_ctx.to_provider_format(
format="google", thought_signatures=thought_sigs
)
Expand Down Expand Up @@ -613,6 +615,8 @@ def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
and hasattr(part, "thought_signature")
and part.thought_signature
):
if getattr(self._llm, "_thought_signatures", None) is None:
self._llm._thought_signatures = {}
self._llm._thought_signatures[tool_call.call_id] = part.thought_signature

@devin-ai-integration devin-ai-integration Bot Jun 18, 2026

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚩 Unbounded growth of _thought_signatures dict

The _thought_signatures dict on the LLM instance (livekit-plugins/livekit-plugins-google/livekit/plugins/google/llm.py:232) accumulates an entry for every function call made across the LLM's lifetime and is never pruned. For long-lived agents that make many tool calls, this dict will grow indefinitely. This is a pre-existing issue (not introduced by this PR), but worth noting since this PR increases the reliance on this dict by always consulting it for Gemini 2.5+ models. A possible improvement would be to evict entries once their corresponding chat turns are no longer in the active context.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

@igui igui Jun 18, 2026

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This finding, while true, is not related to this particular PR.


chat_chunk = llm.ChatChunk(
Expand Down
40 changes: 40 additions & 0 deletions tests/test_google_thought_signatures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

from livekit.agents.llm import ChatContext, FunctionCall, FunctionCallOutput
from livekit.plugins.google.llm import (
_is_gemini_3_flash_model,
_is_gemini_3_model,
Expand All @@ -9,6 +10,45 @@
pytestmark = pytest.mark.unit


class TestGoogleThoughtSignatureFormatting:
def test_injects_existing_thought_signature_for_function_call(self):
    """A signature supplied for a call_id is copied onto that function_call part."""
    known_signatures = {"call_1": b"real_signature"}

    chat_ctx = ChatContext.empty()
    chat_ctx.add_message(role="user", content="hello")
    tool_call = FunctionCall(call_id="call_1", name="tool", arguments="{}")
    chat_ctx.insert(tool_call)
    tool_result = FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False)
    chat_ctx.insert(tool_result)

    formatted_turns, _ = chat_ctx.to_provider_format(
        format="google", thought_signatures=known_signatures
    )

    # The first part of the second turn is expected to be the function call.
    assert formatted_turns[1]["parts"][0]["thought_signature"] == b"real_signature"

def test_injects_skip_sentinel_for_missing_thought_signature(self):
ctx = ChatContext.empty()
ctx.add_message(role="user", content="hello")
ctx.insert(FunctionCall(call_id="call_from_openai", name="tool", arguments="{}"))
ctx.insert(
FunctionCallOutput(call_id="call_from_openai", name="tool", output="ok", is_error=False)
)

turns, _ = ctx.to_provider_format(format="google", thought_signatures={})

Comment on lines +35 to +36

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider removing the `thought_signatures` parameter so that the test resembles what happens in livekit-agents.

Suggested change
turns, _ = ctx.to_provider_format(format="google", thought_signatures={})
turns, _ = ctx.to_provider_format(format="google")

function_call_part = turns[1]["parts"][0]
assert function_call_part["thought_signature"] == b"skip_thought_signature_validator"

def test_omits_thought_signature_when_not_required(self):
    """Without a ``thought_signatures`` argument, no ``thought_signature`` key is emitted."""
    chat_ctx = ChatContext.empty()
    chat_ctx.add_message(role="user", content="hello")
    tool_call = FunctionCall(call_id="call_1", name="tool", arguments="{}")
    chat_ctx.insert(tool_call)
    tool_result = FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False)
    chat_ctx.insert(tool_result)

    formatted_turns, _ = chat_ctx.to_provider_format(format="google")

    # The first part of the second turn is expected to be the function call.
    assert "thought_signature" not in formatted_turns[1]["parts"][0]


class TestGeminiModelDetection:
"""Tests for Gemini model detection helper functions."""

Expand Down
64 changes: 63 additions & 1 deletion tests/test_plugin_google_llm.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from google.genai import types

from livekit.agents import llm
from livekit.agents.llm import ChatContext, function_tool
from livekit.agents.llm import ChatContext, FunctionCall, FunctionCallOutput, function_tool
from livekit.agents.types import APIConnectOptions
from livekit.plugins.google.llm import LLM, LLMStream
from livekit.plugins.google.realtime.realtime_api import RealtimeModel, RealtimeSession
Expand Down Expand Up @@ -70,6 +71,19 @@ def test_empty_text_part_returns_none(self, llm_stream: LLMStream):

assert chunk is None

def test_stores_thought_signature_when_cache_is_none(self, llm_stream: LLMStream):
    """``_parse_part`` records the signature even when the cache starts out as ``None``."""
    llm_stream._model = "gemini-2.5-flash"
    llm_stream._llm._thought_signatures = None

    # Minimal stand-ins exposing only the attributes this test sets on a part.
    fake_call = SimpleNamespace(id="call_1", name="get_weather", args={"city": "Paris"})
    fake_part = SimpleNamespace(function_call=fake_call, thought_signature=b"real_signature")

    parsed = llm_stream._parse_part("test-id", fake_part)

    assert parsed is not None
    assert llm_stream._llm._thought_signatures == {"call_1": b"real_signature"}


class TestCachedContentOption:
"""Verify the ``cached_content`` constructor option propagates from
Expand Down Expand Up @@ -272,6 +286,54 @@ async def test_request_merges_timeout_into_caller_http_options(self) -> None:
assert caller_http_options.timeout is None
assert caller_http_options.headers == {"X-Vertex-Test": "1"}

@pytest.mark.asyncio
async def test_gemini_25_uses_sentinel_when_thought_signature_cache_is_none(self) -> None:
    """With a ``None`` signature cache, the sent function_call part carries the sentinel."""
    # Named ``gemini_llm`` so the local does not shadow the imported ``llm`` module.
    gemini_llm = LLM(model="gemini-2.5-flash", api_key="test")
    gemini_llm._thought_signatures = None

    ctx = ChatContext.empty()
    ctx.add_message(role="user", content="hello")
    tool_call = FunctionCall(call_id="call_from_openai", name="tool", arguments="{}")
    ctx.insert(tool_call)
    tool_result = FunctionCallOutput(
        call_id="call_from_openai", name="tool", output="ok", is_error=False
    )
    ctx.insert(tool_result)

    fake_stream, captured_kwargs = self._patched_stream_capture()
    with patch.object(gemini_llm._client.aio.models, "generate_content_stream", fake_stream):
        stream = gemini_llm.chat(chat_ctx=ctx)
        try:
            # Drain the stream; only the captured request is inspected.
            async for _chunk in stream:
                pass
        finally:
            await stream.aclose()

    sent_part = captured_kwargs["contents"][1].parts[0]
    assert sent_part.thought_signature == b"skip_thought_signature_validator"

@pytest.mark.asyncio
async def test_pre_gemini_25_omits_thought_signature_even_with_cached_signature(self) -> None:
    """For ``gemini-2.0-flash``, a cached signature is not attached to the sent part."""
    # Named ``gemini_llm`` so the local does not shadow the imported ``llm`` module.
    gemini_llm = LLM(model="gemini-2.0-flash", api_key="test")
    gemini_llm._thought_signatures = {"call_1": b"real_signature"}

    ctx = ChatContext.empty()
    ctx.add_message(role="user", content="hello")
    tool_call = FunctionCall(call_id="call_1", name="tool", arguments="{}")
    ctx.insert(tool_call)
    tool_result = FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False)
    ctx.insert(tool_result)

    fake_stream, captured_kwargs = self._patched_stream_capture()
    with patch.object(gemini_llm._client.aio.models, "generate_content_stream", fake_stream):
        stream = gemini_llm.chat(chat_ctx=ctx)
        try:
            # Drain the stream; only the captured request is inspected.
            async for _chunk in stream:
                pass
        finally:
            await stream.aclose()

    sent_part = captured_kwargs["contents"][1].parts[0]
    assert sent_part.thought_signature is None


class TestMediaResolution:
def test_llm_media_resolution_is_passed_to_stream_kwargs(self):
Expand Down