livekit · nightcityblade · Jun 17, 2026 · Jun 18, 2026 · Jun 18, 2026 · Jun 22, 2026
@@ -65,9 +65,14 @@ def to_chat_ctx(
                     "args": json.loads(msg.arguments or "{}"),
                 }
             }
-            # Inject thought_signature if available (Gemini 3 multi-turn function calling)
-            if thought_signatures and (sig := thought_signatures.get(msg.call_id)):
-                fc_part["thought_signature"] = sig
+            # Gemini 2.5+ requires a thought_signature for every multi-turn
+            # function_call part. When a previous tool call came from another
+            # provider (for example through FallbackAdapter), we do not have a
+            # real signature, so use Google's documented validator-bypass sentinel.
+            if thought_signatures is not None:
+                fc_part["thought_signature"] = thought_signatures.get(
+                    msg.call_id, b"skip_thought_signature_validator"
+                )
             parts.append(fc_part)
         elif msg.type == "function_call_output":
             response = {"output": msg.output} if not msg.is_error else {"error": msg.output}

@@ -440,10 +440,12 @@ async def _run(self) -> None:
         request_id = utils.shortuuid()
 
         try:
-            # Pass thought_signatures for Gemini 2.5+ multi-turn function calling
-            thought_sigs = (
-                self._llm._thought_signatures if _requires_thought_signatures(self._model) else None
-            )
+            # Pass thought_signatures for Gemini 2.5+ multi-turn function calling.
+            # The LLM's cache may be missing, None, or still empty; fall back to an
+            # empty mapping so the formatter can add the fallback sentinel.
+            thought_sigs = None
+            if _requires_thought_signatures(self._model):
+                thought_sigs = getattr(self._llm, "_thought_signatures", None) or {}
             turns_dict, extra_data = self._chat_ctx.to_provider_format(
                 format="google", thought_signatures=thought_sigs
             )
@@ -613,6 +615,8 @@ def _parse_part(self, id: str, part: types.Part) -> llm.ChatChunk | None:
                 and hasattr(part, "thought_signature")
                 and part.thought_signature
             ):
+                if getattr(self._llm, "_thought_signatures", None) is None:
+                    self._llm._thought_signatures = {}
                 self._llm._thought_signatures[tool_call.call_id] = part.thought_signature
 
             chat_chunk = llm.ChatChunk(

@@ -1,5 +1,6 @@
 import pytest
 
+from livekit.agents.llm import ChatContext, FunctionCall, FunctionCallOutput
 from livekit.plugins.google.llm import (
     _is_gemini_3_flash_model,
     _is_gemini_3_model,
@@ -9,6 +10,45 @@
 pytestmark = pytest.mark.unit
 
 
+class TestGoogleThoughtSignatureFormatting:
+    def test_injects_existing_thought_signature_for_function_call(self):
+        ctx = ChatContext.empty()
+        ctx.add_message(role="user", content="hello")
+        ctx.insert(FunctionCall(call_id="call_1", name="tool", arguments="{}"))
+        ctx.insert(FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False))
+
+        turns, _ = ctx.to_provider_format(
+            format="google", thought_signatures={"call_1": b"real_signature"}
+        )
+
+        function_call_part = turns[1]["parts"][0]
+        assert function_call_part["thought_signature"] == b"real_signature"
+
+    def test_injects_skip_sentinel_for_missing_thought_signature(self):
+        ctx = ChatContext.empty()
+        ctx.add_message(role="user", content="hello")
+        ctx.insert(FunctionCall(call_id="call_from_openai", name="tool", arguments="{}"))
+        ctx.insert(
+            FunctionCallOutput(call_id="call_from_openai", name="tool", output="ok", is_error=False)
+        )
+
+        turns, _ = ctx.to_provider_format(format="google", thought_signatures={})
+
+        function_call_part = turns[1]["parts"][0]
+        assert function_call_part["thought_signature"] == b"skip_thought_signature_validator"
+
+    def test_omits_thought_signature_when_not_required(self):
+        ctx = ChatContext.empty()
+        ctx.add_message(role="user", content="hello")
+        ctx.insert(FunctionCall(call_id="call_1", name="tool", arguments="{}"))
+        ctx.insert(FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False))
+
+        turns, _ = ctx.to_provider_format(format="google")
+
+        function_call_part = turns[1]["parts"][0]
+        assert "thought_signature" not in function_call_part
+
+
 class TestGeminiModelDetection:
     """Tests for Gemini model detection helper functions."""
 

@@ -1,12 +1,13 @@
 from __future__ import annotations
 
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 from google.genai import types
 
 from livekit.agents import llm
-from livekit.agents.llm import ChatContext, function_tool
+from livekit.agents.llm import ChatContext, FunctionCall, FunctionCallOutput, function_tool
 from livekit.agents.types import APIConnectOptions
 from livekit.plugins.google.llm import LLM, LLMStream
 from livekit.plugins.google.realtime.realtime_api import RealtimeModel, RealtimeSession
@@ -70,6 +71,19 @@ def test_empty_text_part_returns_none(self, llm_stream: LLMStream):
 
         assert chunk is None
 
+    def test_stores_thought_signature_when_cache_is_none(self, llm_stream: LLMStream):
+        llm_stream._model = "gemini-2.5-flash"
+        llm_stream._llm._thought_signatures = None
+        part = SimpleNamespace(
+            function_call=SimpleNamespace(id="call_1", name="get_weather", args={"city": "Paris"}),
+            thought_signature=b"real_signature",
+        )
+
+        chunk = llm_stream._parse_part("test-id", part)
+
+        assert chunk is not None
+        assert llm_stream._llm._thought_signatures == {"call_1": b"real_signature"}
+
 
 class TestCachedContentOption:
     """Verify the ``cached_content`` constructor option propagates from
@@ -272,6 +286,54 @@ async def test_request_merges_timeout_into_caller_http_options(self) -> None:
         assert caller_http_options.timeout is None
         assert caller_http_options.headers == {"X-Vertex-Test": "1"}
 
+    @pytest.mark.asyncio
+    async def test_gemini_25_uses_sentinel_when_thought_signature_cache_is_none(self) -> None:
+        llm = LLM(model="gemini-2.5-flash", api_key="test")
+        llm._thought_signatures = None
+
+        chat_ctx = ChatContext.empty()
+        chat_ctx.add_message(role="user", content="hello")
+        chat_ctx.insert(FunctionCall(call_id="call_from_openai", name="tool", arguments="{}"))
+        chat_ctx.insert(
+            FunctionCallOutput(call_id="call_from_openai", name="tool", output="ok", is_error=False)
+        )
+
+        fake, captured = self._patched_stream_capture()
+        with patch.object(llm._client.aio.models, "generate_content_stream", fake):
+            stream = llm.chat(chat_ctx=chat_ctx)
+            try:
+                async for _ in stream:
+                    pass
+            finally:
+                await stream.aclose()
+
+        function_call_part = captured["contents"][1].parts[0]
+        assert function_call_part.thought_signature == b"skip_thought_signature_validator"
+
+    @pytest.mark.asyncio
+    async def test_pre_gemini_25_omits_thought_signature_even_with_cached_signature(self) -> None:
+        llm = LLM(model="gemini-2.0-flash", api_key="test")
+        llm._thought_signatures = {"call_1": b"real_signature"}
+
+        chat_ctx = ChatContext.empty()
+        chat_ctx.add_message(role="user", content="hello")
+        chat_ctx.insert(FunctionCall(call_id="call_1", name="tool", arguments="{}"))
+        chat_ctx.insert(
+            FunctionCallOutput(call_id="call_1", name="tool", output="ok", is_error=False)
+        )
+
+        fake, captured = self._patched_stream_capture()
+        with patch.object(llm._client.aio.models, "generate_content_stream", fake):
+            stream = llm.chat(chat_ctx=chat_ctx)
+            try:
+                async for _ in stream:
+                    pass
+            finally:
+                await stream.aclose()
+
+        function_call_part = captured["contents"][1].parts[0]
+        assert function_call_part.thought_signature is None
+
 
 class TestMediaResolution:
     def test_llm_media_resolution_is_passed_to_stream_kwargs(self):