Skip to content

Commit 291b8d5

Browse files
committed
fix: round-trip Gemini 3 thought signatures through Vertex AI converter
Gemini 3.x models (gemini-3-flash, gemini-3.5-flash) attach a thought_signature to the first functionCall part of a tool-calling turn and require it to be replayed verbatim on the next turn, or the request fails with HTTP 400. llama-stack converts Gemini responses into the OpenAI chat-completion shape, which has no field for the signature, so it is dropped and every multi-turn tool call against a Gemini 3 model fails. Monkeypatch llama-stack's vertexai converter at app import time. Both wrappers defer entirely to the upstream originals and only smuggle the base64-encoded signature in and out through the opaque tool-call id (which llama-stack round-trips untouched and only ever compares for equality): the extract wrapper re-pairs each functionCall part with the tool call the original emitted and embeds the signature in its id; the assistant-message wrapper decodes it back onto the rebuilt Gemini part. The patch is idempotent and a no-op when the Vertex AI provider is not installed. Remove it once the fix lands upstream. Signed-off-by: Major Hayden <major@redhat.com>
1 parent ad85ff7 commit 291b8d5

3 files changed

Lines changed: 310 additions & 0 deletions

File tree

src/app/main.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,11 +27,20 @@
2727
from sentry import initialize_sentry
2828
from utils.common import register_mcp_servers_async
2929
from utils.llama_stack_version import check_llama_stack_version
30+
from utils.vertexai_thought_signature import (
31+
apply_patch as apply_vertexai_thought_signature_patch,
32+
)
3033

3134
logger = get_logger(__name__)
3235

3336
logger.info("Initializing app")
3437

38+
# DOWNSTREAM PATCH: carry Gemini 3 thought signatures through llama-stack's
39+
# Vertex AI converter so multi-turn tool calls against gemini-3.x models do not
40+
# fail with HTTP 400. Applied at import time so every worker process patches
41+
# before serving requests. Remove once the fix lands upstream.
42+
apply_vertexai_thought_signature_patch()
43+
3544

3645
service_name = configuration.configuration.name
3746

Lines changed: 159 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
"""Round-trip Gemini 3 thought signatures through llama-stack's Vertex AI path.
2+
3+
Gemini 3.x models (for example ``gemini-3-flash`` and ``gemini-3.5-flash``)
4+
attach a ``thought_signature`` to the first ``functionCall`` part of a
5+
tool-calling turn. The signature MUST be replayed verbatim on the following
6+
turn or Gemini rejects the request with HTTP 400.
7+
8+
llama-stack converts Gemini responses into the OpenAI chat-completion shape
9+
before they re-enter its own history, and that shape has no field for a
10+
thought signature, so the signature is dropped and every multi-turn tool call
11+
against a Gemini 3 model fails. This module monkeypatches llama-stack's
12+
``vertexai`` converter so the signature survives the round trip.
13+
14+
Strategy: both patched functions are thin wrappers around the upstream
15+
originals. We copy none of llama-stack's conversion logic; we only smuggle the
16+
signature in and out through the opaque tool-call ``id`` (which llama-stack
17+
round-trips untouched and only ever compares for equality).
18+
19+
- On the way out (Gemini -> OpenAI): ``_extract_candidate_parts`` produces a
20+
random tool-call id per ``functionCall`` part. We call the original, then
21+
re-walk the candidate's parts in the same deterministic order, pair each
22+
``functionCall`` part with the tool call the original emitted, and rewrite
23+
that tool call's id to embed the base64-encoded signature.
24+
25+
- On the way back (OpenAI -> Gemini): ``_convert_assistant_message`` builds
26+
the Gemini ``parts``. We call the original, then re-pair each
27+
``function_call`` part with its source tool call (same order) and attach the
28+
decoded signature.
29+
30+
This file shadows behaviour tied to a specific llama-stack release. Remove it
31+
once the upstream Vertex AI provider carries thought signatures natively.
32+
"""
33+
34+
import base64
35+
from typing import Any
36+
37+
from log import get_logger
38+
39+
logger = get_logger(__name__)
40+
41+
# Marker placed between the upstream tool-call id and the base64 payload that
# carries the Gemini thought_signature. Picked so that it is extremely
# unlikely to occur inside an ordinary tool-call id.
_THOUGHT_SIG_SEP = "::gts::"

# Flipped to True by apply_patch() after the converter has been wrapped, so
# later calls during startup return immediately.
_PATCH_APPLIED = False
47+
48+
49+
def _encode_thought_signature_into_id(call_id: str, signature: Any) -> str:
    """Return ``call_id`` with a Gemini thought_signature appended as base64.

    The result has the form ``<call_id><separator><base64(signature)>`` so it
    stays a plain string. ``str`` signatures are UTF-8 encoded first; any
    other value is passed through ``bytes()``.

    Args:
        call_id: The tool-call id to extend.
        signature: The thought signature (normally bytes), or a falsy value
            when there is nothing to carry.

    Returns:
        The extended id, or ``call_id`` unchanged when ``signature`` is falsy
        or cannot be converted to bytes.
    """
    if signature:
        try:
            if isinstance(signature, str):
                payload = signature.encode("utf-8")
            else:
                payload = bytes(signature)
            suffix = base64.b64encode(payload).decode("ascii")
        except (TypeError, ValueError):
            # Best effort: an unencodable signature leaves the id as it was.
            return call_id
        return f"{call_id}{_THOUGHT_SIG_SEP}{suffix}"
    return call_id
64+
65+
66+
def _decode_thought_signature_from_id(call_id: str) -> bytes | None:
67+
"""Recover the thought_signature bytes smuggled into a tool-call id."""
68+
if not call_id or _THOUGHT_SIG_SEP not in call_id:
69+
return None
70+
_, _, encoded = call_id.partition(_THOUGHT_SIG_SEP)
71+
try:
72+
return base64.b64decode(encoded)
73+
except (ValueError, TypeError):
74+
return None
75+
76+
77+
def _iter_function_call_parts(candidate: Any) -> list[Any]:
    """Collect the ``functionCall`` parts of a Gemini candidate, in order.

    The filter is meant to follow the order in which llama-stack's
    ``_extract_candidate_parts`` examines each part, so that the returned
    parts line up one-to-one with the tool calls it produces.

    Args:
        candidate: A Gemini candidate; a missing ``content`` or ``parts``
            attribute is treated as "no parts".

    Returns:
        The parts whose ``function_call`` is set, excluding thinking parts
        and parts that carry text.
    """
    content = getattr(candidate, "content", None)
    all_parts = getattr(content, "parts", None) or []
    # Thinking parts and text parts are consumed before the function-call
    # branch upstream, so they are excluded first, in that same order.
    return [
        item
        for item in all_parts
        if not getattr(item, "thought", None)
        and getattr(item, "text", None) is None
        and getattr(item, "function_call", None) is not None
    ]
96+
97+
98+
def apply_patch() -> bool:
    """Monkeypatch the Vertex AI converter to carry Gemini thought signatures.

    Replaces ``converters._extract_candidate_parts`` and
    ``converters._convert_assistant_message`` with thin wrappers that call the
    upstream originals and then move the signature through the tool-call id.

    Idempotent in two ways: the module-level ``_PATCH_APPLIED`` flag
    short-circuits repeated calls, and each installed wrapper carries a marker
    attribute so an already-wrapped converter is never wrapped a second time
    (for example if the flag is reset or this module is reloaded).

    Returns:
        ``True`` if the patch is in effect after the call. ``False`` if the
        converter module could not be imported (for example when the Vertex AI
        provider is not installed) or no longer exposes the private helpers
        this patch relies on; in both cases nothing is changed.
    """
    global _PATCH_APPLIED
    if _PATCH_APPLIED:
        return True

    try:
        from llama_stack.providers.remote.inference.vertexai import (
            converters,
        )
    except ImportError:
        logger.info(
            "Vertex AI converter not importable; skipping Gemini thought-signature patch"
        )
        return False

    # These are private upstream names. If a llama-stack release renames or
    # removes them, skip the patch rather than crash the app at import time.
    required = ("_extract_candidate_parts", "_convert_assistant_message", "_to_dict")
    missing = [name for name in required if not hasattr(converters, name)]
    if missing:
        logger.warning(
            "Vertex AI converter is missing %s; skipping Gemini thought-signature patch",
            ", ".join(missing),
        )
        return False

    # Attribute set on our wrappers so they can be recognised later.
    marker = "_gemini_thought_signature_wrapper"
    original_extract = converters._extract_candidate_parts
    original_convert_assistant = converters._convert_assistant_message

    def patched_extract_candidate_parts(candidate: Any) -> Any:
        text_parts, thinking_parts, tool_calls = original_extract(candidate)
        if not tool_calls:
            return text_parts, thinking_parts, tool_calls
        fc_parts = _iter_function_call_parts(candidate)
        # Pairing is positional, so it is only sound when the original emitted
        # exactly one tool call per function-call part. On a mismatch leave
        # the ids untouched instead of risking a signature on the wrong call.
        if len(fc_parts) != len(tool_calls):
            logger.warning(
                "Gemini thought-signature patch: %d function-call parts but %d "
                "tool calls; leaving tool-call ids unchanged",
                len(fc_parts),
                len(tool_calls),
            )
            return text_parts, thinking_parts, tool_calls
        for tool_call, part in zip(tool_calls, fc_parts):
            signature = getattr(part, "thought_signature", None)
            if not signature:
                continue
            tool_call.id = _encode_thought_signature_into_id(tool_call.id, signature)
        return text_parts, thinking_parts, tool_calls

    def patched_convert_assistant_message(msg: dict[str, Any]) -> dict[str, Any] | None:
        result = original_convert_assistant(msg)
        if result is None:
            return None
        tool_calls = msg.get("tool_calls") or []
        if not tool_calls:
            return result
        # Re-pair each Gemini function_call part with its source tool call, in
        # order, and attach the decoded signature. Only dict parts can carry
        # the extra key.
        fc_parts = [
            p
            for p in result.get("parts", [])
            if isinstance(p, dict) and "function_call" in p
        ]
        if len(fc_parts) != len(tool_calls):
            logger.warning(
                "Gemini thought-signature patch: %d function_call parts but %d "
                "tool calls; not attaching thought signatures",
                len(fc_parts),
                len(tool_calls),
            )
            return result
        for part, tool_call in zip(fc_parts, tool_calls):
            call_id = converters._to_dict(tool_call).get("id", "")
            signature = _decode_thought_signature_from_id(call_id)
            if signature is not None:
                part["thought_signature"] = signature
        return result

    # Install each wrapper only if that hook is not already one of ours, so
    # wrappers never stack.
    if not getattr(original_extract, marker, False):
        setattr(patched_extract_candidate_parts, marker, True)
        converters._extract_candidate_parts = patched_extract_candidate_parts
    if not getattr(original_convert_assistant, marker, False):
        setattr(patched_convert_assistant_message, marker, True)
        converters._convert_assistant_message = patched_convert_assistant_message

    _PATCH_APPLIED = True
    logger.info("Applied Gemini 3 thought-signature patch to Vertex AI converter")
    return True
Lines changed: 142 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
"""Unit tests for the Gemini 3 thought-signature Vertex AI converter patch."""
2+
3+
from types import SimpleNamespace
4+
5+
import pytest
6+
7+
from utils import vertexai_thought_signature as patch
8+
9+
10+
@pytest.fixture(autouse=True)
def _reset_patch_state(monkeypatch):
    """Start each test with the guard cleared and the converter hooks restorable.

    ``apply_patch`` assigns its wrappers straight onto the converter module.
    Clearing only ``_PATCH_APPLIED`` would let every test that calls
    ``apply_patch()`` wrap the wrappers left behind by the previous test.
    Registering the current hook values with ``monkeypatch`` makes pytest put
    them back at teardown, so each test sees the same starting functions.
    """
    monkeypatch.setattr(patch, "_PATCH_APPLIED", False)
    try:
        from llama_stack.providers.remote.inference.vertexai import converters
    except ImportError:
        # Provider not installed: there is nothing to restore.
        converters = None
    if converters is not None:
        for hook in ("_extract_candidate_parts", "_convert_assistant_message"):
            if hasattr(converters, hook):
                # Setting the attribute to its current value records it for
                # restoration when the test finishes.
                monkeypatch.setattr(converters, hook, getattr(converters, hook))
    yield
15+
16+
17+
class TestEncodeDecode:
    """Encoding a signature into an id and decoding it back are inverse operations."""

    def test_round_trip_bytes(self):
        signature = b"\x01\x02\xfe\xffsig"
        smuggled = patch._encode_thought_signature_into_id("call_x", signature)
        # The original id stays as the prefix and the separator is present.
        assert smuggled.startswith("call_x")
        assert patch._THOUGHT_SIG_SEP in smuggled
        recovered = patch._decode_thought_signature_from_id(smuggled)
        assert recovered == signature

    def test_round_trip_str_signature(self):
        smuggled = patch._encode_thought_signature_into_id("call_x", "abc")
        recovered = patch._decode_thought_signature_from_id(smuggled)
        assert recovered == b"abc"

    def test_no_signature_leaves_id_untouched(self):
        without_none = patch._encode_thought_signature_into_id("call_x", None)
        assert without_none == "call_x"
        without_empty = patch._encode_thought_signature_into_id("call_x", b"")
        assert without_empty == "call_x"

    def test_plain_id_decodes_to_none(self):
        plain = patch._decode_thought_signature_from_id("call_x")
        assert plain is None
        blank = patch._decode_thought_signature_from_id("")
        assert blank is None

    def test_corrupt_payload_decodes_to_none(self):
        broken_id = f"call_x{patch._THOUGHT_SIG_SEP}!!!not-base64!!!"
        decoded = patch._decode_thought_signature_from_id(broken_id)
        assert decoded is None
42+
43+
44+
def _make_part(**kw):
    """Build a stand-in Gemini part: four standard attributes default to None, ``kw`` overrides."""
    defaults = dict.fromkeys(("thought", "text", "function_call", "thought_signature"))
    return SimpleNamespace(**{**defaults, **kw})
48+
49+
50+
def _make_candidate(parts):
    """Build a stand-in Gemini candidate whose ``content.parts`` is ``parts``."""
    content = SimpleNamespace(parts=parts)
    return SimpleNamespace(content=content, finish_reason=None)
52+
53+
54+
class TestApplyPatch:
    """apply_patch installs the wrappers on the real converter module and is idempotent."""

    # Dotted path of the upstream module under test; tests that need it are
    # skipped when it cannot be imported.
    _CONVERTERS_MODULE = "llama_stack.providers.remote.inference.vertexai.converters"

    def test_apply_is_idempotent(self):
        converters = pytest.importorskip(self._CONVERTERS_MODULE)
        assert patch.apply_patch() is True
        wrapped_once = converters._extract_candidate_parts
        # A second call must not replace the wrapper that is already installed.
        assert patch.apply_patch() is True
        assert converters._extract_candidate_parts is wrapped_once

    def test_apply_missing_provider_returns_false(self, monkeypatch):
        import builtins

        blocked_prefix = "llama_stack.providers.remote.inference.vertexai"
        genuine_import = builtins.__import__

        def refusing_import(name, *args, **kwargs):
            # Simulate an environment without the Vertex AI provider.
            if not name.startswith(blocked_prefix):
                return genuine_import(name, *args, **kwargs)
            raise ImportError("provider not installed")

        monkeypatch.setattr(builtins, "__import__", refusing_import)
        assert patch.apply_patch() is False

    def test_extract_embeds_signature_into_tool_call_id(self):
        converters = pytest.importorskip(self._CONVERTERS_MODULE)
        patch.apply_patch()
        signature = b"\x10\x20signature"
        function_call = SimpleNamespace(name="search_portal", args={"q": "selinux"})
        text_part = _make_part(text="thinking")
        call_part = _make_part(function_call=function_call, thought_signature=signature)
        candidate = _make_candidate([text_part, call_part])
        _, _, tool_calls = converters._extract_candidate_parts(candidate)
        assert len(tool_calls) == 1
        only_call = tool_calls[0]
        # The function name is untouched; the signature travels in the id.
        assert only_call.function.name == "search_portal"
        assert patch._decode_thought_signature_from_id(only_call.id) == signature

    def test_round_trip_through_assistant_message(self):
        converters = pytest.importorskip(self._CONVERTERS_MODULE)
        patch.apply_patch()
        signature = b"round-trip-bytes"
        function_call = SimpleNamespace(name="get_document", args={"id": "1"})
        candidate = _make_candidate(
            [_make_part(function_call=function_call, thought_signature=signature)]
        )
        _, _, tool_calls = converters._extract_candidate_parts(candidate)

        # Replay the emitted tool call as an OpenAI-style assistant message.
        replayed_call = {
            "id": tool_calls[0].id,
            "type": "function",
            "function": {"name": "get_document", "arguments": '{"id":"1"}'},
        }
        message = {"role": "assistant", "content": None, "tool_calls": [replayed_call]}
        converted = converters._convert_assistant_message(message)
        call_parts = [p for p in converted["parts"] if "function_call" in p]
        assert len(call_parts) == 1
        assert call_parts[0]["thought_signature"] == signature

    def test_tool_call_without_signature_stays_clean(self):
        converters = pytest.importorskip(self._CONVERTERS_MODULE)
        patch.apply_patch()
        plain_call = {
            "id": "call_plain",
            "type": "function",
            "function": {"name": "x", "arguments": "{}"},
        }
        message = {"role": "assistant", "content": None, "tool_calls": [plain_call]}
        converted = converters._convert_assistant_message(message)
        call_part = next(p for p in converted["parts"] if "function_call" in p)
        assert "thought_signature" not in call_part

0 commit comments

Comments
 (0)