fix: preserve Anthropic thinking blocks and signatures in LiteLLM round-trip

giulio-leone · giulio-leone · commit cb407cc9b01b · 2026-04-09T01:31:36.000+02:00
When using Claude models through LiteLLM, extended thinking blocks (with
signatures) were lost after the first turn because:

1. _extract_reasoning_value() only read reasoning_content (a flattened string
   without signatures), ignoring thinking_blocks.
2. _content_to_message_param() set reasoning_content on the outgoing message,
   which LiteLLM's anthropic_messages_pt() template silently drops.

This fix:

- Adds an _is_anthropic_provider() helper to detect the anthropic, bedrock,
  and vertex_ai providers.
- Updates _extract_reasoning_value() to prefer thinking_blocks (with per-block
  signatures) over reasoning_content.
- Updates _convert_reasoning_value_to_parts() to handle
  ChatCompletionThinkingBlock dicts, preserving thought_signature.
- Updates _content_to_message_param() to embed thinking blocks directly in the
  message content list for Anthropic providers, bypassing the broken
  reasoning_content path.

Fixes #4801
diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
@@ -233,6 +233,16 @@ def _get_provider_from_model(model: str) -> str:
   return ""
 
 
+# Providers through which Claude models are served (bedrock and vertex_ai also
+# host other models); thinking blocks go directly in the message content list.
+_ANTHROPIC_PROVIDERS = frozenset({"anthropic", "bedrock", "vertex_ai"})
+
+
+def _is_anthropic_provider(provider: str) -> bool:
+  """Returns True if the provider routes to an Anthropic model endpoint."""
+  return provider.lower() in _ANTHROPIC_PROVIDERS if provider else False
+
+
 # Default MIME type when none can be inferred
 _DEFAULT_MIME_TYPE = "application/octet-stream"
 
@@ -399,26 +409,34 @@ def _is_thinking_blocks_format(reasoning_value: Any) -> bool:
 def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
   """Converts provider reasoning payloads into Gemini thought parts.
 
-  Handles Anthropic thinking_blocks (list of dicts with type/thinking/signature)
-  by preserving the signature on each part's thought_signature field. This is
-  required for Anthropic to maintain thinking across tool call boundaries.
+  Handles two formats:
+  - Anthropic thinking_blocks with 'thinking' and optional 'signature' fields.
+  - A plain string or nested structure (OpenAI/Azure/Ollama) via
+    _iter_reasoning_texts.
   """
-  if _is_thinking_blocks_format(reasoning_value):
+  if isinstance(reasoning_value, list):
     parts: List[types.Part] = []
     for block in reasoning_value:
-      if not isinstance(block, dict):
-        continue
-      block_type = block.get("type", "")
-      if block_type == "redacted":
-        continue
-      thinking_text = block.get("thinking", "")
-      signature = block.get("signature", "")
-      if not thinking_text:
-        continue
-      part = types.Part(text=thinking_text, thought=True)
-      if signature:
-        part.thought_signature = signature.encode("utf-8")
-      parts.append(part)
+      if isinstance(block, dict):
+        block_type = block.get("type", "")
+        if block_type == "redacted":
+          continue
+        if block_type == "thinking":
+          thinking_text = block.get("thinking", "")
+          if thinking_text:
+            part = types.Part(text=thinking_text, thought=True)
+            signature = block.get("signature")
+            if signature:
+              decoded_signature = _decode_thought_signature(signature)
+              part.thought_signature = (
+                  decoded_signature or str(signature).encode("utf-8")
+              )
+            parts.append(part)
+          continue
+      # Fall back to text extraction for non-thinking-block items.
+      for text in _iter_reasoning_texts(block):
+        if text:
+          parts.append(types.Part(text=text, thought=True))
     return parts
   return [
       types.Part(text=text, thought=True)
@@ -430,16 +448,16 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
 def _extract_reasoning_value(message: Message | Delta | None) -> Any:
   """Fetches the reasoning payload from a LiteLLM message.
 
-  Checks for 'thinking_blocks' (Anthropic structured format with signatures),
-  'reasoning_content' (LiteLLM standard, used by Azure/Foundry, Ollama via
-  LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
-  Prioritizes 'thinking_blocks' when present (Anthropic models), then
-  'reasoning_content', then 'reasoning'.
+  Checks for 'thinking_blocks' (Anthropic thinking with signatures),
+  'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
+  Ollama via LiteLLM), and 'reasoning' (used by LM Studio, vLLM).
+  Prioritizes 'thinking_blocks' when the key is present, as they contain
+  the signature required for Anthropic's extended thinking API.
   """
   if message is None:
     return None
-  # Anthropic models return thinking_blocks with type/thinking/signature fields.
-  # This must be preserved to maintain thinking across tool call boundaries.
+  # Prefer thinking_blocks (Anthropic) — they carry per-block signatures
+  # needed for multi-turn conversations with extended thinking.
   thinking_blocks = message.get("thinking_blocks")
   if thinking_blocks is not None:
     return thinking_blocks
@@ -912,6 +930,33 @@ async def _content_to_message_param(
       ):
         reasoning_texts.append(_decode_inline_text_data(part.inline_data.data))
 
+    # Anthropic/Bedrock providers require thinking blocks to be embedded
+    # directly in the message content list. LiteLLM's prompt template for
+    # Anthropic drops the top-level reasoning_content field, so thinking
+    # blocks disappear from multi-turn histories and the model stops
+    # producing them after the first turn. Signatures are required by the
+    # Anthropic API for thinking blocks in multi-turn conversations.
+    if reasoning_parts and _is_anthropic_provider(provider):
+      content_list = []
+      for part in reasoning_parts:
+        if part.text:
+          block = {"type": "thinking", "thinking": part.text}
+          if part.thought_signature:
+            sig = part.thought_signature
+            if isinstance(sig, bytes):
+              sig = base64.b64encode(sig).decode("utf-8")
+            block["signature"] = sig
+          content_list.append(block)
+      if isinstance(final_content, list):
+        content_list.extend(final_content)
+      elif final_content:
+        content_list.append({"type": "text", "text": final_content})
+      return ChatCompletionAssistantMessage(
+          role=role,
+          content=content_list or None,
+          tool_calls=tool_calls or None,
+      )
+
     reasoning_content = _NEW_LINE.join(text for text in reasoning_texts if text)
     return ChatCompletionAssistantMessage(
         role=role,
diff --git a/tests/unittests/models/test_litellm.py b/tests/unittests/models/test_litellm.py
@@ -38,6 +38,7 @@
 from google.adk.models.lite_llm import _get_completion_inputs
 from google.adk.models.lite_llm import _get_content
 from google.adk.models.lite_llm import _get_provider_from_model
+from google.adk.models.lite_llm import _is_anthropic_provider
 from google.adk.models.lite_llm import _is_anthropic_model
 from google.adk.models.lite_llm import _message_to_generate_content_response
 from google.adk.models.lite_llm import _MISSING_TOOL_RESULT_MESSAGE
@@ -4689,6 +4690,17 @@ def test_handles_litellm_logger_names(logger_name):
 # ── Anthropic thinking_blocks tests ─────────────────────────────
 
 
+def test_is_anthropic_provider():
+  """Verify _is_anthropic_provider matches known Claude provider prefixes."""
+  assert _is_anthropic_provider("anthropic")
+  assert _is_anthropic_provider("bedrock")
+  assert _is_anthropic_provider("vertex_ai")
+  assert _is_anthropic_provider("ANTHROPIC")  # case-insensitive
+  assert not _is_anthropic_provider("openai")
+  assert not _is_anthropic_provider("")
+  assert not _is_anthropic_provider(None)
+
+
 @pytest.mark.parametrize(
     "model_string,expected",
     [
@@ -4723,9 +4735,10 @@ def test_is_anthropic_model(model_string, expected):
 
 
 def test_extract_reasoning_value_prefers_thinking_blocks():
-  """thinking_blocks takes precedence over reasoning_content."""
+  """thinking_blocks (Anthropic format with signatures) take priority."""
   thinking_blocks = [
-      {"type": "thinking", "thinking": "deep thought", "signature": "sig123"},
+      {"type": "thinking", "thinking": "step 1", "signature": "c2lnX2E="},
+      {"type": "thinking", "thinking": "step 2", "signature": "c2lnX2I="},
   ]
   message = {
       "role": "assistant",
@@ -4748,25 +4761,36 @@ def test_extract_reasoning_value_falls_back_without_thinking_blocks():
   assert result == "flat reasoning"
 
 
-def test_convert_reasoning_value_to_parts_thinking_blocks_preserves_signature():
-  """thinking_blocks format produces parts with thought_signature."""
+def test_convert_reasoning_value_to_parts_preserves_base64_signature():
+  """Base64 signatures are decoded to raw bytes on thought parts."""
   thinking_blocks = [
-      {"type": "thinking", "thinking": "step 1", "signature": "sig_abc"},
-      {"type": "thinking", "thinking": "step 2", "signature": "sig_def"},
+      {"type": "thinking", "thinking": "step 1", "signature": "c2lnX2E="},
+      {"type": "thinking", "thinking": "step 2", "signature": "c2lnX2I="},
   ]
   parts = _convert_reasoning_value_to_parts(thinking_blocks)
   assert len(parts) == 2
   assert parts[0].text == "step 1"
   assert parts[0].thought is True
-  assert parts[0].thought_signature == b"sig_abc"
+  assert parts[0].thought_signature == b"sig_a"
   assert parts[1].text == "step 2"
-  assert parts[1].thought_signature == b"sig_def"
+  assert parts[1].thought_signature == b"sig_b"
+
+
+def test_convert_reasoning_value_to_parts_raw_signature_falls_back_to_utf8():
+  """Non-base64 signatures are preserved as utf-8 bytes."""
+  thinking_blocks = [
+      {"type": "thinking", "thinking": "step 1", "signature": "sig_raw"},
+  ]
+  parts = _convert_reasoning_value_to_parts(thinking_blocks)
+  assert len(parts) == 1
+  assert parts[0].text == "step 1"
+  assert parts[0].thought_signature == b"sig_raw"
 
 
 def test_convert_reasoning_value_to_parts_skips_redacted_blocks():
   """Redacted thinking blocks are excluded from parts."""
   thinking_blocks = [
-      {"type": "thinking", "thinking": "visible", "signature": "sig1"},
+      {"type": "thinking", "thinking": "visible", "signature": "c2lnMQ=="},
       {"type": "redacted", "data": "hidden"},
   ]
   parts = _convert_reasoning_value_to_parts(thinking_blocks)
@@ -4777,8 +4801,8 @@ def test_convert_reasoning_value_to_parts_skips_redacted_blocks():
 def test_convert_reasoning_value_to_parts_skips_empty_thinking():
   """Blocks with empty thinking text are excluded."""
   thinking_blocks = [
-      {"type": "thinking", "thinking": "", "signature": "sig1"},
-      {"type": "thinking", "thinking": "real thought", "signature": "sig2"},
+      {"type": "thinking", "thinking": "", "signature": "c2lnMQ=="},
+      {"type": "thinking", "thinking": "real thought", "signature": "c2lnMg=="},
   ]
   parts = _convert_reasoning_value_to_parts(thinking_blocks)
   assert len(parts) == 1
@@ -4812,13 +4836,14 @@ async def test_content_to_message_param_anthropic_outputs_thinking_blocks():
       content, model="anthropic/claude-4-sonnet"
   )
   assert result["role"] == "assistant"
-  assert "thinking_blocks" in result
+  assert result["thinking_blocks"] == [
+      {
+          "type": "thinking",
+          "thinking": "deep thought",
+          "signature": "sig_round_trip",
+      }
+  ]
   assert result.get("reasoning_content") is None
-  blocks = result["thinking_blocks"]
-  assert len(blocks) == 1
-  assert blocks[0]["type"] == "thinking"
-  assert blocks[0]["thinking"] == "deep thought"
-  assert blocks[0]["signature"] == "sig_round_trip"
   assert result["content"] == "Hello!"
 
 
@@ -4839,43 +4864,45 @@ async def test_content_to_message_param_non_anthropic_uses_reasoning_content():
 
 
 @pytest.mark.asyncio
-async def test_anthropic_thinking_blocks_round_trip():
-  """End-to-end: thinking_blocks in response → Part → thinking_blocks out."""
-  # Simulate LiteLLM response with thinking_blocks
+async def test_anthropic_provider_thinking_blocks_round_trip():
+  """End-to-end: thinking_blocks in response stay intact for Anthropic provider."""
   response_message = {
       "role": "assistant",
       "content": "Final answer",
       "thinking_blocks": [
           {
               "type": "thinking",
               "thinking": "Let me reason...",
-              "signature": "abc123signature",
+              "signature": "c2lnX2E=",
           },
       ],
   }
 
-  # Step 1: Extract reasoning value
   reasoning_value = _extract_reasoning_value(response_message)
   assert isinstance(reasoning_value, list)
 
-  # Step 2: Convert to parts (preserves signature)
   parts = _convert_reasoning_value_to_parts(reasoning_value)
   assert len(parts) == 1
-  assert parts[0].thought_signature == b"abc123signature"
+  assert parts[0].thought_signature == b"sig_a"
 
-  # Step 3: Build Content for history
-  all_parts = parts + [types.Part(text="Final answer")]
+  all_parts = parts + [
+      types.Part(text="Final answer"),
+      types.Part.from_function_call(name="add", args={"a": 1, "b": 2}),
+  ]
   content = types.Content(role="model", parts=all_parts)
 
-  # Step 4: Convert back to message param for Anthropic
-  result = await _content_to_message_param(
-      content, model="anthropic/claude-4-sonnet"
-  )
-  blocks = result["thinking_blocks"]
-  assert len(blocks) == 1
-  assert blocks[0]["type"] == "thinking"
-  assert blocks[0]["thinking"] == "Let me reason..."
-  assert blocks[0]["signature"] == "abc123signature"
+  msg = await _content_to_message_param(content, provider="anthropic")
+  assert isinstance(msg["content"], list)
+  assert msg["content"][0] == {
+      "type": "thinking",
+      "thinking": "Let me reason...",
+      "signature": "c2lnX2E=",
+  }
+  assert msg["content"][1] == {"type": "text", "text": "Final answer"}
+  assert msg["tool_calls"] is not None
+  assert len(msg["tool_calls"]) == 1
+  assert msg["tool_calls"][0]["function"]["name"] == "add"
+  assert msg.get("reasoning_content") is None
 
 
 @pytest.mark.asyncio
@@ -4891,6 +4918,5 @@ async def test_content_to_message_param_anthropic_no_signature_falls_back():
   result = await _content_to_message_param(
       content, model="anthropic/claude-4-sonnet"
   )
-  # Falls back to reasoning_content when no signatures present
   assert result.get("reasoning_content") == "thinking without sig"
   assert "thinking_blocks" not in result