@@ -330,6 +330,30 @@ def _get_provider_from_model(model: str) -> str:
330330 return ""
331331
332332
# Provider identifiers through which an Anthropic model may be reached.
# "bedrock" and "vertex_ai" are multi-model platforms, so membership here is
# not sufficient on its own for them: _is_anthropic_route additionally checks
# the model name before treating a request as Anthropic-bound.
_ANTHROPIC_PROVIDERS = frozenset(("anthropic", "bedrock", "vertex_ai"))
336+
337+
def _is_anthropic_provider(provider: str) -> bool:
  """Reports whether `provider` is listed in _ANTHROPIC_PROVIDERS.

  The lookup is case-insensitive. A falsy provider (None or an empty string)
  yields False without touching the provider set.

  Args:
    provider: Provider identifier to test.

  Returns:
    True if the lowercased provider is a member of _ANTHROPIC_PROVIDERS,
    otherwise False.
  """
  if not provider:
    return False
  return provider.lower() in _ANTHROPIC_PROVIDERS
341+
342+
def _is_anthropic_route(provider: str, model: str) -> bool:
  """Reports whether a request for (provider, model) reaches an Anthropic model.

  A provider rejected by _is_anthropic_provider is never an Anthropic route.
  For "bedrock" and "vertex_ai" (compared case-insensitively) the answer is
  delegated to _is_anthropic_model(model), because those platforms also host
  non-Anthropic models (Llama, Gemini) and formatting thinking blocks for such
  a model triggers API validation (400) errors. Any other accepted provider is
  treated as an Anthropic route regardless of the model name.

  Args:
    provider: Provider identifier for the request.
    model: Model name; only consulted for "bedrock" and "vertex_ai".

  Returns:
    False for a non-Anthropic provider, the result of
    _is_anthropic_model(model) for the multi-model platforms, True otherwise.
  """
  if _is_anthropic_provider(provider):
    normalized = provider.lower()
    # Multi-model platforms need the model name to confirm the route.
    needs_model_check = normalized == "bedrock" or normalized == "vertex_ai"
    return _is_anthropic_model(model) if needs_model_check else True
  return False
355+
356+
333357def _infer_mime_type_from_uri (uri : str ) -> Optional [str ]:
334358 """Attempts to infer MIME type from a URI's path extension.
335359
@@ -491,42 +515,48 @@ def _iter_reasoning_texts(reasoning_value: Any) -> Iterable[str]:
491515
492516
def _is_thinking_blocks_format(reasoning_value: Any) -> bool:
  """Returns True if reasoning_value looks like Anthropic thinking_blocks.

  Anthropic thinking_blocks is a list of dicts carrying 'type', 'thinking',
  and 'signature' keys. Detection is deliberately shallow: only the first
  element is inspected, and only for the presence of a 'signature' key, so
  signature-less blocks are not matched here.

  Args:
    reasoning_value: Reasoning payload of any type.

  Returns:
    True when reasoning_value is a non-empty list whose first element is a
    dict containing a 'signature' key; False for every other input.
  """
  if not isinstance(reasoning_value, list) or not reasoning_value:
    return False
  first = reasoning_value[0]
  return isinstance(first, dict) and "signature" in first
505527
506528
507529def _convert_reasoning_value_to_parts (reasoning_value : Any ) -> List [types .Part ]:
508530 """Converts provider reasoning payloads into Gemini thought parts.
509531
510- Handles Anthropic thinking_blocks (list of dicts with type/thinking/signature)
511- by preserving the signature on each part's thought_signature field. This is
512- required for Anthropic to maintain thinking across tool call boundaries.
532+ Handles two formats:
533+ - Anthropic thinking_blocks with 'thinking' and optional 'signature' fields.
534+ - A plain string or nested structure (OpenAI/Azure/Ollama) via
535+ _iter_reasoning_texts.
513536 """
514- if _is_thinking_blocks_format (reasoning_value ):
537+ if isinstance (reasoning_value , list ):
515538 parts : List [types .Part ] = []
516539 for block in reasoning_value :
517- if not isinstance (block , dict ):
518- continue
519- block_type = block .get ("type" , "" )
520- if block_type == "redacted" :
521- continue
522- thinking_text = block .get ("thinking" , "" )
523- signature = block .get ("signature" , "" )
524- if not thinking_text and not signature :
525- continue
526- part = types .Part (text = thinking_text , thought = True )
527- if signature :
528- part .thought_signature = signature .encode ("utf-8" )
529- parts .append (part )
540+ if isinstance (block , dict ):
541+ block_type = block .get ("type" , "" )
542+ if block_type == "redacted" :
543+ continue
544+ if block_type == "thinking" :
545+ thinking_text = block .get ("thinking" , "" )
546+ if thinking_text :
547+ part = types .Part (text = thinking_text , thought = True )
548+ signature = block .get ("signature" )
549+ if signature :
550+ decoded_signature = _decode_thought_signature (signature )
551+ part .thought_signature = decoded_signature or str (
552+ signature
553+ ).encode ("utf-8" )
554+ parts .append (part )
555+ continue
556+ # Fall back to text extraction for non-thinking-block items.
557+ for text in _iter_reasoning_texts (block ):
558+ if text :
559+ parts .append (types .Part (text = text , thought = True ))
530560 return parts
531561 return [
532562 types .Part (text = text , thought = True )
@@ -538,16 +568,16 @@ def _convert_reasoning_value_to_parts(reasoning_value: Any) -> List[types.Part]:
538568def _extract_reasoning_value (message : Message | Delta | None ) -> Any :
539569 """Fetches the reasoning payload from a LiteLLM message.
540570
541- Checks for 'thinking_blocks' (Anthropic structured format with signatures),
542- 'reasoning_content' (LiteLLM standard, used by Azure/Foundry, Ollama via
543- LiteLLM) and 'reasoning' (used by LM Studio, vLLM).
544- Prioritizes 'thinking_blocks' when present (Anthropic models), then
545- 'reasoning_content', then 'reasoning' .
571+ Checks for 'thinking_blocks' (Anthropic thinking with signatures),
572+ 'reasoning_content' (LiteLLM standard, used by Azure/Foundry,
573+ Ollama via LiteLLM), and 'reasoning' (used by LM Studio, vLLM).
574+ Prioritizes 'thinking_blocks' when the key is present, as they contain
575+ the signature required for Anthropic's extended thinking API .
546576 """
547577 if message is None :
548578 return None
549- # Anthropic models return thinking_blocks with type/thinking/signature fields.
550- # This must be preserved to maintain thinking across tool call boundaries .
579+ # Prefer thinking_blocks (Anthropic) — they carry per-block signatures
580+ # needed for multi-turn conversations with extended thinking.
551581 thinking_blocks = message .get ("thinking_blocks" )
552582 if thinking_blocks is not None :
553583 return thinking_blocks
@@ -999,7 +1029,7 @@ async def _content_to_message_param(
9991029 if part .text and part .thought_signature :
10001030 sig = part .thought_signature
10011031 if isinstance (sig , bytes ):
1002- sig = sig .decode ("utf-8" )
1032+ sig = base64 . b64encode ( sig ) .decode ("utf-8" )
10031033 thinking_blocks .append ({
10041034 "type" : "thinking" ,
10051035 "thinking" : part .text ,
@@ -1026,6 +1056,34 @@ async def _content_to_message_param(
10261056 ):
10271057 reasoning_texts .append (_decode_inline_text_data (part .inline_data .data ))
10281058
1059+ # Anthropic routes require thinking blocks to be embedded directly in the
1060+ # message content list. LiteLLM's prompt template for Anthropic drops the
1061+ # top-level reasoning_content field, so thinking blocks disappear from
1062+ # multi-turn histories and the model stops producing them after the first
1063+ # turn. Signatures are required by the Anthropic API for thinking blocks in
1064+ # multi-turn conversations. On multi-model platforms (bedrock, vertex_ai)
1065+ # this must only apply to actual Claude models, not Gemini/Llama/etc.
1066+ if reasoning_parts and _is_anthropic_route (provider , model ):
1067+ content_list = []
1068+ for part in reasoning_parts :
1069+ if part .text :
1070+ block = {"type" : "thinking" , "thinking" : part .text }
1071+ if part .thought_signature :
1072+ sig = part .thought_signature
1073+ if isinstance (sig , bytes ):
1074+ sig = base64 .b64encode (sig ).decode ("utf-8" )
1075+ block ["signature" ] = sig
1076+ content_list .append (block )
1077+ if isinstance (final_content , list ):
1078+ content_list .extend (final_content )
1079+ elif final_content :
1080+ content_list .append ({"type" : "text" , "text" : final_content })
1081+ return ChatCompletionAssistantMessage (
1082+ role = role ,
1083+ content = content_list or None ,
1084+ tool_calls = tool_calls or None ,
1085+ )
1086+
10291087 # Preserve reasoning deltas exactly as received. Injecting separators
10301088 # between fragments can corrupt provider-streamed thinking text.
10311089 reasoning_content = "" .join (text for text in reasoning_texts if text )
0 commit comments