@@ -62,6 +62,62 @@ class _ToolUseAccumulator:
6262 args_json : str
6363
6464
65+ @dataclasses .dataclass
66+ class _ThinkingAccumulator :
67+ """Accumulates streamed thinking content block data."""
68+
69+ thinking : str
70+ signature : str
71+
72+
def _build_anthropic_thinking_param(
    config: Optional[types.GenerateContentConfig],
) -> Union[
    anthropic_types.ThinkingConfigEnabledParam,
    anthropic_types.ThinkingConfigDisabledParam,
    NotGiven,
]:
  """Translates a genai ThinkingConfig into Anthropic's thinking parameter.

  ``thinking_budget`` semantics, per ``google.genai.types.ThinkingConfig``:

  * ``None``: not specified; the genai default is model-dependent. Anthropic
    requires an explicit ``budget_tokens`` whenever thinking is enabled, so
    this case is surfaced as a ``ValueError`` to keep the developer's intent
    explicit (mirroring the Anthropic API).
  * ``0``: thinking is DISABLED.
  * ``-1``: AUTOMATIC; not supported by Anthropic models.
  * positive int: budget in tokens (Anthropic requires ``>= 1024`` and
    ``< max_tokens``; validation is delegated to the Anthropic API so the
    caller gets the canonical error message).
  """
  # No config or no thinking config at all: leave the parameter unset.
  if not config or not config.thinking_config:
    return NOT_GIVEN

  budget = config.thinking_config.thinking_budget

  if budget is None:
    raise ValueError(
        "thinking_budget must be set explicitly when ThinkingConfig is"
        " provided for Anthropic models. Use 0 to disable thinking, or a"
        " positive integer (>= 1024) for the token budget."
    )

  if budget == 0:
    # Explicitly disabled by the caller.
    return anthropic_types.ThinkingConfigDisabledParam(type="disabled")

  if budget < 0:
    # Covers -1 (AUTOMATIC) and any other negative sentinel values.
    raise ValueError(
        f"thinking_budget={budget} is not supported for Anthropic"
        " models (AUTOMATIC mode is unavailable). Use a positive integer"
        " (>= 1024) for the token budget, or 0 to disable thinking."
    )

  return anthropic_types.ThinkingConfigEnabledParam(
      type="enabled",
      budget_tokens=budget,
  )
119+
120+
65121class ClaudeRequest (BaseModel ):
66122 system_instruction : str
67123 messages : Iterable [anthropic_types .MessageParam ]
@@ -104,11 +160,28 @@ def part_to_message_block(
104160 part : types .Part ,
105161) -> Union [
106162 anthropic_types .TextBlockParam ,
163+ anthropic_types .ThinkingBlockParam ,
107164 anthropic_types .ImageBlockParam ,
108165 anthropic_types .DocumentBlockParam ,
109166 anthropic_types .ToolUseBlockParam ,
110167 anthropic_types .ToolResultBlockParam ,
111168]:
169+ if part .thought and part .text :
170+ signature = ""
171+ if part .thought_signature :
172+ signature = part .thought_signature .decode ("utf-8" )
173+ return anthropic_types .ThinkingBlockParam (
174+ type = "thinking" ,
175+ thinking = part .text ,
176+ signature = signature ,
177+ )
178+ if part .thought and part .thought_signature :
179+ # Redacted thinking: no plaintext, only the encrypted blob produced by
180+ # content_block_to_part for round-tripping back to Claude.
181+ return anthropic_types .RedactedThinkingBlockParam (
182+ type = "redacted_thinking" ,
183+ data = part .thought_signature .decode ("utf-8" ),
184+ )
112185 if part .text :
113186 return anthropic_types .TextBlockParam (text = part .text , type = "text" )
114187 elif part .function_call :
@@ -220,6 +293,19 @@ def content_to_message_param(
220293def content_block_to_part (
221294 content_block : anthropic_types .ContentBlock ,
222295) -> types .Part :
296+ """Converts an Anthropic content block to a genai Part."""
297+ if isinstance (content_block , anthropic_types .ThinkingBlock ):
298+ part = types .Part (text = content_block .thinking , thought = True )
299+ if content_block .signature :
300+ part .thought_signature = content_block .signature .encode ("utf-8" )
301+ return part
302+ if isinstance (content_block , anthropic_types .RedactedThinkingBlock ):
303+ # Preserve the encrypted blob so it can round-trip back to Claude in
304+ # the next turn; required to keep the model's reasoning chain intact.
305+ return types .Part (
306+ thought = True ,
307+ thought_signature = content_block .data .encode ("utf-8" ),
308+ )
223309 if isinstance (content_block , anthropic_types .TextBlock ):
224310 return types .Part .from_text (text = content_block .text )
225311 if isinstance (content_block , anthropic_types .ToolUseBlock ):
@@ -229,7 +315,9 @@ def content_block_to_part(
229315 )
230316 part .function_call .id = content_block .id
231317 return part
232- raise NotImplementedError ("Not supported yet." )
318+ raise NotImplementedError (
319+ f"Unsupported content block type: { type (content_block )} "
320+ )
233321
234322
235323def message_to_generate_content_response (
@@ -241,10 +329,12 @@ def message_to_generate_content_response(
241329 message .model_dump_json (indent = 2 , exclude_none = True ),
242330 )
243331
332+ parts = [content_block_to_part (cb ) for cb in message .content ]
333+
244334 return LlmResponse (
245335 content = types .Content (
246336 role = "model" ,
247- parts = [ content_block_to_part ( cb ) for cb in message . content ] ,
337+ parts = parts ,
248338 ),
249339 usage_metadata = types .GenerateContentResponseUsageMetadata (
250340 prompt_token_count = message .usage .input_tokens ,
@@ -401,6 +491,7 @@ async def generate_content_async(
401491 if llm_request .tools_dict
402492 else NOT_GIVEN
403493 )
494+ thinking = _build_anthropic_thinking_param (llm_request .config )
404495
405496 if not stream :
406497 message = await self ._anthropic_client .messages .create (
@@ -410,11 +501,12 @@ async def generate_content_async(
410501 tools = tools ,
411502 tool_choice = tool_choice ,
412503 max_tokens = self .max_tokens ,
504+ thinking = thinking ,
413505 )
414506 yield message_to_generate_content_response (message )
415507 else :
416508 async for response in self ._generate_content_streaming (
417- llm_request , messages , tools , tool_choice
509+ llm_request , messages , tools , tool_choice , thinking
418510 ):
419511 yield response
420512
@@ -424,6 +516,11 @@ async def _generate_content_streaming(
424516 messages : list [anthropic_types .MessageParam ],
425517 tools : Union [Iterable [anthropic_types .ToolUnionParam ], NotGiven ],
426518 tool_choice : Union [anthropic_types .ToolChoiceParam , NotGiven ],
519+ thinking : Union [
520+ anthropic_types .ThinkingConfigEnabledParam ,
521+ anthropic_types .ThinkingConfigDisabledParam ,
522+ NotGiven ,
523+ ] = NOT_GIVEN ,
427524 ) -> AsyncGenerator [LlmResponse , None ]:
428525 """Handles streaming responses from Anthropic models.
429526
@@ -439,12 +536,15 @@ async def _generate_content_streaming(
439536 tool_choice = tool_choice ,
440537 max_tokens = self .max_tokens ,
441538 stream = True ,
539+ thinking = thinking ,
442540 )
443541
444542 # Track content blocks being built during streaming.
445543 # Each entry maps a block index to its accumulated state.
446544 text_blocks : dict [int , str ] = {}
447545 tool_use_blocks : dict [int , _ToolUseAccumulator ] = {}
546+ thinking_blocks : dict [int , _ThinkingAccumulator ] = {}
547+ redacted_thinking_blocks : dict [int , str ] = {}
448548 input_tokens = 0
449549 output_tokens = 0
450550
@@ -455,7 +555,15 @@ async def _generate_content_streaming(
455555
456556 elif event .type == "content_block_start" :
457557 block = event .content_block
458- if isinstance (block , anthropic_types .TextBlock ):
558+ if isinstance (block , anthropic_types .ThinkingBlock ):
559+ thinking_blocks [event .index ] = _ThinkingAccumulator (
560+ thinking = block .thinking ,
561+ signature = block .signature ,
562+ )
563+ elif isinstance (block , anthropic_types .RedactedThinkingBlock ):
564+ # Redacted blocks arrive fully formed at start; no deltas follow.
565+ redacted_thinking_blocks [event .index ] = block .data
566+ elif isinstance (block , anthropic_types .TextBlock ):
459567 text_blocks [event .index ] = block .text
460568 elif isinstance (block , anthropic_types .ToolUseBlock ):
461569 tool_use_blocks [event .index ] = _ToolUseAccumulator (
@@ -466,7 +574,20 @@ async def _generate_content_streaming(
466574
467575 elif event .type == "content_block_delta" :
468576 delta = event .delta
469- if isinstance (delta , anthropic_types .TextDelta ):
577+ if isinstance (delta , anthropic_types .ThinkingDelta ):
578+ thinking_blocks .setdefault (
579+ event .index ,
580+ _ThinkingAccumulator (thinking = "" , signature = "" ),
581+ )
582+ thinking_blocks [event .index ].thinking += delta .thinking
583+ yield LlmResponse (
584+ content = types .Content (
585+ role = "model" ,
586+ parts = [types .Part (text = delta .thinking , thought = True )],
587+ ),
588+ partial = True ,
589+ )
590+ elif isinstance (delta , anthropic_types .TextDelta ):
470591 text_blocks .setdefault (event .index , "" )
471592 text_blocks [event .index ] += delta .text
472593 yield LlmResponse (
@@ -486,9 +607,27 @@ async def _generate_content_streaming(
486607 # Build the final aggregated response with all content.
487608 all_parts : list [types .Part ] = []
488609 all_indices = sorted (
489- set (list (text_blocks .keys ()) + list (tool_use_blocks .keys ()))
610+ set (
611+ list (thinking_blocks .keys ())
612+ + list (redacted_thinking_blocks .keys ())
613+ + list (text_blocks .keys ())
614+ + list (tool_use_blocks .keys ())
615+ )
490616 )
491617 for idx in all_indices :
618+ if idx in thinking_blocks :
619+ acc = thinking_blocks [idx ]
620+ part = types .Part (text = acc .thinking , thought = True )
621+ if acc .signature :
622+ part .thought_signature = acc .signature .encode ("utf-8" )
623+ all_parts .append (part )
624+ if idx in redacted_thinking_blocks :
625+ all_parts .append (
626+ types .Part (
627+ thought = True ,
628+ thought_signature = redacted_thinking_blocks [idx ].encode ("utf-8" ),
629+ )
630+ )
492631 if idx in text_blocks :
493632 all_parts .append (types .Part .from_text (text = text_blocks [idx ]))
494633 if idx in tool_use_blocks :
0 commit comments