
Commit 16952bd

GWeale authored and copybara-github committed
feat: Add support for Anthropic's thinking blocks
This change enables Anthropic's thinking feature by mapping
`GenerateContentConfig.thinking_config` to the Anthropic API's `thinking`
parameter. It handles both non-streaming and streaming responses, converting
Anthropic's ThinkingBlock and ThinkingDelta into `types.Part` objects with
`thought=True`. Redacted thinking blocks are preserved as Parts with the
encrypted blob in `thought_signature`, so they round-trip back to Claude on
subsequent turns.

Co-authored-by: George Weale <gweale@google.com>
PiperOrigin-RevId: 905273540
1 parent 3838dd4 commit 16952bd

2 files changed: 649 additions & 6 deletions

File tree:

  src/google/adk/models/anthropic_llm.py (145 additions & 6 deletions)
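
For context, a minimal caller-side sketch (not part of this commit; the LlmAgent wiring is assumed from ADK and the model id is a placeholder):

    from google.adk.agents import LlmAgent
    from google.adk.models.anthropic_llm import Claude
    from google.genai import types

    # thinking_budget follows google.genai.types.ThinkingConfig semantics:
    # 0 disables thinking; a positive value (>= 1024 for Anthropic) becomes
    # Anthropic's `budget_tokens`.
    agent = LlmAgent(
        name="thinking_agent",
        model=Claude(model="claude-sonnet-4@20250514"),
        generate_content_config=types.GenerateContentConfig(
            thinking_config=types.ThinkingConfig(thinking_budget=2048),
        ),
    )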
@@ -62,6 +62,62 @@ class _ToolUseAccumulator:
   args_json: str
 
 
+@dataclasses.dataclass
+class _ThinkingAccumulator:
+  """Accumulates streamed thinking content block data."""
+
+  thinking: str
+  signature: str
+
+
+def _build_anthropic_thinking_param(
+    config: Optional[types.GenerateContentConfig],
+) -> Union[
+    anthropic_types.ThinkingConfigEnabledParam,
+    anthropic_types.ThinkingConfigDisabledParam,
+    NotGiven,
+]:
+  """Maps genai ThinkingConfig to Anthropic's thinking parameter.
+
+  Per ``google.genai.types.ThinkingConfig``, ``thinking_budget`` semantics are:
+  * ``None``: not specified; the genai default is model-dependent. Anthropic
+    requires an explicit ``budget_tokens`` whenever thinking is enabled, so
+    we surface this as a ``ValueError`` to keep the developer's intent
+    explicit (mirroring the Anthropic API).
+  * ``0``: thinking is DISABLED.
+  * ``-1``: AUTOMATIC; not supported by Anthropic models.
+  * positive int: budget in tokens (Anthropic requires ``>= 1024`` and
+    ``< max_tokens``; validation is delegated to the Anthropic API so the
+    caller gets the canonical error message).
+  """
+  if not config or not config.thinking_config:
+    return NOT_GIVEN
+
+  thinking_budget = config.thinking_config.thinking_budget
+
+  if thinking_budget is None:
+    raise ValueError(
+        "thinking_budget must be set explicitly when ThinkingConfig is"
+        " provided for Anthropic models. Use 0 to disable thinking, or a"
+        " positive integer (>= 1024) for the token budget."
+    )
+
+  if thinking_budget == 0:
+    return anthropic_types.ThinkingConfigDisabledParam(type="disabled")
+
+  if thinking_budget < 0:
+    raise ValueError(
+        f"thinking_budget={thinking_budget} is not supported for Anthropic"
+        " models (AUTOMATIC mode is unavailable). Use a positive integer"
+        " (>= 1024) for the token budget, or 0 to disable thinking."
+    )
+
+  return anthropic_types.ThinkingConfigEnabledParam(
+      type="enabled",
+      budget_tokens=thinking_budget,
+  )
+
+
 class ClaudeRequest(BaseModel):
   system_instruction: str
   messages: Iterable[anthropic_types.MessageParam]
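
To make the budget mapping concrete, a small illustration (not part of the diff; the inline comments show what each call produces, and the Anthropic Param classes are TypedDicts, so they compare equal to plain dicts):

    from google.genai import types

    def _cfg(budget):
      return types.GenerateContentConfig(
          thinking_config=types.ThinkingConfig(thinking_budget=budget)
      )

    assert _build_anthropic_thinking_param(None) is NOT_GIVEN
    assert _build_anthropic_thinking_param(_cfg(0)) == {"type": "disabled"}
    assert _build_anthropic_thinking_param(_cfg(2048)) == {
        "type": "enabled",
        "budget_tokens": 2048,
    }
    for bad_budget in (-1, None):  # AUTOMATIC and unset budgets are rejected
      try:
        _build_anthropic_thinking_param(_cfg(bad_budget))
      except ValueError:
        pass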
@@ -104,11 +160,28 @@ def part_to_message_block(
     part: types.Part,
 ) -> Union[
     anthropic_types.TextBlockParam,
+    anthropic_types.ThinkingBlockParam,
+    anthropic_types.RedactedThinkingBlockParam,
     anthropic_types.ImageBlockParam,
     anthropic_types.DocumentBlockParam,
     anthropic_types.ToolUseBlockParam,
     anthropic_types.ToolResultBlockParam,
 ]:
+  if part.thought and part.text:
+    signature = ""
+    if part.thought_signature:
+      signature = part.thought_signature.decode("utf-8")
+    return anthropic_types.ThinkingBlockParam(
+        type="thinking",
+        thinking=part.text,
+        signature=signature,
+    )
+  if part.thought and part.thought_signature:
+    # Redacted thinking: no plaintext, only the encrypted blob produced by
+    # content_block_to_part for round-tripping back to Claude.
+    return anthropic_types.RedactedThinkingBlockParam(
+        type="redacted_thinking",
+        data=part.thought_signature.decode("utf-8"),
+    )
   if part.text:
     return anthropic_types.TextBlockParam(text=part.text, type="text")
   elif part.function_call:
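
A round-trip sketch for the two thought branches above (hand-built Part values; the signature bytes are placeholders):

    # Signed thinking: plaintext plus signature.
    part_to_message_block(
        types.Part(
            text="Checking the premises first.",
            thought=True,
            thought_signature=b"sig-from-claude",
        )
    )
    # -> {"type": "thinking", "thinking": "Checking the premises first.",
    #     "signature": "sig-from-claude"}

    # Redacted thinking: no plaintext, only the encrypted blob.
    part_to_message_block(
        types.Part(thought=True, thought_signature=b"encrypted-blob")
    )
    # -> {"type": "redacted_thinking", "data": "encrypted-blob"}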
@@ -220,6 +293,19 @@ def content_to_message_param(
 def content_block_to_part(
     content_block: anthropic_types.ContentBlock,
 ) -> types.Part:
+  """Converts an Anthropic content block to a genai Part."""
+  if isinstance(content_block, anthropic_types.ThinkingBlock):
+    part = types.Part(text=content_block.thinking, thought=True)
+    if content_block.signature:
+      part.thought_signature = content_block.signature.encode("utf-8")
+    return part
+  if isinstance(content_block, anthropic_types.RedactedThinkingBlock):
+    # Preserve the encrypted blob so it can round-trip back to Claude in
+    # the next turn; required to keep the model's reasoning chain intact.
+    return types.Part(
+        thought=True,
+        thought_signature=content_block.data.encode("utf-8"),
+    )
   if isinstance(content_block, anthropic_types.TextBlock):
     return types.Part.from_text(text=content_block.text)
   if isinstance(content_block, anthropic_types.ToolUseBlock):
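
And the inverse direction, converting a ThinkingBlock received from the API (illustrative values):

    block = anthropic_types.ThinkingBlock(
        type="thinking", thinking="Comparing both options.", signature="abc123"
    )
    part = content_block_to_part(block)
    # part.text == "Comparing both options."
    # part.thought is True
    # part.thought_signature == b"abc123"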
@@ -229,7 +315,9 @@ def content_block_to_part(
     )
     part.function_call.id = content_block.id
     return part
-  raise NotImplementedError("Not supported yet.")
+  raise NotImplementedError(
+      f"Unsupported content block type: {type(content_block)}"
+  )
 
 
 def message_to_generate_content_response(
@@ -241,10 +329,12 @@
       message.model_dump_json(indent=2, exclude_none=True),
   )
 
+  parts = [content_block_to_part(cb) for cb in message.content]
+
   return LlmResponse(
       content=types.Content(
           role="model",
-          parts=[content_block_to_part(cb) for cb in message.content],
+          parts=parts,
      ),
      usage_metadata=types.GenerateContentResponseUsageMetadata(
          prompt_token_count=message.usage.input_tokens,
@@ -401,6 +491,7 @@ async def generate_content_async(
         if llm_request.tools_dict
         else NOT_GIVEN
     )
+    thinking = _build_anthropic_thinking_param(llm_request.config)
 
     if not stream:
       message = await self._anthropic_client.messages.create(
@@ -410,11 +501,12 @@
           tools=tools,
           tool_choice=tool_choice,
           max_tokens=self.max_tokens,
+          thinking=thinking,
       )
       yield message_to_generate_content_response(message)
     else:
       async for response in self._generate_content_streaming(
-          llm_request, messages, tools, tool_choice
+          llm_request, messages, tools, tool_choice, thinking
       ):
         yield response
@@ -424,6 +516,11 @@ async def _generate_content_streaming(
       messages: list[anthropic_types.MessageParam],
       tools: Union[Iterable[anthropic_types.ToolUnionParam], NotGiven],
       tool_choice: Union[anthropic_types.ToolChoiceParam, NotGiven],
+      thinking: Union[
+          anthropic_types.ThinkingConfigEnabledParam,
+          anthropic_types.ThinkingConfigDisabledParam,
+          NotGiven,
+      ] = NOT_GIVEN,
   ) -> AsyncGenerator[LlmResponse, None]:
     """Handles streaming responses from Anthropic models.
@@ -439,12 +536,15 @@
         tool_choice=tool_choice,
         max_tokens=self.max_tokens,
         stream=True,
+        thinking=thinking,
     )
 
     # Track content blocks being built during streaming.
     # Each entry maps a block index to its accumulated state.
     text_blocks: dict[int, str] = {}
     tool_use_blocks: dict[int, _ToolUseAccumulator] = {}
+    thinking_blocks: dict[int, _ThinkingAccumulator] = {}
+    redacted_thinking_blocks: dict[int, str] = {}
    input_tokens = 0
    output_tokens = 0
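
How the accumulators fill for a stream carrying one thinking block and one text block (a standalone sketch; real events come from the SDK stream object):

    thinking_blocks = {}
    text_blocks = {}

    # content_block_start: ThinkingBlock at index 0.
    thinking_blocks[0] = _ThinkingAccumulator(thinking="", signature="")
    # Two ThinkingDeltas arrive; each one also yields a partial LlmResponse.
    thinking_blocks[0].thinking += "Let me "
    thinking_blocks[0].thinking += "reason through this."
    # content_block_start + TextDeltas: TextBlock at index 1.
    text_blocks[1] = "The answer is 42."

    assert thinking_blocks[0].thinking == "Let me reason through this."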

@@ -455,7 +555,15 @@
 
       elif event.type == "content_block_start":
         block = event.content_block
-        if isinstance(block, anthropic_types.TextBlock):
+        if isinstance(block, anthropic_types.ThinkingBlock):
+          thinking_blocks[event.index] = _ThinkingAccumulator(
+              thinking=block.thinking,
+              signature=block.signature,
+          )
+        elif isinstance(block, anthropic_types.RedactedThinkingBlock):
+          # Redacted blocks arrive fully formed at start; no deltas follow.
+          redacted_thinking_blocks[event.index] = block.data
+        elif isinstance(block, anthropic_types.TextBlock):
           text_blocks[event.index] = block.text
         elif isinstance(block, anthropic_types.ToolUseBlock):
           tool_use_blocks[event.index] = _ToolUseAccumulator(
@@ -466,7 +574,20 @@
 
      elif event.type == "content_block_delta":
        delta = event.delta
-        if isinstance(delta, anthropic_types.TextDelta):
+        if isinstance(delta, anthropic_types.ThinkingDelta):
+          thinking_blocks.setdefault(
+              event.index,
+              _ThinkingAccumulator(thinking="", signature=""),
+          )
+          thinking_blocks[event.index].thinking += delta.thinking
+          yield LlmResponse(
+              content=types.Content(
+                  role="model",
+                  parts=[types.Part(text=delta.thinking, thought=True)],
+              ),
+              partial=True,
+          )
+        elif isinstance(delta, anthropic_types.TextDelta):
           text_blocks.setdefault(event.index, "")
           text_blocks[event.index] += delta.text
           yield LlmResponse(
@@ -486,9 +607,27 @@
     # Build the final aggregated response with all content.
     all_parts: list[types.Part] = []
     all_indices = sorted(
-        set(list(text_blocks.keys()) + list(tool_use_blocks.keys()))
+        set(
+            list(thinking_blocks.keys())
+            + list(redacted_thinking_blocks.keys())
+            + list(text_blocks.keys())
+            + list(tool_use_blocks.keys())
+        )
     )
     for idx in all_indices:
+      if idx in thinking_blocks:
+        acc = thinking_blocks[idx]
+        part = types.Part(text=acc.thinking, thought=True)
+        if acc.signature:
+          part.thought_signature = acc.signature.encode("utf-8")
+        all_parts.append(part)
+      if idx in redacted_thinking_blocks:
+        all_parts.append(
+            types.Part(
+                thought=True,
+                thought_signature=redacted_thinking_blocks[idx].encode("utf-8"),
+            )
+        )
       if idx in text_blocks:
         all_parts.append(types.Part.from_text(text=text_blocks[idx]))
       if idx in tool_use_blocks:
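
Given the sketched stream above, the final aggregated response keeps the thinking Part ahead of the text Part, preserving block order (a hand-built illustration, not SDK output):

    final = LlmResponse(
        content=types.Content(
            role="model",
            parts=[
                types.Part(text="Let me reason through this.", thought=True),
                types.Part(text="The answer is 42."),
            ],
        ),
    )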
