Skip to content

Commit 6b8d3d1

Browse files
authored
use unified thinking for pydantic-ai (#9477)
## 📝 Summary <!-- If this PR closes any issues, list them here by number (e.g., Closes #123). Detail the specific changes made in this pull request. Explain the problem addressed and how it was resolved. If applicable, provide before and after comparisons, screenshots, or any relevant details to help reviewers understand the changes easily. --> Closes #9417. This avoids specifying thinking and params for individual models. https://pydantic.dev/docs/ai/advanced-features/thinking/#unified-thinking-settings ## 📋 Pre-Review Checklist <!-- These checks need to be completed before a PR is reviewed --> - [x] For large changes, or changes that affect the public API: this change was discussed or approved through an issue, on [Discord](https://marimo.io/discord?ref=pr), or the community [discussions](https://github.com/marimo-team/marimo/discussions) (Please provide a link if applicable). - [x] Any AI-generated code has been reviewed line-by-line by the human PR author, who stands by it. - [ ] Video or media evidence is provided for any visual changes (optional). <!-- PR is more likely to be merged if evidence is provided for changes made --> ## ✅ Merge Checklist - [x] I have read the [contributor guidelines](https://github.com/marimo-team/marimo/blob/main/CONTRIBUTING.md). - [ ] Documentation has been updated where applicable, including docstrings for API changes. - [x] Tests have been added for the changes made.
1 parent 297fc2e commit 6b8d3d1

4 files changed

Lines changed: 279 additions & 199 deletions

File tree

marimo/_server/ai/providers.py

Lines changed: 88 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,16 @@
3838
if TYPE_CHECKING:
3939
from collections.abc import AsyncGenerator, AsyncIterator
4040

41-
from anthropic.types.beta import BetaThinkingConfigParam
4241
from openai import AsyncOpenAI
43-
from openai.types.shared.reasoning_effort import ReasoningEffort
4442
from pydantic_ai import Agent, DeferredToolRequests, FunctionToolset
4543
from pydantic_ai.models import Model
4644
from pydantic_ai.models.bedrock import BedrockConverseModel
4745
from pydantic_ai.models.google import GoogleModel
48-
from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel
46+
from pydantic_ai.models.openai import (
47+
OpenAIChatModel,
48+
OpenAIResponsesModel,
49+
OpenAIResponsesModelSettings,
50+
)
4951
from pydantic_ai.providers import Provider
5052
from pydantic_ai.providers.anthropic import (
5153
AnthropicProvider as PydanticAnthropic,
@@ -55,6 +57,7 @@
5557
)
5658
from pydantic_ai.providers.google import GoogleProvider as PydanticGoogle
5759
from pydantic_ai.providers.openai import OpenAIProvider as PydanticOpenAI
60+
from pydantic_ai.settings import ModelSettings, ThinkingLevel
5861
from pydantic_ai.ui.vercel_ai.request_types import UIMessage, UIMessagePart
5962
from starlette.responses import StreamingResponse
6063

@@ -127,11 +130,31 @@ def create_agent(
127130
toolset, output_type = self._get_toolsets_and_output_type(tools)
128131
return Agent(
129132
model,
133+
model_settings=self._build_agent_settings(model),
130134
toolsets=[toolset] if tools else None,
131135
instructions=system_prompt,
132136
output_type=output_type,
133137
)
134138

139+
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
140+
"""Settings applied at agent level on every request."""
141+
from pydantic_ai.settings import ModelSettings
142+
143+
thinking = self._default_thinking(model)
144+
if thinking is None:
145+
return None
146+
if not (
147+
model.profile.supports_thinking
148+
or model.profile.thinking_always_enabled
149+
):
150+
return None
151+
return ModelSettings(thinking=thinking)
152+
153+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
154+
"""Default unified thinking flag. Return None to skip."""
155+
del model
156+
return True
157+
135158
def convert_messages(
136159
self, messages: list[ServerUIMessage]
137160
) -> list[UIMessage]:
@@ -277,11 +300,7 @@ def create_model(self, max_tokens: int) -> GoogleModel:
277300
return GoogleModel(
278301
model_name=self.model,
279302
provider=self.provider,
280-
settings=GoogleModelSettings(
281-
max_tokens=max_tokens,
282-
# Works on non-thinking models too
283-
google_thinking_config={"include_thoughts": True},
284-
),
303+
settings=GoogleModelSettings(max_tokens=max_tokens),
285304
)
286305

287306

@@ -367,9 +386,9 @@ def get_openai_client(self, config: AnyProviderConfig) -> AsyncOpenAI:
367386

368387

369388
class OpenAIProvider(OpenAIClientMixin, PydanticProvider["PydanticOpenAI"]):
370-
# Medium effort provides a balance between speed and accuracy
371389
# https://openai.com/index/openai-o3-mini/
372-
DEFAULT_REASONING_EFFORT: ReasoningEffort = "medium"
390+
# 'auto' lets OpenAI decide between detailed/concise based on the prompt;
391+
# marimo wants reasoning summaries surfaced for display.
373392
DEFAULT_REASONING_SUMMARY: Literal["detailed", "concise", "auto"] = "auto"
374393

375394
def create_provider(self, config: AnyProviderConfig) -> PydanticOpenAI:
@@ -386,62 +405,41 @@ def create_model(self, max_tokens: int) -> OpenAIResponsesModel:
386405
OpenAIResponsesModelSettings,
387406
)
388407

389-
is_reasoning_model = self._is_reasoning_model(self.model)
390-
391-
settings = (
392-
OpenAIResponsesModelSettings(
393-
max_tokens=max_tokens,
394-
openai_reasoning_summary=self.DEFAULT_REASONING_SUMMARY,
395-
openai_reasoning_effort=self.DEFAULT_REASONING_EFFORT,
396-
)
397-
if is_reasoning_model
398-
else OpenAIResponsesModelSettings(max_tokens=max_tokens)
399-
)
400408
return OpenAIResponsesModel(
401409
model_name=self.model,
402410
provider=self.provider,
403-
settings=settings,
411+
settings=OpenAIResponsesModelSettings(max_tokens=max_tokens),
404412
)
405413

406-
def _is_reasoning_model(self, model: str) -> bool:
407-
"""
408-
Check if reasoning_effort should be added to the request.
409-
Only add for actual OpenAI reasoning models, not for OpenAI-compatible APIs.
410-
411-
OpenAI-compatible APIs (identified by custom base_url) may not support
412-
the reasoning_effort parameter even if the model name suggests it's a
413-
reasoning model.
414-
"""
415-
import re
416-
417-
# Check for reasoning model patterns: o{digit} or gpt-5, with optional openai/ prefix
418-
reasoning_patterns = [
419-
r"^openai/o\d", # openai/o1, openai/o3, etc.
420-
r"^o\d", # o1, o3, etc.
421-
r"^openai/gpt-5", # openai/gpt-5*
422-
r"^gpt-5", # gpt-5*
423-
]
424-
425-
is_reasoning_model_name = any(
426-
re.match(pattern, model) for pattern in reasoning_patterns
427-
)
428-
429-
if not is_reasoning_model_name:
430-
return False
431-
432-
# If using a custom base_url that's not OpenAI, don't assume reasoning is supported
433-
return not (
414+
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
415+
# `reasoning.summary` is only valid for OpenAI reasoning models (gpt-5
416+
# and the o-series).
417+
settings = super()._build_agent_settings(model)
418+
if settings is not None and "thinking" in settings:
419+
extra: OpenAIResponsesModelSettings = {
420+
"openai_reasoning_summary": self.DEFAULT_REASONING_SUMMARY,
421+
}
422+
settings.update(extra)
423+
return settings
424+
425+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
426+
# OpenAI-compatible third-party endpoints (custom base_url) may not
427+
# accept `reasoning_effort` even when the model name looks like a
428+
# reasoning model. Suppress the unified thinking flag in that case.
429+
if (
434430
self.config.base_url
435431
and "api.openai.com" not in self.config.base_url
436-
)
432+
):
433+
return None
434+
return super()._default_thinking(model)
437435

438436

439437
class AzureOpenAIProvider(OpenAIProvider):
440-
def _is_reasoning_model(self, model: str) -> bool:
441-
# https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
442-
# Only custom models support reasoning effort, we can expose this as a parameter in the future
438+
# Only custom Azure deployments support `reasoning_effort`, and we don't expose that config yet.
439+
# https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
440+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
443441
del model
444-
return False
442+
return None
445443

446444
def _handle_azure_openai(self, base_url: str) -> tuple[str, str, str]:
447445
"""Handle Azure OpenAI.
@@ -689,34 +687,35 @@ def create_agent(
689687
)
690688
model = self.create_model(max_tokens)
691689

690+
agent_settings = ModelSettings(max_tokens=max_tokens)
691+
agent_settings.update(self._build_agent_settings(model) or {})
692+
692693
toolset, output_type = self._get_toolsets_and_output_type(tools)
693694
return Agent(
694695
model,
695-
model_settings=ModelSettings(max_tokens=max_tokens),
696+
model_settings=agent_settings,
696697
toolsets=[toolset] if tools else None,
697698
instructions=system_prompt,
698699
output_type=output_type,
699700
)
700701

702+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
703+
# Custom OpenAI-compatible endpoints (Together, vLLM, LM Studio, ...)
704+
# often don't honor `reasoning_effort`
705+
if self._is_openai_compatible():
706+
return None
707+
return super()._default_thinking(model)
708+
701709

702710
class AnthropicProvider(PydanticProvider["PydanticAnthropic"]):
703711
# Temperature of 0.2 was recommended for coding and data science in these links:
704712
# https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api/172683
705713
# https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/reduce-latency?utm_source=chatgpt.com
706714
DEFAULT_TEMPERATURE = 0.2
707715

708-
# Extended thinking defaults based on:
716+
# Extended thinking requires temperature of 1.
709717
# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
710-
# Extended thinking requires temperature of 1
711718
DEFAULT_EXTENDED_THINKING_TEMPERATURE = 1
712-
EXTENDED_THINKING_MODEL_PREFIXES = [
713-
"claude-opus-4",
714-
"claude-sonnet-4",
715-
"claude-haiku-4-5",
716-
"claude-3-7-sonnet",
717-
]
718-
# 1024 tokens is the minimum budget for extended thinking
719-
DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS = 1024
720719

721720
def create_provider(self, config: AnyProviderConfig) -> PydanticAnthropic:
722721
from pydantic_ai.providers.anthropic import (
@@ -730,36 +729,33 @@ def create_model(self, max_tokens: int) -> Model:
730729
AnthropicModel,
731730
AnthropicModelSettings,
732731
)
732+
from pydantic_ai.profiles.anthropic import (
733+
AnthropicModelProfile,
734+
anthropic_model_profile,
735+
)
733736

734-
is_thinking_model = self.is_extended_thinking_model(self.model)
735-
thinking_config: BetaThinkingConfigParam = {"type": "disabled"}
736-
if is_thinking_model:
737-
thinking_config = {
738-
"type": "enabled",
739-
"budget_tokens": self.DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS,
740-
}
737+
settings: AnthropicModelSettings = {"max_tokens": max_tokens}
738+
739+
# Anthropic extended thinking requires temperature=1; non-thinking
740+
# models keep our default coding temperature. Some adaptive-only
741+
# models (Opus 4.7+) reject sampling settings entirely — skip
742+
# `temperature` for them so pydantic-ai doesn't drop it with a warning.
743+
profile = AnthropicModelProfile.from_profile(
744+
anthropic_model_profile(self.model)
745+
)
746+
if not getattr(
747+
profile, "anthropic_disallows_sampling_settings", False
748+
):
749+
settings["temperature"] = (
750+
self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
751+
if profile.supports_thinking
752+
else self.DEFAULT_TEMPERATURE
753+
)
741754

742755
return AnthropicModel(
743756
model_name=self.model,
744757
provider=self.provider,
745-
settings=AnthropicModelSettings(
746-
max_tokens=max_tokens,
747-
temperature=self.get_temperature(),
748-
anthropic_thinking=thinking_config,
749-
),
750-
)
751-
752-
def is_extended_thinking_model(self, model: str) -> bool:
753-
return any(
754-
model.startswith(prefix)
755-
for prefix in self.EXTENDED_THINKING_MODEL_PREFIXES
756-
)
757-
758-
def get_temperature(self) -> float:
759-
return (
760-
self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
761-
if self.is_extended_thinking_model(self.model)
762-
else self.DEFAULT_TEMPERATURE
758+
settings=settings,
763759
)
764760

765761
def convert_messages(
@@ -825,10 +821,7 @@ def create_model(self, max_tokens: int) -> BedrockConverseModel:
825821
return BedrockConverseModel(
826822
model_name=self.model,
827823
provider=self.provider,
828-
settings=BedrockModelSettings(
829-
max_tokens=max_tokens,
830-
# TODO: Add reasoning support
831-
),
824+
settings=BedrockModelSettings(max_tokens=max_tokens),
832825
)
833826

834827

packages/llm-info/data/models.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@
33

44
# Anthropic
55

6+
- name: Claude Opus 4.7
7+
model: claude-opus-4-7
8+
description: Latest Opus model, strongest for coding and long-running professional tasks
9+
providers: [anthropic]
10+
roles: [chat, edit]
11+
thinking: true
12+
613
- name: Claude Sonnet 4.6
714
model: claude-sonnet-4-6
815
description: Most capable Sonnet-class model, with frontier performance across coding, agents, and professional work

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ dev = [
140140
# For linting
141141
"ruff>=0.15.9",
142142
# For AI
143-
"pydantic-ai-slim[openai]>=1.71.0",
143+
"pydantic-ai-slim[openai]>=1.84.0",
144144
]
145145

146146
test = [
@@ -203,7 +203,7 @@ test-optional = [
203203
"anywidget~=0.9.21",
204204
"ipython~=8.12.3",
205205
# testing gen ai
206-
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.71.0",
206+
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.84.0",
207207
# - google-auth uses cachetools, and cachetools<5.0.0 uses collections.MutableMapping (removed in Python 3.10)
208208
"cachetools>=5.0.0",
209209
"boto3>=1.38.46",
@@ -240,7 +240,7 @@ typecheck = [
240240
"sqlalchemy>=2.0.40",
241241
"obstore>=0.8.2",
242242
"fsspec>=2026.2.0",
243-
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.71.0",
243+
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.84.0",
244244
"loro>=1.5.0",
245245
"boto3-stubs>=1.38.46",
246246
"pandas-stubs>=1.5.3.230321",

0 commit comments

Comments (0)