3838if TYPE_CHECKING :
3939 from collections .abc import AsyncGenerator , AsyncIterator
4040
41- from anthropic .types .beta import BetaThinkingConfigParam
4241 from openai import AsyncOpenAI
43- from openai .types .shared .reasoning_effort import ReasoningEffort
4442 from pydantic_ai import Agent , DeferredToolRequests , FunctionToolset
4543 from pydantic_ai .models import Model
4644 from pydantic_ai .models .bedrock import BedrockConverseModel
4745 from pydantic_ai .models .google import GoogleModel
48- from pydantic_ai .models .openai import OpenAIChatModel , OpenAIResponsesModel
46+ from pydantic_ai .models .openai import (
47+ OpenAIChatModel ,
48+ OpenAIResponsesModel ,
49+ OpenAIResponsesModelSettings ,
50+ )
4951 from pydantic_ai .providers import Provider
5052 from pydantic_ai .providers .anthropic import (
5153 AnthropicProvider as PydanticAnthropic ,
5557 )
5658 from pydantic_ai .providers .google import GoogleProvider as PydanticGoogle
5759 from pydantic_ai .providers .openai import OpenAIProvider as PydanticOpenAI
60+ from pydantic_ai .settings import ModelSettings , ThinkingLevel
5861 from pydantic_ai .ui .vercel_ai .request_types import UIMessage , UIMessagePart
5962 from starlette .responses import StreamingResponse
6063
@@ -127,11 +130,31 @@ def create_agent(
127130 toolset , output_type = self ._get_toolsets_and_output_type (tools )
128131 return Agent (
129132 model ,
133+ model_settings = self ._build_agent_settings (model ),
130134 toolsets = [toolset ] if tools else None ,
131135 instructions = system_prompt ,
132136 output_type = output_type ,
133137 )
134138
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Build the model settings applied at the agent level on every request.

    Returns ``None`` when thinking should not be enabled — either because
    the provider opted out via ``_default_thinking`` or because the model's
    profile reports no thinking support — so the agent keeps its defaults.
    """
    from pydantic_ai.settings import ModelSettings

    level = self._default_thinking(model)
    if level is None:
        return None
    profile = model.profile
    thinking_capable = profile.supports_thinking or profile.thinking_always_enabled
    return ModelSettings(thinking=level) if thinking_capable else None
152+
153+ def _default_thinking (self , model : Model ) -> ThinkingLevel | None :
154+ """Default unified thinking flag. Return None to skip."""
155+ del model
156+ return True
157+
135158 def convert_messages (
136159 self , messages : list [ServerUIMessage ]
137160 ) -> list [UIMessage ]:
@@ -277,11 +300,7 @@ def create_model(self, max_tokens: int) -> GoogleModel:
277300 return GoogleModel (
278301 model_name = self .model ,
279302 provider = self .provider ,
280- settings = GoogleModelSettings (
281- max_tokens = max_tokens ,
282- # Works on non-thinking models too
283- google_thinking_config = {"include_thoughts" : True },
284- ),
303+ settings = GoogleModelSettings (max_tokens = max_tokens ),
285304 )
286305
287306
@@ -367,9 +386,9 @@ def get_openai_client(self, config: AnyProviderConfig) -> AsyncOpenAI:
367386
368387
369388class OpenAIProvider (OpenAIClientMixin , PydanticProvider ["PydanticOpenAI" ]):
370- # Medium effort provides a balance between speed and accuracy
371389 # https://openai.com/index/openai-o3-mini/
372- DEFAULT_REASONING_EFFORT : ReasoningEffort = "medium"
390+ # 'auto' lets OpenAI decide between detailed/concise based on the prompt;
391+ # marimo wants reasoning summaries surfaced for display.
373392 DEFAULT_REASONING_SUMMARY : Literal ["detailed" , "concise" , "auto" ] = "auto"
374393
375394 def create_provider (self , config : AnyProviderConfig ) -> PydanticOpenAI :
@@ -386,62 +405,41 @@ def create_model(self, max_tokens: int) -> OpenAIResponsesModel:
386405 OpenAIResponsesModelSettings ,
387406 )
388407
389- is_reasoning_model = self ._is_reasoning_model (self .model )
390-
391- settings = (
392- OpenAIResponsesModelSettings (
393- max_tokens = max_tokens ,
394- openai_reasoning_summary = self .DEFAULT_REASONING_SUMMARY ,
395- openai_reasoning_effort = self .DEFAULT_REASONING_EFFORT ,
396- )
397- if is_reasoning_model
398- else OpenAIResponsesModelSettings (max_tokens = max_tokens )
399- )
400408 return OpenAIResponsesModel (
401409 model_name = self .model ,
402410 provider = self .provider ,
403- settings = settings ,
411+ settings = OpenAIResponsesModelSettings ( max_tokens = max_tokens ) ,
404412 )
405413
406- def _is_reasoning_model (self , model : str ) -> bool :
407- """
408- Check if reasoning_effort should be added to the request.
409- Only add for actual OpenAI reasoning models, not for OpenAI-compatible APIs.
410-
411- OpenAI-compatible APIs (identified by custom base_url) may not support
412- the reasoning_effort parameter even if the model name suggests it's a
413- reasoning model.
414- """
415- import re
416-
417- # Check for reasoning model patterns: o{digit} or gpt-5, with optional openai/ prefix
418- reasoning_patterns = [
419- r"^openai/o\d" , # openai/o1, openai/o3, etc.
420- r"^o\d" , # o1, o3, etc.
421- r"^openai/gpt-5" , # openai/gpt-5*
422- r"^gpt-5" , # gpt-5*
423- ]
424-
425- is_reasoning_model_name = any (
426- re .match (pattern , model ) for pattern in reasoning_patterns
427- )
428-
429- if not is_reasoning_model_name :
430- return False
431-
432- # If using a custom base_url that's not OpenAI, don't assume reasoning is supported
433- return not (
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Augment the base thinking settings with an OpenAI reasoning summary.

    ``reasoning.summary`` is only valid for OpenAI reasoning models (gpt-5
    and the o-series), so the summary option is attached only when the base
    class decided to enable the unified thinking flag.
    """
    settings = super()._build_agent_settings(model)
    if settings is None or "thinking" not in settings:
        return settings
    summary_opts: OpenAIResponsesModelSettings = {
        "openai_reasoning_summary": self.DEFAULT_REASONING_SUMMARY,
    }
    settings.update(summary_opts)
    return settings
424+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Enable the unified thinking flag only for OpenAI's own endpoint.

    OpenAI-compatible third-party endpoints (identified by a custom
    ``base_url``) may not accept ``reasoning_effort`` even when the model
    name looks like a reasoning model, so the flag is suppressed for them.
    """
    base_url = self.config.base_url
    is_third_party = bool(base_url) and "api.openai.com" not in base_url
    if is_third_party:
        return None
    return super()._default_thinking(model)
437435
438436
439437class AzureOpenAIProvider (OpenAIProvider ):
440- def _is_reasoning_model ( self , model : str ) -> bool :
441- # https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
442- # Only custom models support reasoning effort, we can expose this as a parameter in the future
438+ # Only custom Azure deployments support `reasoning_effort`, and we don't expose that config yet.
439+ # https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
440+ def _default_thinking ( self , model : Model ) -> ThinkingLevel | None :
443441 del model
444- return False
442+ return None
445443
446444 def _handle_azure_openai (self , base_url : str ) -> tuple [str , str , str ]:
447445 """Handle Azure OpenAI.
@@ -689,34 +687,35 @@ def create_agent(
689687 )
690688 model = self .create_model (max_tokens )
691689
690+ agent_settings = ModelSettings (max_tokens = max_tokens )
691+ agent_settings .update (self ._build_agent_settings (model ) or {})
692+
692693 toolset , output_type = self ._get_toolsets_and_output_type (tools )
693694 return Agent (
694695 model ,
695- model_settings = ModelSettings ( max_tokens = max_tokens ) ,
696+ model_settings = agent_settings ,
696697 toolsets = [toolset ] if tools else None ,
697698 instructions = system_prompt ,
698699 output_type = output_type ,
699700 )
700701
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Skip the unified thinking flag for OpenAI-compatible endpoints.

    Custom OpenAI-compatible servers (Together, vLLM, LM Studio, ...)
    often don't honor ``reasoning_effort``, so defer to the base default
    only when the endpoint is not merely OpenAI-compatible.
    """
    if self._is_openai_compatible():
        return None
    return super()._default_thinking(model)
708+
701709
702710class AnthropicProvider (PydanticProvider ["PydanticAnthropic" ]):
703711 # Temperature of 0.2 was recommended for coding and data science in these links:
704712 # https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api/172683
705713 # https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/reduce-latency?utm_source=chatgpt.com
706714 DEFAULT_TEMPERATURE = 0.2
707715
708- # Extended thinking defaults based on:
716+ # Extended thinking requires temperature of 1.
709717 # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
710- # Extended thinking requires temperature of 1
711718 DEFAULT_EXTENDED_THINKING_TEMPERATURE = 1
712- EXTENDED_THINKING_MODEL_PREFIXES = [
713- "claude-opus-4" ,
714- "claude-sonnet-4" ,
715- "claude-haiku-4-5" ,
716- "claude-3-7-sonnet" ,
717- ]
718- # 1024 tokens is the minimum budget for extended thinking
719- DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS = 1024
720719
721720 def create_provider (self , config : AnyProviderConfig ) -> PydanticAnthropic :
722721 from pydantic_ai .providers .anthropic import (
def create_model(self, max_tokens: int) -> Model:
    """Create an Anthropic model with marimo's default sampling settings.

    Anthropic extended thinking requires ``temperature=1``; non-thinking
    models keep the default coding temperature. Some adaptive-only models
    (Opus 4.7+) reject sampling settings entirely, so ``temperature`` is
    omitted for them and pydantic-ai won't drop it with a warning.
    """
    from pydantic_ai.models.anthropic import (
        AnthropicModel,
        AnthropicModelSettings,
    )
    from pydantic_ai.profiles.anthropic import (
        AnthropicModelProfile,
        anthropic_model_profile,
    )

    settings: AnthropicModelSettings = {"max_tokens": max_tokens}

    profile = AnthropicModelProfile.from_profile(
        anthropic_model_profile(self.model)
    )
    allows_sampling = not getattr(
        profile, "anthropic_disallows_sampling_settings", False
    )
    if allows_sampling:
        if profile.supports_thinking:
            # Extended thinking mandates temperature=1.
            settings["temperature"] = self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
        else:
            settings["temperature"] = self.DEFAULT_TEMPERATURE

    return AnthropicModel(
        model_name=self.model,
        provider=self.provider,
        settings=settings,
    )
764760
765761 def convert_messages (
@@ -825,10 +821,7 @@ def create_model(self, max_tokens: int) -> BedrockConverseModel:
825821 return BedrockConverseModel (
826822 model_name = self .model ,
827823 provider = self .provider ,
828- settings = BedrockModelSettings (
829- max_tokens = max_tokens ,
830- # TODO: Add reasoning support
831- ),
824+ settings = BedrockModelSettings (max_tokens = max_tokens ),
832825 )
833826
834827
0 commit comments