3838if TYPE_CHECKING :
3939 from collections .abc import AsyncGenerator , AsyncIterator
4040
41- from anthropic .types .beta import BetaThinkingConfigParam
4241 from openai import AsyncOpenAI
43- from openai .types .shared .reasoning_effort import ReasoningEffort
4442 from pydantic_ai import Agent , DeferredToolRequests , FunctionToolset
4543 from pydantic_ai .models import Model
4644 from pydantic_ai .models .bedrock import BedrockConverseModel
4745 from pydantic_ai .models .google import GoogleModel
48- from pydantic_ai .models .openai import OpenAIChatModel , OpenAIResponsesModel
46+ from pydantic_ai .models .openai import (
47+ OpenAIChatModel ,
48+ OpenAIResponsesModel ,
49+ OpenAIResponsesModelSettings ,
50+ )
4951 from pydantic_ai .providers import Provider
5052 from pydantic_ai .providers .anthropic import (
5153 AnthropicProvider as PydanticAnthropic ,
5557 )
5658 from pydantic_ai .providers .google import GoogleProvider as PydanticGoogle
5759 from pydantic_ai .providers .openai import OpenAIProvider as PydanticOpenAI
60+ from pydantic_ai .settings import ModelSettings , ThinkingLevel
5861 from pydantic_ai .ui .vercel_ai .request_types import UIMessage , UIMessagePart
5962 from starlette .responses import StreamingResponse
6063
@@ -127,11 +130,31 @@ def create_agent(
127130 toolset , output_type = self ._get_toolsets_and_output_type (tools )
128131 return Agent (
129132 model ,
133+ model_settings = self ._build_agent_settings (model ),
130134 toolsets = [toolset ] if tools else None ,
131135 instructions = system_prompt ,
132136 output_type = output_type ,
133137 )
134138
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Build the model settings applied at the agent level on every request.

    Returns ``None`` when thinking should not be enabled — either because
    the provider opted out via ``_default_thinking`` or because the model's
    profile reports no thinking support — so the agent keeps its defaults.
    """
    from pydantic_ai.settings import ModelSettings

    level = self._default_thinking(model)
    if level is None:
        return None
    profile = model.profile
    thinking_capable = profile.supports_thinking or profile.thinking_always_enabled
    return ModelSettings(thinking=level) if thinking_capable else None
152+
153+ def _default_thinking (self , model : Model ) -> ThinkingLevel | None :
154+ """Default unified thinking flag. Return None to skip."""
155+ del model
156+ return True
157+
135158 def convert_messages (
136159 self , messages : list [ServerUIMessage ]
137160 ) -> list [UIMessage ]:
@@ -277,11 +300,7 @@ def create_model(self, max_tokens: int) -> GoogleModel:
277300 return GoogleModel (
278301 model_name = self .model ,
279302 provider = self .provider ,
280- settings = GoogleModelSettings (
281- max_tokens = max_tokens ,
282- # Works on non-thinking models too
283- google_thinking_config = {"include_thoughts" : True },
284- ),
303+ settings = GoogleModelSettings (max_tokens = max_tokens ),
285304 )
286305
287306
@@ -367,9 +386,9 @@ def get_openai_client(self, config: AnyProviderConfig) -> AsyncOpenAI:
367386
368387
369388class OpenAIProvider (OpenAIClientMixin , PydanticProvider ["PydanticOpenAI" ]):
370- # Medium effort provides a balance between speed and accuracy
371389 # https://openai.com/index/openai-o3-mini/
372- DEFAULT_REASONING_EFFORT : ReasoningEffort = "medium"
390+ # 'auto' lets OpenAI decide between detailed/concise based on the prompt;
391+ # marimo wants reasoning summaries surfaced for display.
373392 DEFAULT_REASONING_SUMMARY : Literal ["detailed" , "concise" , "auto" ] = "auto"
374393
375394 def create_provider (self , config : AnyProviderConfig ) -> PydanticOpenAI :
@@ -386,62 +405,41 @@ def create_model(self, max_tokens: int) -> OpenAIResponsesModel:
386405 OpenAIResponsesModelSettings ,
387406 )
388407
389- is_reasoning_model = self ._is_reasoning_model (self .model )
390-
391- settings = (
392- OpenAIResponsesModelSettings (
393- max_tokens = max_tokens ,
394- openai_reasoning_summary = self .DEFAULT_REASONING_SUMMARY ,
395- openai_reasoning_effort = self .DEFAULT_REASONING_EFFORT ,
396- )
397- if is_reasoning_model
398- else OpenAIResponsesModelSettings (max_tokens = max_tokens )
399- )
400408 return OpenAIResponsesModel (
401409 model_name = self .model ,
402410 provider = self .provider ,
403- settings = settings ,
411+ settings = OpenAIResponsesModelSettings ( max_tokens = max_tokens ) ,
404412 )
405413
406- def _is_reasoning_model (self , model : str ) -> bool :
407- """
408- Check if reasoning_effort should be added to the request.
409- Only add for actual OpenAI reasoning models, not for OpenAI-compatible APIs.
410-
411- OpenAI-compatible APIs (identified by custom base_url) may not support
412- the reasoning_effort parameter even if the model name suggests it's a
413- reasoning model.
414- """
415- import re
416-
417- # Check for reasoning model patterns: o{digit} or gpt-5, with optional openai/ prefix
418- reasoning_patterns = [
419- r"^openai/o\d" , # openai/o1, openai/o3, etc.
420- r"^o\d" , # o1, o3, etc.
421- r"^openai/gpt-5" , # openai/gpt-5*
422- r"^gpt-5" , # gpt-5*
423- ]
424-
425- is_reasoning_model_name = any (
426- re .match (pattern , model ) for pattern in reasoning_patterns
427- )
428-
429- if not is_reasoning_model_name :
430- return False
431-
432- # If using a custom base_url that's not OpenAI, don't assume reasoning is supported
433- return not (
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Augment the base thinking settings with an OpenAI reasoning summary.

    ``reasoning.summary`` is only valid for OpenAI reasoning models (gpt-5
    and the o-series), so the summary option is attached only when the base
    class decided to enable the unified thinking flag.
    """
    settings = super()._build_agent_settings(model)
    if settings is None or "thinking" not in settings:
        return settings
    summary_opts: OpenAIResponsesModelSettings = {
        "openai_reasoning_summary": self.DEFAULT_REASONING_SUMMARY,
    }
    settings.update(summary_opts)
    return settings
424+
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Enable the unified thinking flag only for OpenAI's own endpoint.

    OpenAI-compatible third-party endpoints (identified by a custom
    ``base_url``) may not accept ``reasoning_effort`` even when the model
    name looks like a reasoning model, so the flag is suppressed for them.
    """
    base_url = self.config.base_url
    is_third_party = bool(base_url) and "api.openai.com" not in base_url
    if is_third_party:
        return None
    return super()._default_thinking(model)
437435
438436
439437class AzureOpenAIProvider (OpenAIProvider ):
440- def _is_reasoning_model ( self , model : str ) -> bool :
441- # https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
442- # Only custom models support reasoning effort, we can expose this as a parameter in the future
438+ # Only custom Azure deployments support `reasoning_effort`, and we don't expose that config yet.
439+ # https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
440+ def _default_thinking ( self , model : Model ) -> ThinkingLevel | None :
443441 del model
444- return False
442+ return None
445443
446444 def _handle_azure_openai (self , base_url : str ) -> tuple [str , str , str ]:
447445 """Handle Azure OpenAI.
@@ -689,34 +687,35 @@ def create_agent(
689687 )
690688 model = self .create_model (max_tokens )
691689
690+ agent_settings = ModelSettings (max_tokens = max_tokens )
691+ agent_settings .update (self ._build_agent_settings (model ) or {})
692+
692693 toolset , output_type = self ._get_toolsets_and_output_type (tools )
693694 return Agent (
694695 model ,
695- model_settings = ModelSettings ( max_tokens = max_tokens ) ,
696+ model_settings = agent_settings ,
696697 toolsets = [toolset ] if tools else None ,
697698 instructions = system_prompt ,
698699 output_type = output_type ,
699700 )
700701
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Skip the unified thinking flag for OpenAI-compatible endpoints.

    Custom OpenAI-compatible servers (Together, vLLM, LM Studio, ...)
    often don't honor ``reasoning_effort``, so defer to the base default
    only when the endpoint is not merely OpenAI-compatible.
    """
    if self._is_openai_compatible():
        return None
    return super()._default_thinking(model)
708+
701709
702710class AnthropicProvider (PydanticProvider ["PydanticAnthropic" ]):
703711 # Temperature of 0.2 was recommended for coding and data science in these links:
704712 # https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api/172683
705713 # https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/reduce-latency?utm_source=chatgpt.com
706714 DEFAULT_TEMPERATURE = 0.2
707715
708- # Extended thinking defaults based on:
716+ # Extended thinking requires temperature of 1.
709717 # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
710- # Extended thinking requires temperature of 1
711718 DEFAULT_EXTENDED_THINKING_TEMPERATURE = 1
712- EXTENDED_THINKING_MODEL_PREFIXES = [
713- "claude-opus-4" ,
714- "claude-sonnet-4" ,
715- "claude-haiku-4-5" ,
716- "claude-3-7-sonnet" ,
717- ]
718- # 1024 tokens is the minimum budget for extended thinking
719- DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS = 1024
720719
721720 def create_provider (self , config : AnyProviderConfig ) -> PydanticAnthropic :
722721 from pydantic_ai .providers .anthropic import (
def create_model(self, max_tokens: int) -> Model:
    """Create an Anthropic model with marimo's default sampling settings.

    Anthropic extended thinking requires ``temperature=1``; non-thinking
    models keep the default coding temperature. Some adaptive-only models
    (Opus 4.7+) reject sampling settings entirely, so ``temperature`` is
    omitted for them and pydantic-ai won't drop it with a warning.
    """
    from pydantic_ai.models.anthropic import (
        AnthropicModel,
        AnthropicModelSettings,
    )
    from pydantic_ai.profiles.anthropic import (
        AnthropicModelProfile,
        anthropic_model_profile,
    )

    settings: AnthropicModelSettings = {"max_tokens": max_tokens}

    profile = AnthropicModelProfile.from_profile(
        anthropic_model_profile(self.model)
    )
    allows_sampling = not getattr(
        profile, "anthropic_disallows_sampling_settings", False
    )
    if allows_sampling:
        if profile.supports_thinking:
            # Extended thinking mandates temperature=1.
            settings["temperature"] = self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
        else:
            settings["temperature"] = self.DEFAULT_TEMPERATURE

    return AnthropicModel(
        model_name=self.model,
        provider=self.provider,
        settings=settings,
    )
764760
765761 def convert_messages (
@@ -825,10 +821,7 @@ def create_model(self, max_tokens: int) -> BedrockConverseModel:
825821 return BedrockConverseModel (
826822 model_name = self .model ,
827823 provider = self .provider ,
828- settings = BedrockModelSettings (
829- max_tokens = max_tokens ,
830- # TODO: Add reasoning support
831- ),
824+ settings = BedrockModelSettings (max_tokens = max_tokens ),
832825 )
833826
834827
0 commit comments