Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 88 additions & 95 deletions marimo/_server/ai/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,16 @@
if TYPE_CHECKING:
from collections.abc import AsyncGenerator, AsyncIterator

from anthropic.types.beta import BetaThinkingConfigParam
from openai import AsyncOpenAI
from openai.types.shared.reasoning_effort import ReasoningEffort
from pydantic_ai import Agent, DeferredToolRequests, FunctionToolset
from pydantic_ai.models import Model
from pydantic_ai.models.bedrock import BedrockConverseModel
from pydantic_ai.models.google import GoogleModel
from pydantic_ai.models.openai import OpenAIChatModel, OpenAIResponsesModel
from pydantic_ai.models.openai import (
OpenAIChatModel,
OpenAIResponsesModel,
OpenAIResponsesModelSettings,
)
from pydantic_ai.providers import Provider
from pydantic_ai.providers.anthropic import (
AnthropicProvider as PydanticAnthropic,
Expand All @@ -55,6 +57,7 @@
)
from pydantic_ai.providers.google import GoogleProvider as PydanticGoogle
from pydantic_ai.providers.openai import OpenAIProvider as PydanticOpenAI
from pydantic_ai.settings import ModelSettings, ThinkingLevel
from pydantic_ai.ui.vercel_ai.request_types import UIMessage, UIMessagePart
from starlette.responses import StreamingResponse

Expand Down Expand Up @@ -127,11 +130,31 @@ def create_agent(
toolset, output_type = self._get_toolsets_and_output_type(tools)
return Agent(
model,
model_settings=self._build_agent_settings(model),
toolsets=[toolset] if tools else None,
instructions=system_prompt,
output_type=output_type,
)

def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Build the agent-level model settings attached to every request.

    Returns ``None`` when no unified thinking flag should be sent,
    either because this provider opts out (``_default_thinking`` is
    ``None``) or because the model's profile reports no thinking
    support.
    """
    from pydantic_ai.settings import ModelSettings

    thinking = self._default_thinking(model)
    if thinking is None:
        return None
    profile = model.profile
    thinking_capable = (
        profile.supports_thinking or profile.thinking_always_enabled
    )
    if thinking_capable:
        return ModelSettings(thinking=thinking)
    return None

def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Return the unified thinking flag to request, or ``None`` to skip."""
    # The base provider always opts in; model- and endpoint-specific
    # gating happens in ``_build_agent_settings`` and subclass overrides.
    del model  # unused here; signature kept for overriding providers
    return True
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude 3.5 and earlier models are no longer offered by Anthropic.


def convert_messages(
self, messages: list[ServerUIMessage]
) -> list[UIMessage]:
Expand Down Expand Up @@ -277,11 +300,7 @@ def create_model(self, max_tokens: int) -> GoogleModel:
return GoogleModel(
model_name=self.model,
provider=self.provider,
settings=GoogleModelSettings(
max_tokens=max_tokens,
# Works on non-thinking models too
google_thinking_config={"include_thoughts": True},
),
settings=GoogleModelSettings(max_tokens=max_tokens),
)


Expand Down Expand Up @@ -367,9 +386,9 @@ def get_openai_client(self, config: AnyProviderConfig) -> AsyncOpenAI:


class OpenAIProvider(OpenAIClientMixin, PydanticProvider["PydanticOpenAI"]):
# Medium effort provides a balance between speed and accuracy
# https://openai.com/index/openai-o3-mini/
DEFAULT_REASONING_EFFORT: ReasoningEffort = "medium"
# 'auto' lets OpenAI decide between detailed/concise based on the prompt;
# marimo wants reasoning summaries surfaced for display.
DEFAULT_REASONING_SUMMARY: Literal["detailed", "concise", "auto"] = "auto"

def create_provider(self, config: AnyProviderConfig) -> PydanticOpenAI:
Expand All @@ -386,62 +405,41 @@ def create_model(self, max_tokens: int) -> OpenAIResponsesModel:
OpenAIResponsesModelSettings,
)

is_reasoning_model = self._is_reasoning_model(self.model)

settings = (
OpenAIResponsesModelSettings(
max_tokens=max_tokens,
openai_reasoning_summary=self.DEFAULT_REASONING_SUMMARY,
openai_reasoning_effort=self.DEFAULT_REASONING_EFFORT,
)
if is_reasoning_model
else OpenAIResponsesModelSettings(max_tokens=max_tokens)
)
return OpenAIResponsesModel(
model_name=self.model,
provider=self.provider,
settings=settings,
settings=OpenAIResponsesModelSettings(max_tokens=max_tokens),
)

def _is_reasoning_model(self, model: str) -> bool:
"""
Check if reasoning_effort should be added to the request.
Only add for actual OpenAI reasoning models, not for OpenAI-compatible APIs.

OpenAI-compatible APIs (identified by custom base_url) may not support
the reasoning_effort parameter even if the model name suggests it's a
reasoning model.
"""
import re

# Check for reasoning model patterns: o{digit} or gpt-5, with optional openai/ prefix
reasoning_patterns = [
r"^openai/o\d", # openai/o1, openai/o3, etc.
r"^o\d", # o1, o3, etc.
r"^openai/gpt-5", # openai/gpt-5*
r"^gpt-5", # gpt-5*
]

is_reasoning_model_name = any(
re.match(pattern, model) for pattern in reasoning_patterns
)

if not is_reasoning_model_name:
return False

# If using a custom base_url that's not OpenAI, don't assume reasoning is supported
return not (
def _build_agent_settings(self, model: Model) -> ModelSettings | None:
    """Augment the base agent settings with an OpenAI reasoning summary.

    `reasoning.summary` is only valid for OpenAI reasoning models
    (gpt-5 and the o-series), so it is attached only when the base
    class decided to send the unified thinking flag.
    """
    base = super()._build_agent_settings(model)
    if base is None or "thinking" not in base:
        return base
    summary_settings: OpenAIResponsesModelSettings = {
        "openai_reasoning_summary": self.DEFAULT_REASONING_SUMMARY,
    }
    base.update(summary_settings)
    return base

def _default_thinking(self, model: Model) -> ThinkingLevel | None:
# OpenAI-compatible third-party endpoints (custom base_url) may not
# accept `reasoning_effort` even when the model name looks like a
# reasoning model. Suppress the unified thinking flag in that case.
if (
self.config.base_url
and "api.openai.com" not in self.config.base_url
)
):
return None
return super()._default_thinking(model)


class AzureOpenAIProvider(OpenAIProvider):
def _is_reasoning_model(self, model: str) -> bool:
# https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
# Only custom models support reasoning effort, we can expose this as a parameter in the future
# Only custom Azure deployments support `reasoning_effort`, and we don't expose that config yet.
# https://learn.microsoft.com/en-us/answers/questions/5519548/does-gpt-5-via-azure-support-reasoning-effort-and
def _default_thinking(self, model: Model) -> ThinkingLevel | None:
del model
return False
return None

def _handle_azure_openai(self, base_url: str) -> tuple[str, str, str]:
"""Handle Azure OpenAI.
Expand Down Expand Up @@ -689,34 +687,35 @@ def create_agent(
)
model = self.create_model(max_tokens)

agent_settings = ModelSettings(max_tokens=max_tokens)
agent_settings.update(self._build_agent_settings(model) or {})

toolset, output_type = self._get_toolsets_and_output_type(tools)
return Agent(
model,
model_settings=ModelSettings(max_tokens=max_tokens),
model_settings=agent_settings,
toolsets=[toolset] if tools else None,
instructions=system_prompt,
output_type=output_type,
)

def _default_thinking(self, model: Model) -> ThinkingLevel | None:
    """Suppress the unified thinking flag on OpenAI-compatible endpoints.

    Custom OpenAI-compatible servers (Together, vLLM, LM Studio, ...)
    often don't honor `reasoning_effort`, so no thinking flag is sent
    for them.
    """
    if not self._is_openai_compatible():
        return super()._default_thinking(model)
    return None


class AnthropicProvider(PydanticProvider["PydanticAnthropic"]):
# Temperature of 0.2 was recommended for coding and data science in these links:
# https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api/172683
# https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/reduce-latency?utm_source=chatgpt.com
DEFAULT_TEMPERATURE = 0.2

# Extended thinking defaults based on:
# Extended thinking requires temperature of 1.
# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
# Extended thinking requires temperature of 1
DEFAULT_EXTENDED_THINKING_TEMPERATURE = 1
EXTENDED_THINKING_MODEL_PREFIXES = [
"claude-opus-4",
"claude-sonnet-4",
"claude-haiku-4-5",
"claude-3-7-sonnet",
]
# 1024 tokens is the minimum budget for extended thinking
DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS = 1024

def create_provider(self, config: AnyProviderConfig) -> PydanticAnthropic:
from pydantic_ai.providers.anthropic import (
Expand All @@ -730,36 +729,33 @@ def create_model(self, max_tokens: int) -> Model:
AnthropicModel,
AnthropicModelSettings,
)
from pydantic_ai.profiles.anthropic import (
AnthropicModelProfile,
anthropic_model_profile,
)

is_thinking_model = self.is_extended_thinking_model(self.model)
thinking_config: BetaThinkingConfigParam = {"type": "disabled"}
if is_thinking_model:
thinking_config = {
"type": "enabled",
"budget_tokens": self.DEFAULT_EXTENDED_THINKING_BUDGET_TOKENS,
}
settings: AnthropicModelSettings = {"max_tokens": max_tokens}

# Anthropic extended thinking requires temperature=1; non-thinking
# models keep our default coding temperature. Some adaptive-only
# models (Opus 4.7+) reject sampling settings entirely β€” skip
# `temperature` for them so pydantic-ai doesn't drop it with a warning.
profile = AnthropicModelProfile.from_profile(
anthropic_model_profile(self.model)
)
if not getattr(
profile, "anthropic_disallows_sampling_settings", False
):
settings["temperature"] = (
self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
if profile.supports_thinking
else self.DEFAULT_TEMPERATURE
)

return AnthropicModel(
model_name=self.model,
provider=self.provider,
settings=AnthropicModelSettings(
max_tokens=max_tokens,
temperature=self.get_temperature(),
anthropic_thinking=thinking_config,
),
)

def is_extended_thinking_model(self, model: str) -> bool:
return any(
model.startswith(prefix)
for prefix in self.EXTENDED_THINKING_MODEL_PREFIXES
)

def get_temperature(self) -> float:
return (
self.DEFAULT_EXTENDED_THINKING_TEMPERATURE
if self.is_extended_thinking_model(self.model)
else self.DEFAULT_TEMPERATURE
settings=settings,
)

def convert_messages(
Expand Down Expand Up @@ -825,10 +821,7 @@ def create_model(self, max_tokens: int) -> BedrockConverseModel:
return BedrockConverseModel(
model_name=self.model,
provider=self.provider,
settings=BedrockModelSettings(
max_tokens=max_tokens,
# TODO: Add reasoning support
),
settings=BedrockModelSettings(max_tokens=max_tokens),
)


Expand Down
7 changes: 7 additions & 0 deletions packages/llm-info/data/models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@

# Anthropic

- name: Claude Opus 4.7
model: claude-opus-4-7
description: Latest Opus model, strongest for coding and long-running professional tasks
providers: [anthropic]
roles: [chat, edit]
thinking: true

- name: Claude Sonnet 4.6
model: claude-sonnet-4-6
description: Most capable Sonnet-class model, with frontier performance across coding, agents, and professional work
Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ dev = [
# For linting
"ruff>=0.15.9",
# For AI
"pydantic-ai-slim[openai]>=1.71.0",
"pydantic-ai-slim[openai]>=1.84.0",
]
Comment thread
Light2Dark marked this conversation as resolved.

test = [
Expand Down Expand Up @@ -203,7 +203,7 @@ test-optional = [
"anywidget~=0.9.21",
"ipython~=8.12.3",
# testing gen ai
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.71.0",
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.84.0",
# - google-auth uses cachetools, and cachetools<5.0.0 uses collections.MutableMapping (removed in Python 3.10)
"cachetools>=5.0.0",
"boto3>=1.38.46",
Expand Down Expand Up @@ -240,7 +240,7 @@ typecheck = [
"sqlalchemy>=2.0.40",
"obstore>=0.8.2",
"fsspec>=2026.2.0",
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.71.0",
"pydantic-ai-slim[google,anthropic,bedrock,openai]>=1.84.0",
"loro>=1.5.0",
"boto3-stubs>=1.38.46",
"pandas-stubs>=1.5.3.230321",
Expand Down
Loading
Loading