Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 177 additions & 0 deletions src/strands/models/_defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""Default model metadata lookup tables.
Comment thread
opieter-aws marked this conversation as resolved.

Provides context window limits for known model IDs across all providers.
Values sourced from provider documentation and
https://github.com/BerriAI/litellm/blob/litellm_internal_staging/model_prices_and_context_window.json

Applied to providers with well-known, fixed model IDs: Bedrock, Anthropic, OpenAI,
OpenAI Responses, Gemini, and Mistral. Providers that use local/custom model IDs
(Ollama, LlamaCpp, SageMaker) or proxy to other providers with their own prefixed
ID format (LiteLLM) are excluded — their context windows depend on deployment config,
not a static table.
"""

import logging
from collections.abc import Mapping
from typing import TypeVar

logger = logging.getLogger(__name__)

_C = TypeVar("_C", bound=Mapping[str, object])

# Context window limits (in tokens) for known model IDs.
#
# Best-effort lookup table — unknown models return None and callers
# fall back gracefully (e.g. proactive compression is disabled).
Comment thread
opieter-aws marked this conversation as resolved.
# Users can always override with an explicit context_window_limit in their model config.
#
# For Bedrock models with cross-region prefixes (e.g. us., eu., global.),
# get_context_window_limit strips the prefix before lookup so only the base model ID is needed here.
_CONTEXT_WINDOW_LIMITS: dict[str, int] = {
# Anthropic (direct API)
"claude-sonnet-4-6": 1_000_000,
"claude-sonnet-4-20250514": 1_000_000,
"claude-sonnet-4-5": 200_000,
"claude-sonnet-4-5-20250929": 200_000,
"claude-opus-4-6": 1_000_000,
"claude-opus-4-6-20260205": 1_000_000,
"claude-opus-4-7": 1_000_000,
"claude-opus-4-7-20260416": 1_000_000,
"claude-opus-4-5": 200_000,
"claude-opus-4-5-20251101": 200_000,
"claude-opus-4-20250514": 200_000,
"claude-opus-4-1": 200_000,
"claude-opus-4-1-20250805": 200_000,
"claude-haiku-4-5": 200_000,
"claude-haiku-4-5-20251001": 200_000,
"claude-3-7-sonnet-20250219": 200_000,
"claude-3-5-sonnet-20241022": 200_000,
"claude-3-5-sonnet-20240620": 200_000,
"claude-3-5-haiku-20241022": 200_000,
"claude-3-opus-20240229": 200_000,
"claude-3-haiku-20240307": 200_000,
# Bedrock Anthropic (base model IDs — cross-region prefixes stripped by get_context_window_limit)
"anthropic.claude-sonnet-4-6": 1_000_000,
"anthropic.claude-sonnet-4-20250514-v1:0": 1_000_000,
"anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
"anthropic.claude-opus-4-6-v1": 1_000_000,
"anthropic.claude-opus-4-7": 1_000_000,
"anthropic.claude-opus-4-5-20251101-v1:0": 200_000,
"anthropic.claude-opus-4-20250514-v1:0": 200_000,
"anthropic.claude-opus-4-1-20250805-v1:0": 200_000,
"anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
"anthropic.claude-haiku-4-5@20251001": 200_000,
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
"anthropic.claude-3-7-sonnet-20240620-v1:0": 200_000,
"anthropic.claude-3-5-sonnet-20241022-v2:0": 200_000,
"anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000,
"anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
"anthropic.claude-3-opus-20240229-v1:0": 200_000,
"anthropic.claude-3-haiku-20240307-v1:0": 200_000,
"anthropic.claude-3-sonnet-20240229-v1:0": 200_000,
"anthropic.claude-mythos-preview": 1_000_000,
# Bedrock Amazon Nova
"amazon.nova-pro-v1:0": 300_000,
"amazon.nova-lite-v1:0": 300_000,
"amazon.nova-micro-v1:0": 128_000,
"amazon.nova-premier-v1:0": 1_000_000,
"amazon.nova-2-lite-v1:0": 1_000_000,
"amazon.nova-2-pro-preview-20251202-v1:0": 1_000_000,
# OpenAI
"gpt-5.5": 1_050_000,
"gpt-5.5-pro": 1_050_000,
"gpt-5.4": 1_050_000,
"gpt-5.4-pro": 1_050_000,
"gpt-5.4-mini": 272_000,
"gpt-5.4-nano": 272_000,
"gpt-5.2": 272_000,
"gpt-5.2-pro": 272_000,
"gpt-5.1": 272_000,
"gpt-5": 272_000,
"gpt-5-mini": 272_000,
"gpt-5-nano": 272_000,
"gpt-5-pro": 128_000,
"gpt-4.1": 1_047_576,
"gpt-4.1-mini": 1_047_576,
"gpt-4.1-nano": 1_047_576,
"gpt-4o": 128_000,
"gpt-4o-mini": 128_000,
"gpt-4-turbo": 128_000,
"o3": 200_000,
"o3-mini": 200_000,
"o3-pro": 200_000,
"o4-mini": 200_000,
"o1": 200_000,
# Google Gemini
"gemini-2.5-flash": 1_048_576,
"gemini-2.5-flash-lite": 1_048_576,
"gemini-2.5-pro": 1_048_576,
"gemini-2.0-flash": 1_048_576,
"gemini-2.0-flash-lite": 1_048_576,
"gemini-3-pro-preview": 1_048_576,
"gemini-3-flash-preview": 1_048_576,
"gemini-3.1-pro-preview": 1_048_576,
"gemini-3.1-flash-lite-preview": 1_048_576,
# Mistral
"mistral-large-latest": 262_144,
"mistral-large-2512": 262_144,
"mistral-large-3": 262_144,
"mistral-medium-latest": 131_072,
"mistral-medium-2505": 131_072,
"mistral-small-latest": 131_072,
"mistral-small-3-2-2506": 131_072,
}


def get_context_window_limit(model_id: str) -> int | None:
"""Look up the context window limit for a model ID.

For Bedrock cross-region model IDs (e.g. ``us.anthropic.claude-sonnet-4-6``),
the region prefix is stripped as a fallback if the direct lookup fails.

Args:
model_id: The model ID to look up.

Returns:
Comment thread
opieter-aws marked this conversation as resolved.
The context window limit in tokens, or None if not found.
"""
direct = _CONTEXT_WINDOW_LIMITS.get(model_id)
if direct is not None:
return direct

# Fallback: strip prefix before first dot and retry (handles cross-region prefixes)
dot_index = model_id.find(".")
if dot_index != -1:
stripped = model_id[dot_index + 1 :]
result = _CONTEXT_WINDOW_LIMITS.get(stripped)
if result is not None:
logger.debug(
"model_id=<%s>, stripped_id=<%s> | resolved context window limit via prefix strip", model_id, stripped
)
return result

return None


def resolve_config_metadata(config: _C, model_id: str) -> _C:
"""Resolve model metadata fields on a config dict from built-in lookup tables.

When ``context_window_limit`` is not explicitly set, looks it up from the built-in table.
Explicit values pass through unchanged. Returns a new dict only when resolution adds a field;
otherwise returns the original config to avoid unnecessary allocation.

Args:
config: The stored model config dict.
model_id: The model ID to look up.

Returns:
The config with resolved metadata, or the original config if nothing to resolve.
"""
if "context_window_limit" in config:
return config

limit = get_context_window_limit(model_id)
if limit is None:
return config
Comment thread
lizradway marked this conversation as resolved.

return {**config, "context_window_limit": limit} # type: ignore[return-value]
3 changes: 2 additions & 1 deletion src/strands/models/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolChoiceToolDict, ToolSpec
from ._defaults import resolve_config_metadata
from ._validation import _has_location_source, validate_config_keys
from .model import BaseModelConfig, Model

Expand Down Expand Up @@ -95,7 +96,7 @@ def get_config(self) -> AnthropicConfig:
Returns:
The Anthropic model configuration.
"""
return self.config
return resolve_config_metadata(self.config, self.config["model_id"])

def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]:
"""Format an Anthropic content block.
Expand Down
3 changes: 2 additions & 1 deletion src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from ..types.streaming import CitationsDelta, StreamEvent
from ..types.tools import ToolChoice, ToolSpec
from ._defaults import resolve_config_metadata
from ._strict_schema import ensure_strict_json_schema
from ._validation import validate_config_keys
from .model import BaseModelConfig, CacheConfig, Model
Expand Down Expand Up @@ -217,7 +218,7 @@ def get_config(self) -> BedrockConfig:
Returns:
The Bedrock model configuration.
"""
return self.config
return resolve_config_metadata(self.config, self.config.get("model_id", ""))

def _format_request(
self,
Expand Down
3 changes: 2 additions & 1 deletion src/strands/models/gemini.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException, ProviderTokenCountError
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolSpec
from ._defaults import resolve_config_metadata
from ._validation import _has_location_source, validate_config_keys
from .model import BaseModelConfig, Model

Expand Down Expand Up @@ -115,7 +116,7 @@ def get_config(self) -> GeminiConfig:
Returns:
The Gemini model configuration.
"""
return self.config
return resolve_config_metadata(self.config, self.config["model_id"])

def _get_client(self) -> genai.Client:
"""Get a Gemini client for making requests.
Expand Down
3 changes: 2 additions & 1 deletion src/strands/models/mistral.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..types.exceptions import ModelThrottledException
from ..types.streaming import StopReason, StreamEvent
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
from ._defaults import resolve_config_metadata
from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
from .model import BaseModelConfig, Model

Expand Down Expand Up @@ -114,7 +115,7 @@ def get_config(self) -> MistralConfig:
Returns:
The Mistral model configuration.
"""
return self.config
return resolve_config_metadata(self.config, self.config["model_id"])

def _format_request_message_content(self, content: ContentBlock) -> str | dict[str, Any]:
"""Format a Mistral content block.
Expand Down
5 changes: 4 additions & 1 deletion src/strands/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
from ._defaults import resolve_config_metadata
from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args
from ._validation import _has_location_source, validate_config_keys
from .model import BaseModelConfig, Model
Expand Down Expand Up @@ -150,7 +151,9 @@ def get_config(self) -> OpenAIConfig:
Returns:
The OpenAI model configuration.
"""
return cast(OpenAIModel.OpenAIConfig, self.config)
return cast(
OpenAIModel.OpenAIConfig, resolve_config_metadata(self.config, str(self.config.get("model_id", "")))
Comment thread
opieter-aws marked this conversation as resolved.
)

@classmethod
def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]:
Expand Down
6 changes: 5 additions & 1 deletion src/strands/models/openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException # noqa: E402
from ..types.streaming import StreamEvent # noqa: E402
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse # noqa: E402
from ._defaults import resolve_config_metadata # noqa: E402
from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args # noqa: E402
from ._validation import validate_config_keys # noqa: E402
from .model import BaseModelConfig, Model # noqa: E402
Expand Down Expand Up @@ -210,7 +211,10 @@ def get_config(self) -> OpenAIResponsesConfig:
Returns:
The OpenAI Responses API model configuration.
"""
return cast(OpenAIResponsesModel.OpenAIResponsesConfig, self.config)
return cast(
OpenAIResponsesModel.OpenAIResponsesConfig,
resolve_config_metadata(self.config, str(self.config.get("model_id", ""))),
Comment thread
opieter-aws marked this conversation as resolved.
)

@override
async def count_tokens(
Expand Down
24 changes: 24 additions & 0 deletions tests/strands/models/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,30 @@ def test__init__model_configs(anthropic_client, model_id, max_tokens):
assert tru_temperature == exp_temperature


def test__init__auto_populates_context_window_limit(anthropic_client):
_ = anthropic_client

model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1)

assert model.get_config().get("context_window_limit") == 1_000_000


def test__init__explicit_context_window_limit_not_overridden(anthropic_client):
_ = anthropic_client

model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1, context_window_limit=100_000)

assert model.get_config().get("context_window_limit") == 100_000


def test__init__unknown_model_no_context_window_limit(anthropic_client):
_ = anthropic_client

model = AnthropicModel(model_id="unknown-model", max_tokens=1)

assert model.get_config().get("context_window_limit") is None


def test_update_config(model, model_id):
model.update_config(model_id=model_id)

Expand Down
40 changes: 40 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,46 @@ def test__init__context_window_limit(bedrock_client):
assert model.context_window_limit == 200_000


def test__init__auto_populates_context_window_limit(bedrock_client):
_ = bedrock_client

model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0")

assert model.get_config().get("context_window_limit") == 1_000_000


def test__init__auto_populates_context_window_limit_cross_region(bedrock_client):
_ = bedrock_client

model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-6")

assert model.get_config().get("context_window_limit") == 1_000_000


def test__init__auto_populates_context_window_limit_default_model(bedrock_client):
_ = bedrock_client

model = BedrockModel()

assert model.get_config().get("context_window_limit") == 1_000_000


def test__init__explicit_context_window_limit_not_overridden(bedrock_client):
_ = bedrock_client

model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0", context_window_limit=100_000)

assert model.get_config().get("context_window_limit") == 100_000


def test__init__unknown_model_no_context_window_limit(bedrock_client):
_ = bedrock_client

model = BedrockModel(model_id="unknown.model-v1:0")

assert model.get_config().get("context_window_limit") is None


def test_update_config(model, model_id):
model.update_config(model_id=model_id)

Expand Down
Loading
Loading