Skip to content

Commit 1ccc5a4

Browse files
committed
feat: add context window limit lookup table
1 parent a245e6d commit 1ccc5a4

14 files changed

Lines changed: 414 additions & 8 deletions

src/strands/models/_defaults.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""Default model metadata lookup tables.
2+
3+
Provides context window limits for known model IDs across all providers.
4+
Values sourced from provider documentation and
5+
https://github.com/BerriAI/litellm/blob/litellm_internal_staging/model_prices_and_context_window.json
6+
7+
Applied to providers with well-known, fixed model IDs: Bedrock, Anthropic, OpenAI,
8+
OpenAI Responses, Gemini, and Mistral. Providers that use local/custom model IDs
9+
(Ollama, LlamaCpp, SageMaker) or proxy to other providers with their own prefixed
10+
ID format (LiteLLM) are excluded — their context windows depend on deployment config,
11+
not a static table.
12+
"""
13+
14+
from typing import Any
15+
16+
# Context window limits (in tokens) for known model IDs.
17+
#
18+
# Best-effort lookup table — unknown models return None and callers
19+
# fall back gracefully (e.g. proactive compression is disabled).
20+
# Users can always override with an explicit context_window_limit in their model config.
21+
#
22+
# For Bedrock models with cross-region prefixes (e.g. us., eu., global.),
23+
# get_context_window_limit strips the prefix before lookup so only the base model ID is needed here.
24+
_CONTEXT_WINDOW_LIMITS: dict[str, int] = {
25+
# Anthropic (direct API)
26+
"claude-sonnet-4-6": 1_000_000,
27+
"claude-sonnet-4-20250514": 1_000_000,
28+
"claude-sonnet-4-5": 200_000,
29+
"claude-sonnet-4-5-20250929": 200_000,
30+
"claude-opus-4-6": 1_000_000,
31+
"claude-opus-4-6-20260205": 1_000_000,
32+
"claude-opus-4-7": 1_000_000,
33+
"claude-opus-4-7-20260416": 1_000_000,
34+
"claude-opus-4-5": 200_000,
35+
"claude-opus-4-5-20251101": 200_000,
36+
"claude-opus-4-20250514": 200_000,
37+
"claude-opus-4-1": 200_000,
38+
"claude-opus-4-1-20250805": 200_000,
39+
"claude-haiku-4-5": 200_000,
40+
"claude-haiku-4-5-20251001": 200_000,
41+
"claude-3-7-sonnet-20250219": 200_000,
42+
"claude-3-5-sonnet-20241022": 200_000,
43+
"claude-3-5-sonnet-20240620": 200_000,
44+
"claude-3-5-haiku-20241022": 200_000,
45+
"claude-3-opus-20240229": 200_000,
46+
"claude-3-haiku-20240307": 200_000,
47+
# Bedrock Anthropic (base model IDs — cross-region prefixes stripped by get_context_window_limit)
48+
"anthropic.claude-sonnet-4-6": 1_000_000,
49+
"anthropic.claude-sonnet-4-20250514-v1:0": 1_000_000,
50+
"anthropic.claude-sonnet-4-5-20250929-v1:0": 200_000,
51+
"anthropic.claude-opus-4-6-v1": 1_000_000,
52+
"anthropic.claude-opus-4-7": 1_000_000,
53+
"anthropic.claude-opus-4-5-20251101-v1:0": 200_000,
54+
"anthropic.claude-opus-4-20250514-v1:0": 200_000,
55+
"anthropic.claude-opus-4-1-20250805-v1:0": 200_000,
56+
"anthropic.claude-haiku-4-5-20251001-v1:0": 200_000,
57+
"anthropic.claude-haiku-4-5@20251001": 200_000,
58+
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
59+
"anthropic.claude-3-7-sonnet-20240620-v1:0": 200_000,
60+
"anthropic.claude-3-5-sonnet-20241022-v2:0": 200_000,
61+
"anthropic.claude-3-5-sonnet-20240620-v1:0": 200_000,
62+
"anthropic.claude-3-5-haiku-20241022-v1:0": 200_000,
63+
"anthropic.claude-3-opus-20240229-v1:0": 200_000,
64+
"anthropic.claude-3-haiku-20240307-v1:0": 200_000,
65+
"anthropic.claude-3-sonnet-20240229-v1:0": 200_000,
66+
"anthropic.claude-mythos-preview": 1_000_000,
67+
# Bedrock Amazon Nova
68+
"amazon.nova-pro-v1:0": 300_000,
69+
"amazon.nova-lite-v1:0": 300_000,
70+
"amazon.nova-micro-v1:0": 128_000,
71+
"amazon.nova-premier-v1:0": 1_000_000,
72+
"amazon.nova-2-lite-v1:0": 1_000_000,
73+
"amazon.nova-2-pro-preview-20251202-v1:0": 1_000_000,
74+
# OpenAI
75+
"gpt-5.5": 1_050_000,
76+
"gpt-5.5-pro": 1_050_000,
77+
"gpt-5.4": 1_050_000,
78+
"gpt-5.4-pro": 1_050_000,
79+
"gpt-5.4-mini": 272_000,
80+
"gpt-5.4-nano": 272_000,
81+
"gpt-5.2": 272_000,
82+
"gpt-5.2-pro": 272_000,
83+
"gpt-5.1": 272_000,
84+
"gpt-5": 272_000,
85+
"gpt-5-mini": 272_000,
86+
"gpt-5-nano": 272_000,
87+
"gpt-5-pro": 128_000,
88+
"gpt-4.1": 1_047_576,
89+
"gpt-4.1-mini": 1_047_576,
90+
"gpt-4.1-nano": 1_047_576,
91+
"gpt-4o": 128_000,
92+
"gpt-4o-mini": 128_000,
93+
"gpt-4-turbo": 128_000,
94+
"o3": 200_000,
95+
"o3-mini": 200_000,
96+
"o3-pro": 200_000,
97+
"o4-mini": 200_000,
98+
"o1": 200_000,
99+
# Google Gemini
100+
"gemini-2.5-flash": 1_048_576,
101+
"gemini-2.5-flash-lite": 1_048_576,
102+
"gemini-2.5-pro": 1_048_576,
103+
"gemini-2.0-flash": 1_048_576,
104+
"gemini-2.0-flash-lite": 1_048_576,
105+
"gemini-3-pro-preview": 1_048_576,
106+
"gemini-3-flash-preview": 1_048_576,
107+
"gemini-3.1-pro-preview": 1_048_576,
108+
"gemini-3.1-flash-lite-preview": 1_048_576,
109+
# Mistral
110+
"mistral-large-latest": 262_144,
111+
"mistral-large-2512": 262_144,
112+
"mistral-large-3": 262_144,
113+
"mistral-medium-latest": 131_072,
114+
"mistral-medium-2505": 131_072,
115+
"mistral-small-latest": 131_072,
116+
"mistral-small-3-2-2506": 131_072,
117+
}
118+
119+
120+
def get_context_window_limit(model_id: str) -> int | None:
    """Look up the context window limit for a model ID.

    For Bedrock cross-region model IDs (e.g. ``us.anthropic.claude-sonnet-4-6``),
    the region prefix is stripped as a fallback if the direct lookup fails.

    Args:
        model_id: The model ID to look up.

    Returns:
        The context window limit in tokens, or None if not found.
    """
    limit = _CONTEXT_WINDOW_LIMITS.get(model_id)
    if limit is None:
        # Direct lookup missed — retry with everything after the first dot,
        # which turns a cross-region ID back into the base model ID.
        _, dot, base_id = model_id.partition(".")
        if dot:
            limit = _CONTEXT_WINDOW_LIMITS.get(base_id)
    return limit
142+
143+
144+
def resolve_config_metadata(config: Any, model_id: str) -> Any:
    """Resolve model metadata fields on a config dict from built-in lookup tables.

    When ``context_window_limit`` is not explicitly set, looks it up from the built-in table.
    Explicit values pass through unchanged. Returns a new dict only when resolution adds a field;
    otherwise returns the original config to avoid unnecessary allocation.

    Args:
        config: The stored model config dict.
        model_id: The model ID to look up.

    Returns:
        The config with resolved metadata, or the original config if nothing to resolve.
    """
    if "context_window_limit" not in config:
        limit = get_context_window_limit(model_id)
        if limit is not None:
            # Copy-on-write: allocate a fresh dict only when a field is added.
            resolved = dict(config)
            resolved["context_window_limit"] = limit
            return resolved
    # Explicit value present, or model unknown — hand back the original untouched.
    return config

src/strands/models/anthropic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
2121
from ..types.streaming import StreamEvent
2222
from ..types.tools import ToolChoice, ToolChoiceToolDict, ToolSpec
23+
from ._defaults import resolve_config_metadata
2324
from ._validation import _has_location_source, validate_config_keys
2425
from .model import BaseModelConfig, Model
2526

@@ -95,7 +96,7 @@ def get_config(self) -> AnthropicConfig:
9596
Returns:
9697
The Anthropic model configuration.
9798
"""
98-
return self.config
99+
return cast(AnthropicModel.AnthropicConfig, resolve_config_metadata(self.config, self.config["model_id"]))
99100

100101
def _format_request_message_content(self, content: ContentBlock) -> dict[str, Any]:
101102
"""Format an Anthropic content block.

src/strands/models/bedrock.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
)
3232
from ..types.streaming import CitationsDelta, StreamEvent
3333
from ..types.tools import ToolChoice, ToolSpec
34+
from ._defaults import resolve_config_metadata
3435
from ._strict_schema import ensure_strict_json_schema
3536
from ._validation import validate_config_keys
3637
from .model import BaseModelConfig, CacheConfig, Model
@@ -217,7 +218,7 @@ def get_config(self) -> BedrockConfig:
217218
Returns:
218219
The Bedrock model configuration.
219220
"""
220-
return self.config
221+
return cast(BedrockModel.BedrockConfig, resolve_config_metadata(self.config, self.config.get("model_id", "")))
221222

222223
def _format_request(
223224
self,

src/strands/models/gemini.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException, ProviderTokenCountError
2020
from ..types.streaming import StreamEvent
2121
from ..types.tools import ToolChoice, ToolSpec
22+
from ._defaults import resolve_config_metadata
2223
from ._validation import _has_location_source, validate_config_keys
2324
from .model import BaseModelConfig, Model
2425

@@ -115,7 +116,7 @@ def get_config(self) -> GeminiConfig:
115116
Returns:
116117
The Gemini model configuration.
117118
"""
118-
return self.config
119+
return cast(GeminiModel.GeminiConfig, resolve_config_metadata(self.config, self.config["model_id"]))
119120

120121
def _get_client(self) -> genai.Client:
121122
"""Get a Gemini client for making requests.

src/strands/models/mistral.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import json
88
import logging
99
from collections.abc import AsyncGenerator, Iterable
10-
from typing import Any, TypeVar
10+
from typing import Any, TypeVar, cast
1111

1212
import mistralai
1313
from pydantic import BaseModel
@@ -17,6 +17,7 @@
1717
from ..types.exceptions import ModelThrottledException
1818
from ..types.streaming import StopReason, StreamEvent
1919
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
20+
from ._defaults import resolve_config_metadata
2021
from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
2122
from .model import BaseModelConfig, Model
2223

@@ -114,7 +115,7 @@ def get_config(self) -> MistralConfig:
114115
Returns:
115116
The Mistral model configuration.
116117
"""
117-
return self.config
118+
return cast(MistralModel.MistralConfig, resolve_config_metadata(self.config, self.config["model_id"]))
118119

119120
def _format_request_message_content(self, content: ContentBlock) -> str | dict[str, Any]:
120121
"""Format a Mistral content block.

src/strands/models/openai.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException
2222
from ..types.streaming import StreamEvent
2323
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
24+
from ._defaults import resolve_config_metadata
2425
from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args
2526
from ._validation import _has_location_source, validate_config_keys
2627
from .model import BaseModelConfig, Model
@@ -150,7 +151,9 @@ def get_config(self) -> OpenAIConfig:
150151
Returns:
151152
The OpenAI model configuration.
152153
"""
153-
return cast(OpenAIModel.OpenAIConfig, self.config)
154+
return cast(
155+
OpenAIModel.OpenAIConfig, resolve_config_metadata(self.config, str(self.config.get("model_id", "")))
156+
)
154157

155158
@classmethod
156159
def format_request_message_content(cls, content: ContentBlock, **kwargs: Any) -> dict[str, Any]:

src/strands/models/openai_responses.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException # noqa: E402
5959
from ..types.streaming import StreamEvent # noqa: E402
6060
from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse # noqa: E402
61+
from ._defaults import resolve_config_metadata # noqa: E402
6162
from ._openai_bedrock import BedrockMantleConfig, resolve_bedrock_client_args # noqa: E402
6263
from ._validation import validate_config_keys # noqa: E402
6364
from .model import BaseModelConfig, Model # noqa: E402
@@ -210,7 +211,10 @@ def get_config(self) -> OpenAIResponsesConfig:
210211
Returns:
211212
The OpenAI Responses API model configuration.
212213
"""
213-
return cast(OpenAIResponsesModel.OpenAIResponsesConfig, self.config)
214+
return cast(
215+
OpenAIResponsesModel.OpenAIResponsesConfig,
216+
resolve_config_metadata(self.config, str(self.config.get("model_id", ""))),
217+
)
214218

215219
@override
216220
async def count_tokens(

tests/strands/models/test_anthropic.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,30 @@ def test__init__model_configs(anthropic_client, model_id, max_tokens):
8282
assert tru_temperature == exp_temperature
8383

8484

85+
def test__init__auto_populates_context_window_limit(anthropic_client):
86+
_ = anthropic_client
87+
88+
model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1)
89+
90+
assert model.get_config().get("context_window_limit") == 1_000_000
91+
92+
93+
def test__init__explicit_context_window_limit_not_overridden(anthropic_client):
94+
_ = anthropic_client
95+
96+
model = AnthropicModel(model_id="claude-sonnet-4-20250514", max_tokens=1, context_window_limit=100_000)
97+
98+
assert model.get_config().get("context_window_limit") == 100_000
99+
100+
101+
def test__init__unknown_model_no_context_window_limit(anthropic_client):
102+
_ = anthropic_client
103+
104+
model = AnthropicModel(model_id="unknown-model", max_tokens=1)
105+
106+
assert model.get_config().get("context_window_limit") is None
107+
108+
85109
def test_update_config(model, model_id):
86110
model.update_config(model_id=model_id)
87111

tests/strands/models/test_bedrock.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,46 @@ def test__init__context_window_limit(bedrock_client):
296296
assert model.context_window_limit == 200_000
297297

298298

299+
def test__init__auto_populates_context_window_limit(bedrock_client):
300+
_ = bedrock_client
301+
302+
model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0")
303+
304+
assert model.get_config().get("context_window_limit") == 1_000_000
305+
306+
307+
def test__init__auto_populates_context_window_limit_cross_region(bedrock_client):
308+
_ = bedrock_client
309+
310+
model = BedrockModel(model_id="us.anthropic.claude-sonnet-4-6")
311+
312+
assert model.get_config().get("context_window_limit") == 1_000_000
313+
314+
315+
def test__init__auto_populates_context_window_limit_default_model(bedrock_client):
316+
_ = bedrock_client
317+
318+
model = BedrockModel()
319+
320+
assert model.get_config().get("context_window_limit") == 1_000_000
321+
322+
323+
def test__init__explicit_context_window_limit_not_overridden(bedrock_client):
324+
_ = bedrock_client
325+
326+
model = BedrockModel(model_id="anthropic.claude-sonnet-4-20250514-v1:0", context_window_limit=100_000)
327+
328+
assert model.get_config().get("context_window_limit") == 100_000
329+
330+
331+
def test__init__unknown_model_no_context_window_limit(bedrock_client):
332+
_ = bedrock_client
333+
334+
model = BedrockModel(model_id="unknown.model-v1:0")
335+
336+
assert model.get_config().get("context_window_limit") is None
337+
338+
299339
def test_update_config(model, model_id):
300340
model.update_config(model_id=model_id)
301341

0 commit comments

Comments (0)