feat: add context_window_limit to model configs (#2176)

opieter-aws · web-flow · commit c723e5287621 · 2026-04-21T17:12:47.000-04:00
diff --git a/src/strands/models/__init__.py b/src/strands/models/__init__.py
@@ -7,11 +7,12 @@
 
 from . import bedrock, model
 from .bedrock import BedrockModel
-from .model import CacheConfig, Model
+from .model import BaseModelConfig, CacheConfig, Model
 
 __all__ = [
     "bedrock",
     "model",
+    "BaseModelConfig",
     "BedrockModel",
     "CacheConfig",
     "Model",
diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py
@@ -8,7 +8,7 @@
 import logging
 import mimetypes
 from collections.abc import AsyncGenerator
-from typing import Any, TypedDict, TypeVar, cast
+from typing import Any, TypeVar, cast
 
 import anthropic
 from pydantic import BaseModel
@@ -21,7 +21,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolChoiceToolDict, ToolSpec
 from ._validation import _has_location_source, validate_config_keys
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -46,7 +46,7 @@ class AnthropicModel(Model):
         "input and output tokens exceed your context limit",
     }
 
-    class AnthropicConfig(TypedDict, total=False):
+    class AnthropicConfig(BaseModelConfig, total=False):
         """Configuration options for Anthropic models.
 
         Attributes:
diff --git a/src/strands/models/bedrock.py b/src/strands/models/bedrock.py
@@ -15,7 +15,7 @@
 from botocore.config import Config as BotocoreConfig
 from botocore.exceptions import ClientError
 from pydantic import BaseModel
-from typing_extensions import TypedDict, Unpack, override
+from typing_extensions import Unpack, override
 
 from strands.types.media import S3Location, SourceLocation
 
@@ -31,7 +31,7 @@
 from ..types.streaming import CitationsDelta, StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import validate_config_keys
-from .model import CacheConfig, Model
+from .model import BaseModelConfig, CacheConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -69,7 +69,7 @@ class BedrockModel(Model):
     - Context window overflow detection
     """
 
-    class BedrockConfig(TypedDict, total=False):
+    class BedrockConfig(BaseModelConfig, total=False):
         """Configuration options for Bedrock models.
 
         Attributes:
diff --git a/src/strands/models/gemini.py b/src/strands/models/gemini.py
@@ -9,7 +9,7 @@
 import mimetypes
 import secrets
 from collections.abc import AsyncGenerator
-from typing import Any, TypedDict, TypeVar, cast
+from typing import Any, TypeVar, cast
 
 import pydantic
 from google import genai
@@ -20,7 +20,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import _has_location_source, validate_config_keys
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +33,7 @@ class GeminiModel(Model):
     - Docs: https://ai.google.dev/api
     """
 
-    class GeminiConfig(TypedDict, total=False):
+    class GeminiConfig(BaseModelConfig, total=False):
         """Configuration options for Gemini models.
 
         Attributes:
diff --git a/src/strands/models/litellm.py b/src/strands/models/litellm.py
@@ -6,7 +6,7 @@
 import json
 import logging
 from collections.abc import AsyncGenerator
-from typing import Any, TypedDict, TypeVar, cast
+from typing import Any, TypeVar, cast
 
 import litellm
 from litellm.exceptions import ContextWindowExceededError
@@ -21,6 +21,7 @@
 from ..types.streaming import MetadataEvent, StreamEvent
 from ..types.tools import ToolChoice, ToolSpec, ToolUse
 from ._validation import validate_config_keys
+from .model import BaseModelConfig
 from .openai import OpenAIModel
 
 logger = logging.getLogger(__name__)
@@ -35,7 +36,7 @@
 class LiteLLMModel(OpenAIModel):
     """LiteLLM model provider implementation."""
 
-    class LiteLLMConfig(TypedDict, total=False):
+    class LiteLLMConfig(BaseModelConfig, total=False):
         """Configuration options for LiteLLM models.
 
         Attributes:
diff --git a/src/strands/models/llamaapi.py b/src/strands/models/llamaapi.py
@@ -14,14 +14,14 @@
 import llama_api_client
 from llama_api_client import LlamaAPIClient
 from pydantic import BaseModel
-from typing_extensions import TypedDict, Unpack, override
+from typing_extensions import Unpack, override
 
 from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ModelThrottledException
 from ..types.streaming import StreamEvent, Usage
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
 from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -31,7 +31,7 @@
 class LlamaAPIModel(Model):
     """Llama API model provider implementation."""
 
-    class LlamaConfig(TypedDict, total=False):
+    class LlamaConfig(BaseModelConfig, total=False):
         """Configuration options for Llama API models.
 
         Attributes:
diff --git a/src/strands/models/llamacpp.py b/src/strands/models/llamacpp.py
@@ -17,7 +17,6 @@
 from collections.abc import AsyncGenerator
 from typing import (
     Any,
-    TypedDict,
     TypeVar,
     cast,
 )
@@ -31,7 +30,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -86,7 +85,7 @@ class LlamaCppModel(Model):
         >>> response = agent(image_content)
     """
 
-    class LlamaCppConfig(TypedDict, total=False):
+    class LlamaCppConfig(BaseModelConfig, total=False):
         """Configuration options for llama.cpp models.
 
         Attributes:
diff --git a/src/strands/models/mistral.py b/src/strands/models/mistral.py
@@ -11,14 +11,14 @@
 
 import mistralai
 from pydantic import BaseModel
-from typing_extensions import TypedDict, Unpack, override
+from typing_extensions import Unpack, override
 
 from ..types.content import ContentBlock, Messages
 from ..types.exceptions import ModelThrottledException
 from ..types.streaming import StopReason, StreamEvent
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
 from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -36,7 +36,7 @@ class MistralModel(Model):
     - System prompts
     """
 
-    class MistralConfig(TypedDict, total=False):
+    class MistralConfig(BaseModelConfig, total=False):
         """Configuration parameters for Mistral models.
 
         Attributes:
diff --git a/src/strands/models/model.py b/src/strands/models/model.py
@@ -4,7 +4,7 @@
 import logging
 from collections.abc import AsyncGenerator, AsyncIterable
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Literal, TypeVar
+from typing import TYPE_CHECKING, Any, Literal, TypedDict, TypeVar
 
 from pydantic import BaseModel
 
@@ -22,6 +22,17 @@
 T = TypeVar("T", bound=BaseModel)
 
 
+class BaseModelConfig(TypedDict, total=False):
+    """Configuration keys common to every model provider; all keys are optional.
+
+    Attributes:
+        context_window_limit: Maximum context window size in tokens for the model.
+            Total token capacity shared by input and output; may be omitted or None.
+    """
+
+    context_window_limit: int | None
+
+
 @dataclass
 class CacheConfig:
     """Configuration for prompt caching.
@@ -51,6 +62,16 @@ def stateful(self) -> bool:
         """
         return False
 
+    @property
+    def context_window_limit(self) -> int | None:
+        """Return the configured context window size in tokens, or None if unset."""
+        model_config = self.get_config()
+        if isinstance(model_config, dict):
+            # Dict-style configs are read by key; a missing key yields None.
+            return model_config.get("context_window_limit")
+        # Any other config object is read by attribute, defaulting to None.
+        return getattr(model_config, "context_window_limit", None)
+
     @abc.abstractmethod
     # pragma: no cover
     def update_config(self, **model_config: Any) -> None:
diff --git a/src/strands/models/ollama.py b/src/strands/models/ollama.py
@@ -10,13 +10,13 @@
 
 import ollama
 from pydantic import BaseModel
-from typing_extensions import TypedDict, Unpack, override
+from typing_extensions import Unpack, override
 
 from ..types.content import ContentBlock, Messages
 from ..types.streaming import StopReason, StreamEvent
 from ..types.tools import ToolChoice, ToolSpec
 from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +33,7 @@ class OllamaModel(Model):
     - Tool/function calling
     """
 
-    class OllamaConfig(TypedDict, total=False):
+    class OllamaConfig(BaseModelConfig, total=False):
         """Configuration parameters for Ollama models.
 
         Attributes:
diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py
@@ -9,7 +9,7 @@
 import mimetypes
 from collections.abc import AsyncGenerator, AsyncIterator
 from contextlib import asynccontextmanager
-from typing import Any, Protocol, TypedDict, TypeVar, cast
+from typing import Any, Protocol, TypeVar, cast
 
 import openai
 from openai.types.chat.parsed_chat_completion import ParsedChatCompletion
@@ -22,7 +22,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
 from ._validation import _has_location_source, validate_config_keys
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -53,7 +53,7 @@ class OpenAIModel(Model):
 
     client: Client
 
-    class OpenAIConfig(TypedDict, total=False):
+    class OpenAIConfig(BaseModelConfig, total=False):
         """Configuration options for OpenAI models.
 
         Attributes:
diff --git a/src/strands/models/openai_responses.py b/src/strands/models/openai_responses.py
@@ -59,7 +59,7 @@
 from ..types.streaming import StreamEvent  # noqa: E402
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse  # noqa: E402
 from ._validation import validate_config_keys  # noqa: E402
-from .model import Model  # noqa: E402
+from .model import BaseModelConfig, Model  # noqa: E402
 
 logger = logging.getLogger(__name__)
 
@@ -122,7 +122,7 @@ class OpenAIResponsesModel(Model):
     client: Client
     client_args: dict[str, Any]
 
-    class OpenAIResponsesConfig(TypedDict, total=False):
+    class OpenAIResponsesConfig(BaseModelConfig, total=False):
         """Configuration options for OpenAI Responses API models.
 
         Attributes:
diff --git a/src/strands/models/sagemaker.py b/src/strands/models/sagemaker.py
@@ -17,6 +17,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolResult, ToolSpec
 from ._validation import validate_config_keys, warn_on_tool_choice_not_supported
+from .model import BaseModelConfig
 from .openai import OpenAIModel
 
 T = TypeVar("T", bound=BaseModel)
@@ -116,7 +117,7 @@ class SageMakerAIPayloadSchema(TypedDict, total=False):
         tool_results_as_user_messages: bool | None
         additional_args: dict[str, Any] | None
 
-    class SageMakerAIEndpointConfig(TypedDict, total=False):
+    class SageMakerAIEndpointConfig(BaseModelConfig, total=False):
         """Configuration options for SageMaker models.
 
         Attributes:
diff --git a/src/strands/models/writer.py b/src/strands/models/writer.py
@@ -8,7 +8,7 @@
 import logging
 import mimetypes
 from collections.abc import AsyncGenerator
-from typing import Any, TypedDict, TypeVar, cast
+from typing import Any, TypeVar, cast
 
 import writerai
 from pydantic import BaseModel
@@ -19,7 +19,7 @@
 from ..types.streaming import StreamEvent
 from ..types.tools import ToolChoice, ToolResult, ToolSpec, ToolUse
 from ._validation import _has_location_source, validate_config_keys, warn_on_tool_choice_not_supported
-from .model import Model
+from .model import BaseModelConfig, Model
 
 logger = logging.getLogger(__name__)
 
@@ -29,7 +29,7 @@
 class WriterModel(Model):
     """Writer API model provider implementation."""
 
-    class WriterConfig(TypedDict, total=False):
+    class WriterConfig(BaseModelConfig, total=False):
         """Configuration options for Writer API.
 
         Attributes:
diff --git a/tests/strands/models/test_bedrock.py b/tests/strands/models/test_bedrock.py
@@ -288,6 +288,15 @@ def test__init__model_config(bedrock_client):
     assert tru_max_tokens == exp_max_tokens
 
 
+def test__init__context_window_limit(bedrock_client):
+    """The limit passed to the constructor is exposed via config and property."""
+    _ = bedrock_client  # fixture value itself is unused
+    expected_limit = 200_000
+    bedrock_model = BedrockModel(context_window_limit=expected_limit)
+    assert bedrock_model.get_config().get("context_window_limit") == expected_limit
+    assert bedrock_model.context_window_limit == expected_limit
+
+
 def test_update_config(model, model_id):
     model.update_config(model_id=model_id)
 
diff --git a/tests/strands/models/test_gemini.py b/tests/strands/models/test_gemini.py
@@ -70,6 +70,15 @@ def test__init__model_configs(gemini_client, model_id):
     assert tru_temperature == exp_temperature
 
 
+def test__init__context_window_limit(gemini_client):
+    """The limit passed to the constructor is exposed via config and property."""
+    _ = gemini_client  # fixture value itself is unused
+    expected_limit = 1_048_576
+    gemini_model = GeminiModel(model_id="gemini-2.5-flash", context_window_limit=expected_limit)
+    assert gemini_model.get_config().get("context_window_limit") == expected_limit
+    assert gemini_model.context_window_limit == expected_limit
+
+
 def test_update_config(model, model_id):
     model.update_config(model_id=model_id)
 
diff --git a/tests/strands/models/test_model.py b/tests/strands/models/test_model.py
diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py