-
Notifications
You must be signed in to change notification settings - Fork 853
Expand file tree
/
Copy pathmodel.py
More file actions
187 lines (146 loc) · 6.2 KB
/
model.py
File metadata and controls
187 lines (146 loc) · 6.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
"""Abstract base class for Agent model providers."""
from __future__ import annotations
import abc
import logging
from collections.abc import AsyncGenerator, AsyncIterable
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal, TypeVar
from pydantic import BaseModel
from ..hooks.events import AfterInvocationEvent
from ..plugins.plugin import Plugin
from ..types.content import Messages, SystemContentBlock
from ..types.streaming import StreamEvent
from ..types.tools import ToolChoice, ToolSpec
if TYPE_CHECKING:
from ..agent.agent import Agent
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Type variable bound to pydantic's BaseModel; used to type the output_model
# parameter of Model.structured_output.
T = TypeVar("T", bound=BaseModel)
@dataclass
class CacheConfig:
    """Settings that control prompt caching.

    Attributes:
        strategy: Name of the caching strategy to apply. Defaults to "auto".
            - "auto": Detect model support automatically and inject cachePoint to maximize cache coverage.
            - "anthropic": Inject cachePoint in the Anthropic-compatible format, skipping the model support check.
    """

    strategy: Literal["auto", "anthropic"] = "auto"
class Model(abc.ABC):
    """Common interface implemented by every Agent model provider.

    All model implementations in the Strands Agents SDK derive from this class, which gives the rest of
    the SDK one standardized way to configure a provider and to send requests to it.
    """

    @property
    def stateful(self) -> bool:
        """Report whether conversation state is managed server-side by the model.

        Returns:
            False in this base implementation. Providers that support server-side state should override this.
        """
        return False

    @classmethod
    def from_dict(cls, config: dict[str, Any]) -> Model:
        """Build a Model instance from a configuration dictionary.

        By default, ``client_args`` is pulled out of the configuration (when present and not None) and
        every other key is forwarded to the constructor as a keyword argument. Subclasses whose
        constructors do not follow that shape should override this method.

        Args:
            config: Provider-specific configuration dictionary. The method works on a copy, so the
                caller's dict is left unmodified.

        Returns:
            A configured Model instance.
        """
        # Work on a shallow copy so that popping "client_args" never touches the caller's dict.
        options = config.copy()
        client_args = options.pop("client_args", None)

        # A missing or None client_args is simply not forwarded to the constructor.
        if client_args is None:
            return cls(**options)

        return cls(client_args=client_args, **options)

    @abc.abstractmethod
    # pragma: no cover
    def update_config(self, **model_config: Any) -> None:
        """Apply configuration overrides to the model.

        Args:
            **model_config: Configuration overrides.
        """
        ...

    @abc.abstractmethod
    # pragma: no cover
    def get_config(self) -> Any:
        """Retrieve the model configuration.

        Returns:
            The model's configuration.
        """
        ...

    @abc.abstractmethod
    # pragma: no cover
    def structured_output(
        self,
        output_model: type[T],
        prompt: Messages,
        system_prompt: str | None = None,
        **kwargs: Any,
    ) -> AsyncGenerator[dict[str, T | Any], None]:
        """Produce structured output from the model.

        Args:
            output_model: The output model to use for the agent.
            prompt: The prompt messages to use for the agent.
            system_prompt: System prompt to provide context to the model.
            **kwargs: Additional keyword arguments for future extensibility.

        Yields:
            Model events, the last of which is the structured output.

        Raises:
            ValidationException: The response format from the model does not match the output_model
        """
        ...

    @abc.abstractmethod
    # pragma: no cover
    def stream(
        self,
        messages: Messages,
        tool_specs: list[ToolSpec] | None = None,
        system_prompt: str | None = None,
        *,
        tool_choice: ToolChoice | None = None,
        system_prompt_content: list[SystemContentBlock] | None = None,
        invocation_state: dict[str, Any] | None = None,
        **kwargs: Any,
    ) -> AsyncIterable[StreamEvent]:
        """Stream a conversation with the model.

        Implementations cover the full lifecycle of conversing with the model:

        1. Format the messages, tool specs, and configuration into a streaming request
        2. Send the request to the model
        3. Yield the formatted message chunks

        Args:
            messages: List of message objects to be processed by the model.
            tool_specs: List of tool specifications to make available to the model.
            system_prompt: System prompt to provide context to the model.
            tool_choice: Selection strategy for tool invocation.
            system_prompt_content: System prompt content blocks for advanced features like caching.
            invocation_state: Caller-provided state/context that was passed to the agent when it was invoked.
            **kwargs: Additional keyword arguments for future extensibility.

        Yields:
            Formatted message chunks from the model.

        Raises:
            ModelThrottledException: When the model service is throttling requests from the client.
        """
        ...
class _ModelPlugin(Plugin):
    """Plugin that wires model-related lifecycle hooks into an agent."""

    @property
    def name(self) -> str:
        """Stable string identifier of this plugin."""
        return "strands:model"

    @staticmethod
    def _on_after_invocation(event: AfterInvocationEvent) -> None:
        """Run model housekeeping once an invocation has finished.

        Currently this does one thing:
        - Clears the agent's messages when the model is managing conversation state server-side.

        Args:
            event: The after-invocation event; its ``agent`` attribute is the agent being cleaned up.
        """
        agent = event.agent

        # Nothing to do unless the model reports itself as stateful.
        if not agent.model.stateful:
            return

        agent.messages.clear()
        logger.debug(
            "response_id=<%s> | cleared messages for server-managed conversation",
            agent._model_state.get("response_id"),
        )

    def init_agent(self, agent: "Agent") -> None:
        """Attach this plugin's lifecycle hooks to the given agent.

        Args:
            agent: The agent instance to register hooks with.
        """
        agent.add_hook(self._on_after_invocation, AfterInvocationEvent)