Skip to content

Commit ae19308

Browse files
authored
feat: add stateful model support for server-side conversation management (#2004)
1 parent 424224d commit ae19308

23 files changed

+521
-27
lines changed

src/strands/agent/agent.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from ..hooks.registry import TEvent
4646
from ..interrupt import _InterruptState
4747
from ..models.bedrock import BedrockModel
48-
from ..models.model import Model
48+
from ..models.model import Model, _ModelPlugin
4949
from ..plugins import Plugin
5050
from ..plugins.registry import _PluginRegistry
5151
from ..session.session_manager import SessionManager
@@ -68,6 +68,7 @@
6868
from .base import AgentBase
6969
from .conversation_manager import (
7070
ConversationManager,
71+
NullConversationManager,
7172
SlidingWindowConversationManager,
7273
)
7374
from .state import AgentState
@@ -229,7 +230,19 @@ def __init__(
229230
else:
230231
self.callback_handler = callback_handler
231232

232-
self.conversation_manager = conversation_manager if conversation_manager else SlidingWindowConversationManager()
233+
if self.model.stateful and conversation_manager is not None:
234+
raise ValueError(
235+
"conversation_manager cannot be used with a stateful model. "
236+
"The model manages conversation state server-side."
237+
)
238+
239+
self.conversation_manager: ConversationManager
240+
if self.model.stateful:
241+
self.conversation_manager = NullConversationManager()
242+
elif conversation_manager:
243+
self.conversation_manager = conversation_manager
244+
else:
245+
self.conversation_manager = SlidingWindowConversationManager()
233246

234247
# Process trace attributes to ensure they're of compatible types
235248
self.trace_attributes: dict[str, AttributeValue] = {}
@@ -282,6 +295,9 @@ def __init__(
282295

283296
self._interrupt_state = _InterruptState()
284297

298+
# Runtime state for model providers (e.g., server-side response ids)
299+
self._model_state: dict[str, Any] = {}
300+
285301
# Initialize lock for guarding concurrent invocations
286302
# Using threading.Lock instead of asyncio.Lock because run_async() creates
287303
# separate event loops in different threads, so asyncio.Lock wouldn't work
@@ -327,6 +343,9 @@ def __init__(
327343
for hook in hooks:
328344
self.hooks.add_hook(hook)
329345

346+
# Register built-in plugins
347+
self._plugin_registry.add_and_init(_ModelPlugin())
348+
330349
if plugins:
331350
for plugin in plugins:
332351
self._plugin_registry.add_and_init(plugin)

src/strands/event_loop/event_loop.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ async def _handle_model_execution(
338338
system_prompt_content=agent._system_prompt_content,
339339
tool_choice=structured_output_context.tool_choice,
340340
invocation_state=invocation_state,
341+
model_state=agent._model_state,
341342
cancel_signal=agent._cancel_signal,
342343
):
343344
yield event

src/strands/event_loop/streaming.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ async def stream_messages(
463463
tool_choice: Any | None = None,
464464
system_prompt_content: list[SystemContentBlock] | None = None,
465465
invocation_state: dict[str, Any] | None = None,
466+
model_state: dict[str, Any] | None = None,
466467
cancel_signal: threading.Event | None = None,
467468
**kwargs: Any,
468469
) -> AsyncGenerator[TypedEvent, None]:
@@ -477,6 +478,7 @@ async def stream_messages(
477478
system_prompt_content: The authoritative system prompt content blocks that always contains the
478479
system prompt data.
479480
invocation_state: Caller-provided state/context that was passed to the agent when it was invoked.
481+
model_state: Runtime state for model providers (e.g., server-side response ids).
480482
cancel_signal: Optional threading.Event to check for cancellation during streaming.
481483
**kwargs: Additional keyword arguments for future extensibility.
482484
@@ -495,6 +497,7 @@ async def stream_messages(
495497
tool_choice=tool_choice,
496498
system_prompt_content=system_prompt_content,
497499
invocation_state=invocation_state,
500+
model_state=model_state,
498501
)
499502

500503
async for event in process_stream(chunks, start_time, cancel_signal):

src/strands/models/model.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,19 @@
44
import logging
55
from collections.abc import AsyncGenerator, AsyncIterable
66
from dataclasses import dataclass
7-
from typing import Any, Literal, TypeVar
7+
from typing import TYPE_CHECKING, Any, Literal, TypeVar
88

99
from pydantic import BaseModel
1010

11+
from ..hooks.events import AfterInvocationEvent
12+
from ..plugins.plugin import Plugin
1113
from ..types.content import Messages, SystemContentBlock
1214
from ..types.streaming import StreamEvent
1315
from ..types.tools import ToolChoice, ToolSpec
1416

17+
if TYPE_CHECKING:
18+
from ..agent.agent import Agent
19+
1520
logger = logging.getLogger(__name__)
1621

1722
T = TypeVar("T", bound=BaseModel)
@@ -37,6 +42,15 @@ class Model(abc.ABC):
3742
standardized way to configure and process requests for different AI model providers.
3843
"""
3944

45+
@property
46+
def stateful(self) -> bool:
47+
"""Whether the model manages conversation state server-side.
48+
49+
Returns:
50+
False by default. Model providers that support server-side state should override this.
51+
"""
52+
return False
53+
4054
@abc.abstractmethod
4155
# pragma: no cover
4256
def update_config(self, **model_config: Any) -> None:
@@ -115,3 +129,34 @@ def stream(
115129
ModelThrottledException: When the model service is throttling requests from the client.
116130
"""
117131
pass
132+
133+
134+
class _ModelPlugin(Plugin):
135+
"""Plugin that manages model-related lifecycle hooks."""
136+
137+
@property
138+
def name(self) -> str:
139+
"""A stable string identifier for this plugin."""
140+
return "strands:model"
141+
142+
@staticmethod
143+
def _on_after_invocation(event: AfterInvocationEvent) -> None:
144+
"""Handle post-invocation model management tasks.
145+
146+
Performs the following:
147+
- Clears messages when the model is managing conversation state server-side.
148+
"""
149+
if event.agent.model.stateful:
150+
event.agent.messages.clear()
151+
logger.debug(
152+
"response_id=<%s> | cleared messages for server-managed conversation",
153+
event.agent._model_state.get("response_id"),
154+
)
155+
156+
def init_agent(self, agent: "Agent") -> None:
157+
"""Register model lifecycle hooks with the agent.
158+
159+
Args:
160+
agent: The agent instance to register hooks with.
161+
"""
162+
agent.add_hook(self._on_after_invocation, AfterInvocationEvent)

src/strands/models/openai_responses.py

Lines changed: 34 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,8 @@
11
"""OpenAI model provider using the Responses API.
22
3-
The Responses API is OpenAI's newer API that differs from the Chat Completions API in several key ways:
3+
Note: Built-in tools (web search, code interpreter, file search) are not yet supported.
44
5-
1. The Responses API can maintain conversation state server-side through "previous_response_id",
6-
while Chat Completions is stateless and requires sending full conversation history each time.
7-
Note: This implementation currently only implements the stateless approach.
8-
9-
2. Responses API uses "input" (list of items) instead of "messages", and system
10-
prompts are passed as "instructions" rather than a system role message.
11-
12-
3. Responses API supports built-in tools (web search, code interpreter, file search)
13-
Note: These are not yet implemented in this provider.
14-
15-
- Docs: https://platform.openai.com/docs/api-reference/responses
5+
Docs: https://platform.openai.com/docs/api-reference/responses
166
"""
177

188
import base64
@@ -132,10 +122,14 @@ class OpenAIResponsesConfig(TypedDict, total=False):
132122
params: Model parameters (e.g., max_output_tokens, temperature, etc.).
133123
For a complete list of supported parameters, see
134124
https://platform.openai.com/docs/api-reference/responses/create.
125+
stateful: Whether to enable server-side conversation state management.
126+
When True, the server stores conversation history and the client does not need to
127+
send the full message history with each request. Defaults to False.
135128
"""
136129

137130
model_id: str
138131
params: dict[str, Any] | None
132+
stateful: bool
139133

140134
def __init__(
141135
self, client_args: dict[str, Any] | None = None, **model_config: Unpack[OpenAIResponsesConfig]
@@ -153,6 +147,15 @@ def __init__(
153147

154148
logger.debug("config=<%s> | initializing", self.config)
155149

150+
@property
151+
@override
152+
def stateful(self) -> bool:
153+
"""Whether server-side conversation storage is enabled.
154+
155+
Derived from the ``stateful`` configuration option.
156+
"""
157+
return bool(self.config.get("stateful"))
158+
156159
@override
157160
def update_config(self, **model_config: Unpack[OpenAIResponsesConfig]) -> None: # type: ignore[override]
158161
"""Update the OpenAI Responses API model configuration with the provided arguments.
@@ -180,6 +183,7 @@ async def stream(
180183
system_prompt: str | None = None,
181184
*,
182185
tool_choice: ToolChoice | None = None,
186+
model_state: dict[str, Any] | None = None,
183187
**kwargs: Any,
184188
) -> AsyncGenerator[StreamEvent, None]:
185189
"""Stream conversation with the OpenAI Responses API model.
@@ -189,6 +193,7 @@ async def stream(
189193
tool_specs: List of tool specifications to make available to the model.
190194
system_prompt: System prompt to provide context to the model.
191195
tool_choice: Selection strategy for tool invocation.
196+
model_state: Runtime state for model providers (e.g., server-side response ids).
192197
**kwargs: Additional keyword arguments for future extensibility.
193198
194199
Yields:
@@ -199,7 +204,7 @@ async def stream(
199204
ModelThrottledException: If the request is throttled by OpenAI (rate limits).
200205
"""
201206
logger.debug("formatting request for OpenAI Responses API")
202-
request = self._format_request(messages, tool_specs, system_prompt, tool_choice)
207+
request = self._format_request(messages, tool_specs, system_prompt, tool_choice, model_state)
203208
logger.debug("formatted request=<%s>", request)
204209

205210
logger.debug("invoking OpenAI Responses API model")
@@ -219,7 +224,14 @@ async def stream(
219224

220225
async for event in response:
221226
if hasattr(event, "type"):
222-
if event.type == "response.reasoning_text.delta":
227+
if event.type == "response.created":
228+
# Capture response id for server-side conversation chaining
229+
if hasattr(event, "response"):
230+
response_id = getattr(event.response, "id", None)
231+
if model_state is not None and response_id:
232+
model_state["response_id"] = response_id
233+
234+
elif event.type == "response.reasoning_text.delta":
223235
# Reasoning content streaming (for o1/o3 reasoning models)
224236
chunks, data_type = self._stream_switch_content("reasoning_content", data_type)
225237
for chunk in chunks:
@@ -383,6 +395,7 @@ def _format_request(
383395
tool_specs: list[ToolSpec] | None = None,
384396
system_prompt: str | None = None,
385397
tool_choice: ToolChoice | None = None,
398+
model_state: dict[str, Any] | None = None,
386399
) -> dict[str, Any]:
387400
"""Format an OpenAI Responses API compatible response streaming request.
388401
@@ -391,6 +404,7 @@ def _format_request(
391404
tool_specs: List of tool specifications to make available to the model.
392405
system_prompt: System prompt to provide context to the model.
393406
tool_choice: Selection strategy for tool invocation.
407+
model_state: Runtime state for model providers (e.g., server-side response ids).
394408
395409
Returns:
396410
An OpenAI Responses API compatible response streaming request.
@@ -400,13 +414,18 @@ def _format_request(
400414
format.
401415
"""
402416
input_items = self._format_request_messages(messages)
403-
request = {
417+
request: dict[str, Any] = {
404418
"model": self.config["model_id"],
405419
"input": input_items,
406420
"stream": True,
407421
**cast(dict[str, Any], self.config.get("params", {})),
422+
"store": self.stateful,
408423
}
409424

425+
response_id = model_state.get("response_id") if model_state else None
426+
if response_id and self.stateful:
427+
request["previous_response_id"] = response_id
428+
410429
if system_prompt:
411430
request["instructions"] = system_prompt
412431

src/strands/multiagent/graph.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ class GraphNode:
170170
execution_time: int = 0
171171
_initial_messages: Messages = field(default_factory=list, init=False)
172172
_initial_state: AgentState = field(default_factory=AgentState, init=False)
173+
_initial_model_state: dict[str, Any] = field(default_factory=dict, init=False)
173174

174175
def __post_init__(self) -> None:
175176
"""Capture initial executor state after initialization."""
@@ -180,6 +181,9 @@ def __post_init__(self) -> None:
180181
if hasattr(self.executor, "state") and hasattr(self.executor.state, "get"):
181182
self._initial_state = AgentState(self.executor.state.get())
182183

184+
if hasattr(self.executor, "_model_state"):
185+
self._initial_model_state = copy.deepcopy(self.executor._model_state)
186+
183187
def reset_executor_state(self) -> None:
184188
"""Reset GraphNode executor state to initial state when graph was created.
185189
@@ -192,6 +196,9 @@ def reset_executor_state(self) -> None:
192196
if hasattr(self.executor, "state"):
193197
self.executor.state = AgentState(self._initial_state.get())
194198

199+
if hasattr(self.executor, "_model_state"):
200+
self.executor._model_state = copy.deepcopy(self._initial_model_state)
201+
195202
# Reset execution status
196203
self.execution_status = Status.PENDING
197204
self.result = None
@@ -639,6 +646,7 @@ def _activate_interrupt(
639646
"interrupt_state": node.executor._interrupt_state.to_dict(),
640647
"state": node.executor.state.get(),
641648
"messages": node.executor.messages,
649+
"model_state": node.executor._model_state,
642650
}
643651
)
644652

@@ -1074,6 +1082,7 @@ def _build_node_input(self, node: GraphNode) -> list[ContentBlock]:
10741082
node.executor.messages = node_context["messages"]
10751083
node.executor.state = AgentState(node_context["state"])
10761084
node.executor._interrupt_state = _InterruptState.from_dict(node_context["interrupt_state"])
1085+
node.executor._model_state = node_context.get("model_state", {})
10771086

10781087
return node_responses
10791088

src/strands/multiagent/swarm.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,14 @@ class SwarmNode:
6969
swarm: Optional["Swarm"] = None
7070
_initial_messages: Messages = field(default_factory=list, init=False)
7171
_initial_state: AgentState = field(default_factory=AgentState, init=False)
72+
_initial_model_state: dict[str, Any] = field(default_factory=dict, init=False)
7273

7374
def __post_init__(self) -> None:
7475
"""Capture initial executor state after initialization."""
7576
# Deep copy the initial messages and state to preserve them
7677
self._initial_messages = copy.deepcopy(self.executor.messages)
7778
self._initial_state = AgentState(self.executor.state.get())
79+
self._initial_model_state = copy.deepcopy(self.executor._model_state)
7880

7981
def __hash__(self) -> int:
8082
"""Return hash for SwarmNode based on node_id."""
@@ -104,10 +106,12 @@ def reset_executor_state(self) -> None:
104106
self.executor.messages = context["messages"]
105107
self.executor.state = AgentState(context["state"])
106108
self.executor._interrupt_state = _InterruptState.from_dict(context["interrupt_state"])
109+
self.executor._model_state = context.get("model_state", {})
107110
return
108111

109112
self.executor.messages = copy.deepcopy(self._initial_messages)
110113
self.executor.state = AgentState(self._initial_state.get())
114+
self.executor._model_state = copy.deepcopy(self._initial_model_state)
111115

112116

113117
@dataclass
@@ -697,6 +701,7 @@ def _activate_interrupt(self, node: SwarmNode, interrupts: list[Interrupt]) -> M
697701
"interrupt_state": node.executor._interrupt_state.to_dict(),
698702
"state": node.executor.state.get(),
699703
"messages": node.executor.messages,
704+
"model_state": node.executor._model_state,
700705
}
701706

702707
self._interrupt_state.interrupts.update({interrupt.id: interrupt for interrupt in interrupts})

0 commit comments

Comments (0)