sdk-python/src/strands/agent/conversation_manager/summarizing_conversation_manager.py at 7db57db4a6ef19b8281ec71bfc7138cd6dd6429a · strands-agents/sdk-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
"""Summarizing conversation history management with configurable options."""

import logging
from typing import TYPE_CHECKING, Any, Optional, cast

from typing_extensions import override

from ..._async import run_async
from ...event_loop.streaming import process_stream
from ...tools._tool_helpers import noop_tool
from ...tools.registry import ToolRegistry
from ...types.content import Message
from ...types.exceptions import ContextWindowOverflowException
from ...types.tools import AgentTool
from .conversation_manager import ConversationManager

if TYPE_CHECKING:
    from ..agent import Agent


logger = logging.getLogger(__name__)


# System prompt used for summarization when the manager has neither a dedicated
# summarization agent nor a custom summarization system prompt configured.
# NOTE(review): the lone "*" line in the example format below looks like a stray bullet.
# Confirm whether it is intentional before touching it: this text is passed to the model as-is.
DEFAULT_SUMMARIZATION_PROMPT = """You are a conversation summarizer. Provide a concise summary of the conversation \
history.

Format Requirements:
- You MUST create a structured and concise summary in bullet-point format.
- You MUST NOT respond conversationally.
- You MUST NOT address the user directly.
- You MUST NOT comment on tool availability.

Assumptions:
- You MUST NOT assume tool executions failed unless otherwise stated.

Task:
Your task is to create a structured summary document:
- It MUST contain bullet points with key topics and questions covered
- It MUST contain bullet points for all significant tools executed and their results
- It MUST contain bullet points for any code or technical information shared
- It MUST contain a section of key insights gained
- It MUST format the summary in the third person

Example format:

## Conversation Summary
* Topic 1: Key information
* Topic 2: Key information
*
## Tools Executed
* Tool X: Result Y"""


class SummarizingConversationManager(ConversationManager):
    """Implements a summarizing window manager.

    This manager provides a configurable option to summarize older context instead of
    simply trimming it, helping preserve important information while staying within
    context limits.
    """

    def __init__(
        self,
        summary_ratio: float = 0.3,
        preserve_recent_messages: int = 10,
        summarization_agent: Optional["Agent"] = None,
        summarization_system_prompt: str | None = None,
        *,
        compression_threshold: float | None = None,
    ):
        """Create a summarizing conversation manager.

        Args:
            summary_ratio: Fraction of the message history to summarize on each reduction.
                Values outside ``[0.1, 0.8]`` are clamped into that range. Defaults to 0.3.
            preserve_recent_messages: Number of most recent messages excluded when choosing
                how many messages to summarize. Defaults to 10.
            summarization_agent: Optional dedicated agent that produces the summary. When omitted,
                the model of the agent being managed is called directly.
            summarization_system_prompt: Optional system prompt used when no dedicated agent is
                configured. When None, ``DEFAULT_SUMMARIZATION_PROMPT`` is used.
            compression_threshold: Forwarded unchanged to the parent class initializer.

        Raises:
            ValueError: If both ``summarization_agent`` and ``summarization_system_prompt`` are given.
        """
        super().__init__(compression_threshold=compression_threshold)

        agent_given = summarization_agent is not None
        prompt_given = summarization_system_prompt is not None
        if agent_given and prompt_given:
            raise ValueError(
                "Cannot provide both summarization_agent and summarization_system_prompt. "
                "Agents come with their own system prompt."
            )

        # Clamp the ratio: cap it at 0.8 first, then floor it at 0.1.
        capped_ratio = min(0.8, summary_ratio)
        self.summary_ratio = max(0.1, capped_ratio)

        self._summary_message: Message | None = None
        self.summarization_system_prompt = summarization_system_prompt
        self.summarization_agent = summarization_agent
        self.preserve_recent_messages = preserve_recent_messages

    @override
    def restore_from_session(self, state: dict[str, Any]) -> list[Message] | None:
        """Restore this manager from state previously saved in a session.

        The parent class restores its own state first; afterwards the stored summary message
        (the ``"summary_message"`` entry of ``state``, if any) is reinstated.

        Args:
            state: The previously saved state of this conversation manager.

        Returns:
            A one-element list holding the restored summary message, or None when the state
            carries no (truthy) summary message.
        """
        super().restore_from_session(state)

        restored_summary = state.get("summary_message")
        self._summary_message = restored_summary

        if self._summary_message:
            return [self._summary_message]
        return None

    def get_state(self) -> dict[str, Any]:
        """Return a dictionary representation of this manager's state.

        Returns:
            The parent class state plus a ``"summary_message"`` entry holding the current summary
            message (or None). A ``"summary_message"`` key present in the parent state wins.
        """
        snapshot: dict[str, Any] = {"summary_message": self._summary_message}
        snapshot.update(super().get_state())
        return snapshot

    def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
        """Apply management strategy to conversation history.

        For the summarizing conversation manager, no proactive management is performed.
        Summarization only occurs when there's a context overflow that triggers reduce_context.

        Args:
            agent: The agent whose conversation history will be managed.
                The agent's messages list is modified in-place.
            **kwargs: Additional keyword arguments for future extensibility.
        """
        # No proactive management - summarization only happens on context overflow
        pass

    def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
        """Reduce context using summarization.

        Args:
            agent: The agent whose conversation history will be reduced.
                The agent's messages list is modified in-place.
            e: The exception that triggered the context reduction, if any.
            **kwargs: Additional keyword arguments for future extensibility.

        Raises:
            ContextWindowOverflowException: If the context cannot be summarized.
        """
        try:
            self._summarize_oldest(agent)
        except Exception as summarization_error:
            logger.error("Summarization failed: %s", summarization_error)
            raise summarization_error from e

    def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
        """Proactively reduce context by summarizing oldest messages.

        Args:
            agent: The agent whose conversation history will be reduced.
            **kwargs: Additional keyword arguments for future extensibility.

        Returns:
            True if the history was reduced, False otherwise.
        """
        self._summarize_oldest(agent)
        return True

    def _summarize_oldest(self, agent: "Agent") -> None:
        """Summarize the oldest messages and replace them with a summary.

        Args:
            agent: The agent instance.

        Raises:
            ContextWindowOverflowException: If there are insufficient messages for summarization.
        """
        # Calculate how many messages to summarize
        messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))

        # Ensure we don't summarize recent messages
        messages_to_summarize_count = min(
            messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
        )

        if messages_to_summarize_count <= 0:
            raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

        # Adjust split point to avoid breaking ToolUse/ToolResult pairs
        messages_to_summarize_count = self._adjust_split_point_for_tool_pairs(
            agent.messages, messages_to_summarize_count
        )

        if messages_to_summarize_count <= 0:
            raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

        # Extract messages to summarize
        messages_to_summarize = agent.messages[:messages_to_summarize_count]
        remaining_messages = agent.messages[messages_to_summarize_count:]

        # Keep track of the number of messages that have been summarized thus far.
        self.removed_message_count += len(messages_to_summarize)
        # If there is a summary message, don't count it in the removed_message_count.
        if self._summary_message:
            self.removed_message_count -= 1

        # Generate summary
        self._summary_message = self._generate_summary(messages_to_summarize, agent)

        # Replace the summarized messages with the summary
        agent.messages[:] = [self._summary_message] + remaining_messages

    def _generate_summary(self, messages: list[Message], agent: "Agent") -> Message:
        """Generate a summary of the provided messages.

        When a dedicated summarization_agent was provided at init time, it is invoked as before
        (full agent pipeline, tool execution, etc.).

        In the default case (no summarization_agent), the parent agent's *model* is called
        directly via ``model.stream()``.  This avoids re-entering the agent pipeline which
        would deadlock on ``_invocation_lock`` and corrupt metrics / traces / interrupt state.

        Args:
            messages: The messages to summarize.
            agent: The agent instance whose model will be used for summarization when no
                dedicated summarization_agent was configured.

        Returns:
            A message containing the conversation summary.

        Raises:
            Exception: If summary generation fails.
        """
        if self.summarization_agent is not None:
            return self._generate_summary_with_agent(messages)

        return self._generate_summary_with_model(messages, agent)

    # ------------------------------------------------------------------
    # Path 1 – dedicated summarization agent (backward-compatible)
    # ------------------------------------------------------------------

    def _generate_summary_with_agent(self, messages: list[Message]) -> Message:
        """Produce the summary by invoking the dedicated summarization agent.

        The agent's system prompt, messages, tool registry and default structured output model
        are captured up front and put back in a ``finally`` block, so the agent is restored
        whether or not the invocation succeeds.

        Args:
            messages: The messages to summarize. They become the agent's message history for
                the duration of the call.

        Returns:
            The agent's result message with its role set to ``"user"``.
        """
        summarizer = self.summarization_agent
        assert summarizer is not None  # guaranteed by caller

        # Snapshot the agent state that is put back once summarization finishes.
        saved_prompt = summarizer.system_prompt
        saved_messages = summarizer.messages.copy()
        saved_registry = summarizer.tool_registry
        saved_structured_model = getattr(summarizer, "_default_structured_output_model", None)

        try:
            # Summaries are plain text; structured output would add toolUse blocks, which are
            # invalid in user messages, so it is switched off for this call.
            if hasattr(summarizer, "_default_structured_output_model"):
                summarizer._default_structured_output_model = None

            # A tool-less agent gets a registry holding only the no-op tool so the tool spec
            # requirement is satisfied.
            if not summarizer.tool_names:
                placeholder_registry = ToolRegistry()
                placeholder_registry.register_tool(cast(AgentTool, noop_tool))
                summarizer.tool_registry = placeholder_registry

            summarizer.messages = messages

            outcome = summarizer("Please summarize this conversation.")
            summary = {**outcome.message, "role": "user"}
            return cast(Message, summary)

        finally:
            summarizer.system_prompt = saved_prompt
            summarizer.messages = saved_messages
            summarizer.tool_registry = saved_registry
            if hasattr(summarizer, "_default_structured_output_model"):
                summarizer._default_structured_output_model = saved_structured_model

    # ------------------------------------------------------------------
    # Path 2 – default case: call model.stream() directly
    # ------------------------------------------------------------------

    def _generate_summary_with_model(self, messages: list[Message], agent: "Agent") -> Message:
        """Produce the summary by streaming directly from the parent agent's model.

        The agent itself is not invoked; only ``agent.model.stream`` is called, with no tool
        specs and with either the configured summarization system prompt or the default one.

        Args:
            messages: The messages to summarize.
            agent: The parent agent whose model is used.

        Returns:
            The model's final message with its role set to ``"user"``.

        Raises:
            RuntimeError: If the stream ends without yielding a "stop" event.
        """
        if self.summarization_system_prompt is None:
            prompt = DEFAULT_SUMMARIZATION_PROMPT
        else:
            prompt = self.summarization_system_prompt

        # Conversation history followed by the explicit summarization request.
        request = {"role": "user", "content": [{"text": "Please summarize this conversation."}]}
        conversation = [*messages, request]

        async def _stream_summary() -> Message:
            stream = agent.model.stream(
                conversation,
                tool_specs=None,
                system_prompt=prompt,
            )

            final_message: Message | None = None
            async for event in process_stream(stream):
                # The "stop" event carries the assembled message as its second element.
                if "stop" in event:
                    _, final_message, _, _ = event["stop"]

            if final_message is None:
                raise RuntimeError("Failed to generate summary: no response from model")
            return final_message

        model_message = run_async(_stream_summary)
        summary = {**model_message, "role": "user"}
        return cast(Message, summary)

    def _adjust_split_point_for_tool_pairs(self, messages: list[Message], split_point: int) -> int:
        """Adjust the split point to avoid breaking ToolUse/ToolResult pairs.

        Uses the same logic as SlidingWindowConversationManager for consistency.

        Args:
            messages: The full list of messages.
            split_point: The initially calculated split point.

        Returns:
            The adjusted split point that doesn't break ToolUse/ToolResult pairs.

        Raises:
            ContextWindowOverflowException: If no valid split point can be found.
        """
        if split_point > len(messages):
            raise ContextWindowOverflowException("Split point exceeds message array length")

        if split_point == len(messages):
            return split_point

        # Find the next valid split_point
        while split_point < len(messages):
            if (
                # Oldest message cannot be a toolResult because it needs a toolUse preceding it
                any("toolResult" in content for content in messages[split_point]["content"])
                or (
                    # Oldest message can be a toolUse only if a toolResult immediately follows it.
                    any("toolUse" in content for content in messages[split_point]["content"])
                    and split_point + 1 < len(messages)
                    and not any("toolResult" in content for content in messages[split_point + 1]["content"])
                )
            ):
                split_point += 1
            else:
                break
        else:
            # If we didn't find a valid split_point, then we throw
            raise ContextWindowOverflowException("Unable to trim conversation context!")

        return split_point