Skip to content

Commit 18c31b8

Browse files
committed
feat: add proactive context compression to conversation managers
1 parent 6e208a8 commit 18c31b8

5 files changed

Lines changed: 489 additions & 40 deletions

File tree

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 105 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,22 @@
11
"""Abstract interface for conversation history management."""
22

3+
import logging
4+
import warnings
35
from abc import ABC, abstractmethod
46
from typing import TYPE_CHECKING, Any
57

8+
from ...hooks.events import BeforeModelCallEvent
69
from ...hooks.registry import HookProvider, HookRegistry
710
from ...types.content import Message
811

912
if TYPE_CHECKING:
1013
from ...agent.agent import Agent
14+
from ...models.model import Model
15+
16+
logger = logging.getLogger(__name__)
17+
18+
# Module-level flag so the missing-context_window_limit warning is emitted at most once per process
19+
_context_window_limit_warned = False
1120

1221

1322
class ConversationManager(ABC, HookProvider):
@@ -24,6 +33,11 @@ class ConversationManager(ABC, HookProvider):
2433
lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
2534
hook registration.
2635
36+
Optionally, a manager can enable proactive compression by setting ``compression_threshold``
37+
in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
38+
checks projected input tokens against the model's context window limit and calls
39+
:meth:`reduce_on_threshold` when the threshold is exceeded.
40+
2741
Example:
2842
```python
2943
class MyConversationManager(ConversationManager):
@@ -33,34 +47,117 @@ def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
3347
```
3448
"""
3549

36-
def __init__(self) -> None:
50+
def __init__(self, *, compression_threshold: float | None = None) -> None:
3751
"""Initialize the ConversationManager.
3852
53+
Args:
54+
compression_threshold: Ratio of context window usage that triggers proactive compression.
55+
Value between 0 (exclusive) and 1 (inclusive). For example, 0.7 means compress when 70%
56+
of the context window is used. When not set, proactive compression is disabled and only
57+
reactive overflow recovery is used.
58+
59+
Raises:
60+
ValueError: If compression_threshold is not in the valid range (0, 1].
61+
3962
Attributes:
4063
removed_message_count: The messages that have been removed from the agents messages array.
4164
These represent messages provided by the user or LLM that have been removed, not messages
4265
included by the conversation manager through something like summarization.
4366
"""
67+
if compression_threshold is not None and (compression_threshold <= 0 or compression_threshold > 1):
68+
raise ValueError(
69+
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {compression_threshold}"
70+
)
71+
4472
self.removed_message_count = 0
73+
self._compression_threshold = compression_threshold
74+
75+
def reduce_on_threshold(self, agent: "Agent", model: "Model", **kwargs: Any) -> bool:
    """Shrink the conversation history ahead of a model call (no-op by default).

    Invoked by the base class's ``BeforeModelCallEvent`` handler once the projected input
    tokens reach the configured ``compression_threshold`` share of the model's context
    window limit. Managers that support proactive compression override this method; the
    base implementation does nothing and reports that no reduction happened.

    Args:
        agent: The agent whose conversation history will be reduced.
            Implementations are expected to modify the agent's messages list in-place.
        model: The model instance for the upcoming call.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if the history was reduced, False otherwise. Always False here.
    """
    return False
4595

4696
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
    """Register hooks for agent lifecycle events.

    A ``BeforeModelCallEvent`` callback for proactive compression is registered only when
    both conditions hold: ``compression_threshold`` was configured, and the concrete class
    overrides ``reduce_on_threshold``. If the threshold is configured but the method is not
    overridden, a warning is logged and nothing is registered.

    Derived classes that override this method must call the base implementation to ensure proper hook
    registration chain.

    Args:
        registry: The hook registry to register callbacks with.
        **kwargs: Additional keyword arguments for future extensibility.
    """
    threshold_configured = self._compression_threshold is not None
    if not threshold_configured:
        return

    # Identity comparison against the base function detects whether the subclass
    # supplied its own reduce_on_threshold.
    inherits_default = type(self).reduce_on_threshold is ConversationManager.reduce_on_threshold
    if inherits_default:
        logger.warning(
            "conversation_manager=<%s> | compression_threshold is configured but reduce_on_threshold is not"
            " implemented, proactive compression is disabled",
            type(self).__name__,
        )
        return

    registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
124+
125+
def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
    """Handle BeforeModelCallEvent for proactive compression.

    Divides ``event.projected_input_tokens`` by the model's ``context_window_limit`` and calls
    :meth:`reduce_on_threshold` when that ratio reaches ``compression_threshold``. The hook is
    best-effort: it returns without acting when the limit or the projection is unavailable (or the
    limit is not positive), and any exception raised by ``reduce_on_threshold`` is logged rather
    than propagated so the model call still proceeds.

    Args:
        event: The before model call event.
    """
    global _context_window_limit_warned  # noqa: PLW0603

    model = event.agent.model
    context_window_limit = model.context_window_limit
    if context_window_limit is None:
        # Warn only once per process; the module-level flag remembers that we already did.
        if not _context_window_limit_warned:
            _context_window_limit_warned = True
            warnings.warn(
                "context_window_limit is not set on the model, proactive compression is disabled."
                " Set context_window_limit in your model config",
                stacklevel=2,
            )
        return

    if context_window_limit <= 0:
        # A zero limit would raise ZeroDivisionError below and a negative one gives a
        # meaningless ratio; skip instead of failing the model call from inside a hook.
        logger.debug(
            "context_window_limit=<%s> | non-positive limit, skipping proactive compression",
            context_window_limit,
        )
        return

    projected_input_tokens = event.projected_input_tokens
    if projected_input_tokens is None:
        return

    ratio = projected_input_tokens / context_window_limit
    if ratio < self._compression_threshold:  # type: ignore[operator]
        return

    logger.debug(
        "projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
        " | compression threshold exceeded, reducing context",
        projected_input_tokens,
        context_window_limit,
        ratio,
        self._compression_threshold,
    )
    try:
        self.reduce_on_threshold(agent=event.agent, model=model)
    except Exception:
        # Deliberately swallowed so the model call proceeds, but logged at warning level:
        # a failed compression is something an operator needs to be able to see.
        logger.warning("proactive compression failed, will proceed with model call", exc_info=True)
64161

65162
def restore_from_session(self, state: dict[str, Any]) -> list[Message] | None:
66163
"""Restore the Conversation Manager's state from a session.

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
if TYPE_CHECKING:
77
from ...agent.agent import Agent
8+
from ...models.model import Model
89

910
from ...hooks import BeforeModelCallEvent, HookRegistry
1011
from ...types.content import ContentBlock, Messages
@@ -37,6 +38,7 @@ def __init__(
3738
should_truncate_results: bool = True,
3839
*,
3940
per_turn: bool | int = False,
41+
compression_threshold: float | None = None,
4042
):
4143
"""Initialize the sliding window conversation manager.
4244
@@ -54,6 +56,8 @@ def __init__(
5456
manage message history and prevent the agent loop from slowing down. Start with
5557
per_turn=True and adjust to a specific frequency (e.g., per_turn=5) if needed
5658
for performance tuning.
59+
compression_threshold: Ratio of context window usage that triggers proactive compression.
60+
See :class:`ConversationManager` for details.
5761
5862
Raises:
5963
ValueError: If window_size is negative, or if per_turn is 0 or a negative integer.
@@ -63,7 +67,7 @@ def __init__(
6367
if isinstance(per_turn, int) and not isinstance(per_turn, bool) and per_turn <= 0:
6468
raise ValueError(f"per_turn must be a positive integer, True, or False, got {per_turn}")
6569

66-
super().__init__()
70+
super().__init__(compression_threshold=compression_threshold)
6771

6872
self.window_size = window_size
6973
self.should_truncate_results = should_truncate_results
@@ -155,6 +159,21 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
155159
return
156160
self.reduce_context(agent)
157161

162+
def reduce_on_threshold(self, agent: "Agent", model: "Model", **kwargs: Any) -> bool:
    """Proactively reduce context by delegating to ``reduce_context``.

    Success is judged purely by message count: the number of messages is recorded before
    and after the ``reduce_context`` call and the two are compared.

    Args:
        agent: The agent whose conversation history will be reduced.
        model: The model instance for the upcoming call. Not used by this implementation.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if the agent holds fewer messages after the reduction, False otherwise.
    """
    count_before = len(agent.messages)
    self.reduce_context(agent)
    count_after = len(agent.messages)
    return count_after < count_before
176+
158177
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
159178
"""Trim the oldest messages to reduce the conversation context size.
160179

src/strands/agent/conversation_manager/summarizing_conversation_manager.py

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from .conversation_manager import ConversationManager
1616

1717
if TYPE_CHECKING:
18+
from ...models.model import Model
1819
from ..agent import Agent
1920

2021

@@ -65,6 +66,8 @@ def __init__(
6566
preserve_recent_messages: int = 10,
6667
summarization_agent: Optional["Agent"] = None,
6768
summarization_system_prompt: str | None = None,
69+
*,
70+
compression_threshold: float | None = None,
6871
):
6972
"""Initialize the summarizing conversation manager.
7073
@@ -77,8 +80,10 @@ def __init__(
7780
If provided, this agent can use tools as part of the summarization process.
7881
summarization_system_prompt: Optional system prompt override for summarization.
7982
If None, uses the default summarization prompt.
83+
compression_threshold: Ratio of context window usage that triggers proactive compression.
84+
See :class:`ConversationManager` for details.
8085
"""
81-
super().__init__()
86+
super().__init__(compression_threshold=compression_threshold)
8287
if summarization_agent is not None and summarization_system_prompt is not None:
8388
raise ValueError(
8489
"Cannot provide both summarization_agent and summarization_system_prompt. "
@@ -136,44 +141,74 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
136141
ContextWindowOverflowException: If the context cannot be summarized.
137142
"""
138143
try:
139-
# Calculate how many messages to summarize
140-
messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))
144+
self._summarize_oldest(agent)
145+
except Exception as summarization_error:
146+
logger.error("Summarization failed: %s", summarization_error)
147+
raise summarization_error from e
141148

142-
# Ensure we don't summarize recent messages
143-
messages_to_summarize_count = min(
144-
messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
145-
)
149+
def reduce_on_threshold(self, agent: "Agent", model: "Model", **kwargs: Any) -> bool:
    """Proactively reduce context by summarizing the oldest messages.

    Best-effort wrapper around ``_summarize_oldest``: any exception it raises is logged
    and converted into a False result so the pending model call can still proceed.

    Args:
        agent: The agent whose conversation history will be reduced.
        model: The model instance for the upcoming call. Not used by this implementation.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if summarization completed without raising, False otherwise.
    """
    try:
        self._summarize_oldest(agent)
    except Exception as summarization_error:
        logger.error("error=<%s> | proactive summarization failed", summarization_error)
        reduced = False
    else:
        reduced = True
    return reduced
154168

155-
if messages_to_summarize_count <= 0:
156-
raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")
169+
def _summarize_oldest(self, agent: "Agent") -> None:
    """Summarize the oldest messages and replace them with a summary.

    The number of messages to summarize is ``summary_ratio`` of the history (at least one),
    capped so the last ``preserve_recent_messages`` messages are kept, then adjusted by
    ``_adjust_split_point_for_tool_pairs``.

    ``removed_message_count``, ``_summary_message`` and ``agent.messages`` are only written
    after the summary has been generated. An exception from ``_generate_summary`` therefore
    propagates without this method having changed any of them, which matters for callers that
    swallow the error and carry on.

    Args:
        agent: The agent instance. Its ``messages`` list is modified in-place on success.

    Raises:
        ContextWindowOverflowException: If there are insufficient messages for summarization.
    """
    # Calculate how many messages to summarize
    messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))

    # Ensure we don't summarize recent messages
    messages_to_summarize_count = min(
        messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
    )

    if messages_to_summarize_count <= 0:
        raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

    # Adjust split point to avoid breaking ToolUse/ToolResult pairs
    messages_to_summarize_count = self._adjust_split_point_for_tool_pairs(
        agent.messages, messages_to_summarize_count
    )

    if messages_to_summarize_count <= 0:
        raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

    # Extract messages to summarize
    messages_to_summarize = agent.messages[:messages_to_summarize_count]
    remaining_messages = agent.messages[messages_to_summarize_count:]

    # If there is already a summary message, it is not counted in removed_message_count.
    # Captured before generating the new summary, which replaces the old one.
    newly_removed = len(messages_to_summarize)
    if self._summary_message:
        newly_removed -= 1

    # Generate the summary first: this is the step that can fail, so no state is touched yet.
    # NOTE(review): assumes _generate_summary does not read removed_message_count — confirm.
    summary_message = self._generate_summary(messages_to_summarize, agent)

    # Commit: track how many messages have been summarized thus far and swap in the summary.
    self.removed_message_count += newly_removed
    self._summary_message = summary_message
    agent.messages[:] = [summary_message] + remaining_messages
177212

178213
def _generate_summary(self, messages: list[Message], agent: "Agent") -> Message:
179214
"""Generate a summary of the provided messages.

0 commit comments

Comments
 (0)