Skip to content

Commit 7db57db

Browse files
committed
feat: add proactive context compression to conversation managers
1 parent 6e208a8 commit 7db57db

5 files changed

Lines changed: 493 additions & 40 deletions

File tree

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 109 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
"""Abstract interface for conversation history management."""
22

3+
import logging
34
from abc import ABC, abstractmethod
45
from typing import TYPE_CHECKING, Any
56

7+
from ...hooks.events import BeforeModelCallEvent
68
from ...hooks.registry import HookProvider, HookRegistry
79
from ...types.content import Message
810

911
if TYPE_CHECKING:
1012
from ...agent.agent import Agent
1113

14+
logger = logging.getLogger(__name__)
15+
16+
DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
17+
1218

1319
class ConversationManager(ABC, HookProvider):
1420
"""Abstract base class for managing conversation history.
@@ -24,6 +30,11 @@ class ConversationManager(ABC, HookProvider):
2430
lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
2531
hook registration.
2632
33+
Optionally, a manager can enable proactive compression by setting ``compression_threshold``
34+
in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
35+
checks projected input tokens against the model's context window limit and calls
36+
:meth:`reduce_on_threshold` when the threshold is exceeded.
37+
2738
Example:
2839
```python
2940
class MyConversationManager(ConversationManager):
@@ -33,34 +44,124 @@ def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
3344
```
3445
"""
3546

36-
def __init__(self) -> None:
47+
def __init__(self, *, compression_threshold: float | None = None) -> None:
3748
"""Initialize the ConversationManager.
3849
50+
Args:
51+
compression_threshold: Ratio of context window usage that triggers proactive compression.
52+
Value between 0 (exclusive) and 1 (inclusive). For example, 0.7 means compress when 70%
53+
of the context window is used. When not set, proactive compression is disabled and only
54+
reactive overflow recovery is used.
55+
56+
Raises:
57+
ValueError: If compression_threshold is not in the valid range (0, 1].
58+
3959
Attributes:
4060
removed_message_count: The messages that have been removed from the agents messages array.
4161
These represent messages provided by the user or LLM that have been removed, not messages
4262
included by the conversation manager through something like summarization.
4363
"""
64+
if compression_threshold is not None and (compression_threshold <= 0 or compression_threshold > 1):
65+
raise ValueError(
66+
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {compression_threshold}"
67+
)
68+
4469
self.removed_message_count = 0
70+
self._compression_threshold = compression_threshold
71+
self._context_window_limit_warned = False
72+
73+
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
    """Shrink the conversation history ahead of a model call (no-op by default).

    This hook is invoked when the projected input token count reaches the configured
    ``compression_threshold`` fraction of the model's context window limit. Managers that
    support proactive compression override it and trim ``agent.messages`` in place so the
    upcoming model call does not overflow.

    Implementations may simply raise on failure: the base class wraps the call, logs the
    exception at debug level and lets the model call proceed, so no defensive error
    swallowing is needed here. In that case no return value is observed.

    Args:
        agent: The agent whose conversation history will be reduced.
            The agent's messages list should be modified in-place.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if the history was reduced, False otherwise. The base implementation performs
        no reduction and always returns False.
    """
    return False
4599

46100
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
    """Register hooks for agent lifecycle events.

    A ``BeforeModelCallEvent`` callback for proactive compression is registered only when
    both conditions hold: ``compression_threshold`` was configured, and the concrete class
    provides its own ``reduce_on_threshold``. If a threshold is configured but the method
    is not overridden, a warning is logged and nothing is registered.

    Derived classes that override this method must call the base implementation to ensure proper hook
    registration chain.

    Args:
        registry: The hook registry to register callbacks with.
        **kwargs: Additional keyword arguments for future extensibility.
    """
    threshold_configured = self._compression_threshold is not None
    if not threshold_configured:
        return

    # Compare the function resolved on the concrete class with the base default to find out
    # whether the subclass supplies a real implementation.
    uses_base_default = type(self).reduce_on_threshold is ConversationManager.reduce_on_threshold
    if uses_base_default:
        logger.warning(
            "conversation_manager=<%s> | compression_threshold is configured but reduce_on_threshold is not"
            " implemented, proactive compression is disabled",
            type(self).__name__,
        )
        return

    registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
128+
129+
def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
    """Handle BeforeModelCallEvent for proactive compression.

    Computes the ratio of projected input tokens to the model's context window limit and,
    when that ratio reaches ``compression_threshold``, calls ``reduce_on_threshold``. Any
    exception raised by the reduction is logged at debug level and suppressed so that the
    model call still proceeds.

    Proactive compression is skipped when the projected token count is unavailable or when
    the context window limit is not a positive number.

    Args:
        event: The before model call event.
    """
    context_window_limit = event.agent.model.context_window_limit
    if context_window_limit is None:
        context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
        if not self._context_window_limit_warned:
            # Warn only once per manager instance to avoid flooding the log on every model call.
            self._context_window_limit_warned = True
            logger.warning(
                "context_window_limit=<None>, default=<%s>"
                " | context_window_limit is not set on the model, using default"
                " | set context_window_limit in your model config for accurate threshold checks",
                DEFAULT_CONTEXT_WINDOW_LIMIT,
            )

    # The division below is not covered by the try/except, so a zero limit would raise
    # ZeroDivisionError out of the hook; a negative limit yields a ratio that can never trigger.
    if context_window_limit <= 0:
        logger.debug(
            "context_window_limit=<%s> | context window limit is not positive, skipping proactive compression",
            context_window_limit,
        )
        return

    projected_input_tokens = event.projected_input_tokens
    if projected_input_tokens is None:
        logger.debug("projected_input_tokens=<None> | skipping proactive compression")
        return

    ratio = projected_input_tokens / context_window_limit
    if ratio >= self._compression_threshold:  # type: ignore[operator]
        logger.debug(
            "projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
            " | compression threshold exceeded, reducing context",
            projected_input_tokens,
            context_window_limit,
            ratio,
            self._compression_threshold,
        )
        try:
            self.reduce_on_threshold(agent=event.agent)
        except Exception:
            # Best effort by design: a failed proactive reduction must not block the model call.
            logger.debug("proactive compression failed, will proceed with model call", exc_info=True)
64165

65166
def restore_from_session(self, state: dict[str, Any]) -> list[Message] | None:
66167
"""Restore the Conversation Manager's state from a session.

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def __init__(
3737
should_truncate_results: bool = True,
3838
*,
3939
per_turn: bool | int = False,
40+
compression_threshold: float | None = None,
4041
):
4142
"""Initialize the sliding window conversation manager.
4243
@@ -54,6 +55,8 @@ def __init__(
5455
manage message history and prevent the agent loop from slowing down. Start with
5556
per_turn=True and adjust to a specific frequency (e.g., per_turn=5) if needed
5657
for performance tuning.
58+
compression_threshold: Ratio of context window usage that triggers proactive compression.
59+
See :class:`ConversationManager` for details.
5760
5861
Raises:
5962
ValueError: If window_size is negative, or if per_turn is 0 or a negative integer.
@@ -63,7 +66,7 @@ def __init__(
6366
if isinstance(per_turn, int) and not isinstance(per_turn, bool) and per_turn <= 0:
6467
raise ValueError(f"per_turn must be a positive integer, True, or False, got {per_turn}")
6568

66-
super().__init__()
69+
super().__init__(compression_threshold=compression_threshold)
6770

6871
self.window_size = window_size
6972
self.should_truncate_results = should_truncate_results
@@ -155,6 +158,20 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
155158
return
156159
self.reduce_context(agent)
157160

161+
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
    """Proactively shrink the context by delegating to ``reduce_context``.

    The message count is sampled before and after the reduction; the history counts as
    reduced only if the list ended up shorter.

    Args:
        agent: The agent whose conversation history will be reduced.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if the history was reduced, False otherwise.
    """
    count_before = len(agent.messages)
    self.reduce_context(agent)
    count_after = len(agent.messages)
    return count_after < count_before
174+
158175
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
159176
"""Trim the oldest messages to reduce the conversation context size.
160177

src/strands/agent/conversation_manager/summarizing_conversation_manager.py

Lines changed: 57 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def __init__(
6565
preserve_recent_messages: int = 10,
6666
summarization_agent: Optional["Agent"] = None,
6767
summarization_system_prompt: str | None = None,
68+
*,
69+
compression_threshold: float | None = None,
6870
):
6971
"""Initialize the summarizing conversation manager.
7072
@@ -77,8 +79,10 @@ def __init__(
7779
If provided, this agent can use tools as part of the summarization process.
7880
summarization_system_prompt: Optional system prompt override for summarization.
7981
If None, uses the default summarization prompt.
82+
compression_threshold: Ratio of context window usage that triggers proactive compression.
83+
See :class:`ConversationManager` for details.
8084
"""
81-
super().__init__()
85+
super().__init__(compression_threshold=compression_threshold)
8286
if summarization_agent is not None and summarization_system_prompt is not None:
8387
raise ValueError(
8488
"Cannot provide both summarization_agent and summarization_system_prompt. "
@@ -136,44 +140,67 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
136140
ContextWindowOverflowException: If the context cannot be summarized.
137141
"""
138142
try:
139-
# Calculate how many messages to summarize
140-
messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))
143+
self._summarize_oldest(agent)
144+
except Exception as summarization_error:
145+
logger.error("Summarization failed: %s", summarization_error)
146+
raise summarization_error from e
141147

142-
# Ensure we don't summarize recent messages
143-
messages_to_summarize_count = min(
144-
messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
145-
)
148+
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
    """Proactively shrink the context by summarizing the oldest messages.

    Delegates to ``_summarize_oldest``. Errors raised there are not handled here and
    propagate to the caller.

    Args:
        agent: The agent whose conversation history will be reduced.
        **kwargs: Additional keyword arguments for future extensibility.

    Returns:
        True if the history was reduced, False otherwise. Reaching the return statement
        means summarization completed without raising, so the result is always True.
    """
    self._summarize_oldest(agent)
    reduced = True
    return reduced
154160

155-
if messages_to_summarize_count <= 0:
156-
raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")
161+
def _summarize_oldest(self, agent: "Agent") -> None:
    """Summarize the oldest messages and replace them with a summary.

    The manager's bookkeeping (``removed_message_count`` and ``_summary_message``) and the
    agent's message list are only updated after the summary has been generated successfully.
    If summary generation raises, the exception propagates and neither the manager state nor
    ``agent.messages`` is modified by this method.

    Args:
        agent: The agent instance.

    Raises:
        ContextWindowOverflowException: If there are insufficient messages for summarization.
    """
    # Calculate how many messages to summarize
    messages_to_summarize_count = max(1, int(len(agent.messages) * self.summary_ratio))

    # Ensure we don't summarize recent messages
    messages_to_summarize_count = min(
        messages_to_summarize_count, len(agent.messages) - self.preserve_recent_messages
    )

    if messages_to_summarize_count <= 0:
        raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

    # Adjust split point to avoid breaking ToolUse/ToolResult pairs
    messages_to_summarize_count = self._adjust_split_point_for_tool_pairs(
        agent.messages, messages_to_summarize_count
    )

    if messages_to_summarize_count <= 0:
        raise ContextWindowOverflowException("Cannot summarize: insufficient messages for summarization")

    # Extract messages to summarize
    messages_to_summarize = agent.messages[:messages_to_summarize_count]
    remaining_messages = agent.messages[messages_to_summarize_count:]

    # Generate the summary first. This is the step that can fail, and callers on the proactive
    # path suppress the failure and carry on, so no state may be committed before it succeeds.
    new_summary_message = self._generate_summary(messages_to_summarize, agent)

    # Keep track of the number of messages that have been summarized thus far.
    self.removed_message_count += len(messages_to_summarize)
    # If there is a summary message from an earlier round, don't count it in the removed_message_count.
    if self._summary_message:
        self.removed_message_count -= 1

    self._summary_message = new_summary_message

    # Replace the summarized messages with the summary
    agent.messages[:] = [new_summary_message] + remaining_messages
177204

178205
def _generate_summary(self, messages: list[Message], agent: "Agent") -> Message:
179206
"""Generate a summary of the provided messages.

0 commit comments

Comments
 (0)