Skip to content

Commit f862185

Browse files
feat: add proactive context compression to conversation managers (#2239)
Co-authored-by: agent-of-mkmeral <agent-of-mkmeral@users.noreply.github.com>
1 parent ead3179 commit f862185

7 files changed

Lines changed: 605 additions & 93 deletions

File tree

src/strands/agent/conversation_manager/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
It includes:
44
55
- ConversationManager: Abstract base class defining the conversation management interface
6+
- ProactiveCompressionConfig: Configuration type for proactive compression settings
67
- NullConversationManager: A no-op implementation that does not modify conversation history
78
- SlidingWindowConversationManager: An implementation that maintains a sliding window of messages to control context
89
size while preserving conversation coherence
@@ -13,14 +14,15 @@
1314
is critical for effective agent interactions.
1415
"""
1516

16-
from .conversation_manager import ConversationManager
17+
from .conversation_manager import ConversationManager, ProactiveCompressionConfig
1718
from .null_conversation_manager import NullConversationManager
1819
from .sliding_window_conversation_manager import SlidingWindowConversationManager
1920
from .summarizing_conversation_manager import SummarizingConversationManager
2021

2122
__all__ = [
2223
"ConversationManager",
2324
"NullConversationManager",
25+
"ProactiveCompressionConfig",
2426
"SlidingWindowConversationManager",
2527
"SummarizingConversationManager",
2628
]

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 121 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,33 @@
11
"""Abstract interface for conversation history management."""
22

3+
import logging
34
from abc import ABC, abstractmethod
4-
from typing import TYPE_CHECKING, Any
5+
from typing import TYPE_CHECKING, Any, TypedDict, Union
56

7+
from ...hooks.events import BeforeModelCallEvent
68
from ...hooks.registry import HookProvider, HookRegistry
79
from ...types.content import Message
810

911
if TYPE_CHECKING:
1012
from ...agent.agent import Agent
1113

14+
logger = logging.getLogger(__name__)
15+
16+
DEFAULT_COMPRESSION_THRESHOLD = 0.7
17+
DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
18+
19+
20+
class ProactiveCompressionConfig(TypedDict, total=False):
21+
"""Configuration for proactive compression when passed as an object.
22+
23+
Attributes:
24+
compression_threshold: Ratio of context window usage that triggers proactive compression.
25+
Value between 0 (exclusive) and 1 (inclusive).
26+
Defaults to 0.7 (compress when 70% of the context window is used).
27+
"""
28+
29+
compression_threshold: float
30+
1231

1332
class ConversationManager(ABC, HookProvider):
1433
"""Abstract base class for managing conversation history.
@@ -22,45 +41,122 @@ class ConversationManager(ABC, HookProvider):
2241
2342
ConversationManager implements the HookProvider protocol, allowing derived classes to register hooks for agent
2443
lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
25-
hook registration.
44+
hook registration chain.
45+
46+
The primary responsibility of a ConversationManager is overflow recovery: when the model encounters a context
47+
window overflow, :meth:`reduce_context` is called with ``e`` set and MUST reduce the history enough for the next
48+
model call to succeed.
49+
50+
Subclasses can enable proactive compression by passing ``proactive_compression`` in the constructor.
51+
When enabled, the base class registers a ``BeforeModelCallEvent`` hook that checks projected input tokens
52+
against the model's context window limit and calls :meth:`reduce_context` (without ``e``) when the
53+
threshold is exceeded. This is a best-effort operation — errors are swallowed so the model call can
54+
still proceed.
2655
2756
Example:
2857
```python
29-
class MyConversationManager(ConversationManager):
30-
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
31-
super().register_hooks(registry, **kwargs)
32-
# Register additional hooks here
58+
# Enable proactive compression with default threshold (0.7)
59+
SlidingWindowConversationManager(window_size=50, proactive_compression=True)
60+
61+
# Enable proactive compression with custom threshold
62+
SummarizingConversationManager(proactive_compression={"compression_threshold": 0.8})
3363
```
3464
"""
3565

36-
def __init__(self) -> None:
66+
def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionConfig", None] = None) -> None:
3767
"""Initialize the ConversationManager.
3868
69+
Args:
70+
proactive_compression: Enable proactive context compression before the model call.
71+
- ``True``: compress when 70% of the context window is used (default threshold).
72+
- ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
73+
- ``False`` or ``None``: disabled, only reactive overflow recovery is used.
74+
75+
Raises:
76+
ValueError: If compression_threshold is not in the valid range (0, 1].
77+
3978
Attributes:
4079
removed_message_count: The number of messages that have been removed from the agent's messages array.
4180
These represent messages provided by the user or LLM that have been removed, not messages
4281
included by the conversation manager through something like summarization.
4382
"""
83+
# Resolve the threshold from proactive_compression parameter
84+
if proactive_compression is True:
85+
threshold: float | None = DEFAULT_COMPRESSION_THRESHOLD
86+
elif isinstance(proactive_compression, dict):
87+
threshold = proactive_compression.get("compression_threshold", DEFAULT_COMPRESSION_THRESHOLD)
88+
else:
89+
threshold = None
90+
91+
if threshold is not None and (threshold <= 0 or threshold > 1):
92+
raise ValueError(
93+
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
94+
)
95+
4496
self.removed_message_count = 0
97+
self._compression_threshold = threshold
98+
self._context_window_limit_warned = False
4599

46100
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
47101
"""Register hooks for agent lifecycle events.
48102
103+
Always registers a ``BeforeModelCallEvent`` hook for proactive compression.
104+
When ``proactive_compression`` is not configured, the handler is a no-op (early return).
105+
49106
Derived classes that override this method must call the base implementation to ensure proper hook
50107
registration chain.
51108
52109
Args:
53110
registry: The hook registry to register callbacks with.
54111
**kwargs: Additional keyword arguments for future extensibility.
112+
"""
113+
# Always subscribe — the threshold check happens inside the handler
114+
registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
55115

56-
Example:
57-
```python
58-
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
59-
super().register_hooks(registry, **kwargs)
60-
registry.add_callback(SomeEvent, self.on_some_event)
61-
```
116+
def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
117+
"""Handle BeforeModelCallEvent for proactive compression.
118+
119+
When proactive compression is not configured, this is a no-op.
120+
When configured, checks projected input tokens against the context window limit
121+
and calls reduce_context() without an exception argument (best-effort) when the threshold is reached or exceeded.
122+
123+
Args:
124+
event: The before model call event.
62125
"""
63-
pass
126+
# Early return if proactive compression is not enabled
127+
if self._compression_threshold is None:
128+
return
129+
130+
context_window_limit = event.agent.model.context_window_limit
131+
if context_window_limit is None:
132+
context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
133+
if not self._context_window_limit_warned:
134+
self._context_window_limit_warned = True
135+
logger.warning(
136+
"context_window_limit=<%s> | context_window_limit not set on model, using default."
137+
" Set context_window_limit in your model config for accurate proactive compression",
138+
DEFAULT_CONTEXT_WINDOW_LIMIT,
139+
)
140+
141+
if event.projected_input_tokens is None:
142+
logger.debug("projected_input_tokens=<None> | skipping proactive compression")
143+
return
144+
145+
ratio = event.projected_input_tokens / context_window_limit
146+
if ratio >= self._compression_threshold:
147+
logger.debug(
148+
"projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
149+
" | compression threshold exceeded, reducing context",
150+
event.projected_input_tokens,
151+
context_window_limit,
152+
ratio,
153+
self._compression_threshold,
154+
)
155+
# Proactive compression is best-effort: swallow errors so the model call can still proceed.
156+
try:
157+
self.reduce_context(agent=event.agent)
158+
except Exception:
159+
logger.debug("proactive compression failed, will proceed with model call", exc_info=True)
64160

65161
def restore_from_session(self, state: dict[str, Any]) -> list[Message] | None:
66162
"""Restore the Conversation Manager's state from a session.
@@ -99,22 +195,24 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
99195

100196
@abstractmethod
101197
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
102-
"""Called when the model's context window is exceeded.
103-
104-
This method should implement the specific strategy for reducing the window size when a context overflow occurs.
105-
It is typically called after a ContextWindowOverflowException is caught.
198+
"""Reduce the conversation history.
106199
107-
Implementations might use strategies such as:
200+
Called in two scenarios:
201+
1. **Reactive** (e is set): A context window overflow occurred. The implementation
202+
MUST remove enough history for the next model call to succeed, or re-raise the error.
203+
2. **Proactive** (e is None): The compression threshold was exceeded. This is best-effort —
204+
returning without reduction or raising is acceptable; the model call proceeds regardless.
108205
109-
- Removing the N oldest messages
110-
- Summarizing older context
111-
- Applying importance-based filtering
112-
- Maintaining critical conversation markers
206+
Implementations should modify ``agent.messages`` in-place.
113207
114208
Args:
115209
agent: The agent whose conversation history will be reduced.
116210
The agent's ``messages`` list is modified in-place.
117211
e: The exception that triggered the context reduction, if any.
212+
When set, this is a reactive overflow recovery call — the implementation MUST
213+
reduce enough history for the next model call to succeed.
214+
When None, this is a proactive compression call — best-effort reduction to avoid
215+
hitting the context window limit.
118216
**kwargs: Additional keyword arguments for future extensibility.
119217
"""
120218
pass

src/strands/agent/conversation_manager/null_conversation_manager.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
if TYPE_CHECKING:
66
from ...agent.agent import Agent
77

8-
from ...types.exceptions import ContextWindowOverflowException
98
from .conversation_manager import ConversationManager
109

1110

@@ -29,18 +28,18 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
2928
pass
3029

3130
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
32-
"""Does not reduce context and raises an exception.
31+
"""Does not reduce context.
32+
33+
When called reactively (e is not None), re-raises the overflow exception since this
34+
manager cannot reduce context. When called proactively (e is None), returns silently.
3335
3436
Args:
3537
agent: The agent whose conversation history will remain unmodified.
3638
e: The exception that triggered the context reduction, if any.
3739
**kwargs: Additional keyword arguments for future extensibility.
3840
3941
Raises:
40-
e: If provided.
41-
ContextWindowOverflowException: If e is None.
42+
e: If provided (reactive overflow).
4243
"""
4344
if e:
4445
raise e
45-
else:
46-
raise ContextWindowOverflowException("Context window overflowed!")

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from ...types.content import ContentBlock, Messages
1111
from ...types.exceptions import ContextWindowOverflowException
1212
from ...types.tools import ToolResultContent
13-
from .conversation_manager import ConversationManager
13+
from .conversation_manager import ConversationManager, ProactiveCompressionConfig
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -37,6 +37,7 @@ def __init__(
3737
should_truncate_results: bool = True,
3838
*,
3939
per_turn: bool | int = False,
40+
proactive_compression: bool | ProactiveCompressionConfig | None = None,
4041
):
4142
"""Initialize the sliding window conversation manager.
4243
@@ -54,6 +55,10 @@ def __init__(
5455
manage message history and prevent the agent loop from slowing down. Start with
5556
per_turn=True and adjust to a specific frequency (e.g., per_turn=5) if needed
5657
for performance tuning.
58+
proactive_compression: Enable proactive context compression before the model call.
59+
- ``True``: compress when 70% of the context window is used (default threshold).
60+
- ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
61+
- ``False`` or ``None``: disabled, only reactive overflow recovery is used.
5762
5863
Raises:
5964
ValueError: If window_size is negative, or if per_turn is 0 or a negative integer.
@@ -63,7 +68,7 @@ def __init__(
6368
if isinstance(per_turn, int) and not isinstance(per_turn, bool) and per_turn <= 0:
6469
raise ValueError(f"per_turn must be a positive integer, True, or False, got {per_turn}")
6570

66-
super().__init__()
71+
super().__init__(proactive_compression=proactive_compression)
6772

6873
self.window_size = window_size
6974
self.should_truncate_results = should_truncate_results
@@ -158,6 +163,12 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
158163
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
159164
"""Trim the oldest messages to reduce the conversation context size.
160165
166+
When ``e`` is set (reactive overflow recovery), attempts to truncate large tool results
167+
first before falling back to message trimming.
168+
169+
When ``e`` is None (proactive compression or routine management), only trims messages
170+
without attempting tool result truncation.
171+
161172
The method handles special cases where trimming the messages leads to:
162173
- toolResult with no corresponding toolUse
163174
- toolUse with no corresponding toolResult
@@ -166,12 +177,14 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
166177
agent: The agent whose messages will be reduced.
167178
This list is modified in-place.
168179
e: The exception that triggered the context reduction, if any.
180+
When set, this is a reactive overflow recovery call.
181+
When None, this is a proactive or routine management call.
169182
**kwargs: Additional keyword arguments for future extensibility.
170183
171184
Raises:
172185
ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
173-
error was provided (e is not None). When called during routine window management (e is None),
174-
logs a warning and returns without modification.
186+
error was provided (e is not None). When called during routine window management or
187+
proactive compression (e is None), logs a warning and returns without modification.
175188
"""
176189
messages = agent.messages
177190

@@ -181,16 +194,18 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
181194
messages[:] = []
182195
return
183196

184-
# Try to truncate the tool result first
185-
oldest_message_idx_with_tool_results = self._find_oldest_message_with_tool_results(messages)
186-
if oldest_message_idx_with_tool_results is not None and self.should_truncate_results:
187-
logger.debug(
188-
"message_index=<%s> | found message with tool results at index", oldest_message_idx_with_tool_results
189-
)
190-
results_truncated = self._truncate_tool_results(messages, oldest_message_idx_with_tool_results)
191-
if results_truncated:
192-
logger.debug("message_index=<%s> | tool results truncated", oldest_message_idx_with_tool_results)
193-
return
197+
# Try to truncate the tool result first (only for reactive overflow, not proactive compression)
198+
if e is not None:
199+
oldest_message_idx_with_tool_results = self._find_oldest_message_with_tool_results(messages)
200+
if oldest_message_idx_with_tool_results is not None and self.should_truncate_results:
201+
logger.debug(
202+
"message_index=<%s> | found message with tool results at index",
203+
oldest_message_idx_with_tool_results,
204+
)
205+
results_truncated = self._truncate_tool_results(messages, oldest_message_idx_with_tool_results)
206+
if results_truncated:
207+
logger.debug("message_index=<%s> | tool results truncated", oldest_message_idx_with_tool_results)
208+
return
194209

195210
# Try to trim index id when tool result cannot be truncated anymore
196211
# If the number of messages is less than the window_size, then we default to 2, otherwise, trim to window size

0 commit comments

Comments
 (0)