Skip to content

Commit af2e0da

Browse files
refactor: align proactive compression API with TypeScript SDK
Match the implementation and interfaces from strands-agents/sdk-typescript#965:

1. Rename parameter: compression_threshold → proactive_compression
   - Accepts bool | ProactiveCompressionConfig | None
   - True = default threshold (0.7), dict = custom, None/False = disabled
2. Remove reduce_on_threshold() method — unified into reduce_context()
   - reduce_context(agent, e=None) distinguishes reactive vs proactive:
     - e set: reactive overflow recovery, MUST reduce or rethrow
     - e None: proactive/routine, best-effort (errors swallowed)
3. Always register BeforeModelCallEvent hook
   - Check happens inside the handler (early return when not configured)
   - Removed subclass-override detection logic
4. SummarizingConversationManager.reduce_context() error handling:
   - Reactive (e set): rethrow on failure
   - Proactive (e None): log warning and return silently
5. Export ProactiveCompressionConfig from conversation_manager package
6. Add DEFAULT_COMPRESSION_THRESHOLD = 0.7 constant

Usage (matches TypeScript DX):

    SlidingWindowConversationManager(window_size=50, proactive_compression=True)
    SummarizingConversationManager(proactive_compression={'compression_threshold': 0.8})
1 parent 7db57db commit af2e0da

6 files changed

Lines changed: 219 additions & 201 deletions

File tree

src/strands/agent/conversation_manager/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
It includes:
44
55
- ConversationManager: Abstract base class defining the conversation management interface
6+
- ProactiveCompressionConfig: Configuration type for proactive compression settings
67
- NullConversationManager: A no-op implementation that does not modify conversation history
78
- SlidingWindowConversationManager: An implementation that maintains a sliding window of messages to control context
89
size while preserving conversation coherence
@@ -13,14 +14,15 @@
1314
is critical for effective agent interactions.
1415
"""
1516

16-
from .conversation_manager import ConversationManager
17+
from .conversation_manager import ConversationManager, ProactiveCompressionConfig
1718
from .null_conversation_manager import NullConversationManager
1819
from .sliding_window_conversation_manager import SlidingWindowConversationManager
1920
from .summarizing_conversation_manager import SummarizingConversationManager
2021

2122
__all__ = [
2223
"ConversationManager",
2324
"NullConversationManager",
25+
"ProactiveCompressionConfig",
2426
"SlidingWindowConversationManager",
2527
"SummarizingConversationManager",
2628
]

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 72 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
from abc import ABC, abstractmethod
5-
from typing import TYPE_CHECKING, Any
5+
from typing import TYPE_CHECKING, Any, TypedDict, Union
66

77
from ...hooks.events import BeforeModelCallEvent
88
from ...hooks.registry import HookProvider, HookRegistry
@@ -13,9 +13,22 @@
1313

1414
logger = logging.getLogger(__name__)
1515

16+
DEFAULT_COMPRESSION_THRESHOLD = 0.7
1617
DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
1718

1819

20+
class ProactiveCompressionConfig(TypedDict, total=False):
21+
"""Configuration for proactive compression when passed as an object.
22+
23+
Attributes:
24+
compression_threshold: Ratio of context window usage that triggers proactive compression.
25+
Value between 0 (exclusive) and 1 (inclusive).
26+
Defaults to 0.7 (compress when 70% of the context window is used).
27+
"""
28+
29+
compression_threshold: float
30+
31+
1932
class ConversationManager(ABC, HookProvider):
2033
"""Abstract base class for managing conversation history.
2134
@@ -28,30 +41,36 @@ class ConversationManager(ABC, HookProvider):
2841
2942
ConversationManager implements the HookProvider protocol, allowing derived classes to register hooks for agent
3043
lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
31-
hook registration.
44+
hook registration chain.
45+
46+
The primary responsibility of a ConversationManager is overflow recovery: when the model encounters a context
47+
window overflow, :meth:`reduce_context` is called with ``e`` set and MUST reduce the history enough for the next
48+
model call to succeed.
3249
33-
Optionally, a manager can enable proactive compression by setting ``compression_threshold``
34-
in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
35-
checks projected input tokens against the model's context window limit and calls
36-
:meth:`reduce_on_threshold` when the threshold is exceeded.
50+
Subclasses can enable proactive compression by passing ``proactive_compression`` in the constructor.
51+
When enabled, the base class registers a ``BeforeModelCallEvent`` hook that checks projected input tokens
52+
against the model's context window limit and calls :meth:`reduce_context` (without ``e``) when the
53+
threshold is exceeded. This is a best-effort operation — errors are swallowed so the model call can
54+
still proceed.
3755
3856
Example:
3957
```python
40-
class MyConversationManager(ConversationManager):
41-
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
42-
super().register_hooks(registry, **kwargs)
43-
# Register additional hooks here
58+
# Enable proactive compression with default threshold (0.7)
59+
SlidingWindowConversationManager(window_size=50, proactive_compression=True)
60+
61+
# Enable proactive compression with custom threshold
62+
SummarizingConversationManager(proactive_compression={"compression_threshold": 0.8})
4463
```
4564
"""
4665

47-
def __init__(self, *, compression_threshold: float | None = None) -> None:
66+
def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionConfig", None] = None) -> None:
4867
"""Initialize the ConversationManager.
4968
5069
Args:
51-
compression_threshold: Ratio of context window usage that triggers proactive compression.
52-
Value between 0 (exclusive) and 1 (inclusive). For example, 0.7 means compress when 70%
53-
of the context window is used. When not set, proactive compression is disabled and only
54-
reactive overflow recovery is used.
70+
proactive_compression: Enable proactive context compression before the model call.
71+
- ``True``: compress when 70% of the context window is used (default threshold).
72+
- ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
73+
- ``False`` or ``None``: disabled, only reactive overflow recovery is used.
5574
5675
Raises:
5776
ValueError: If compression_threshold is not in the valid range (0, 1].
@@ -61,48 +80,28 @@ def __init__(self, *, compression_threshold: float | None = None) -> None:
6180
These represent messages provided by the user or LLM that have been removed, not messages
6281
included by the conversation manager through something like summarization.
6382
"""
64-
if compression_threshold is not None and (compression_threshold <= 0 or compression_threshold > 1):
83+
# Resolve the threshold from proactive_compression parameter
84+
if proactive_compression is True:
85+
threshold: float | None = DEFAULT_COMPRESSION_THRESHOLD
86+
elif isinstance(proactive_compression, dict):
87+
threshold = proactive_compression.get("compression_threshold", DEFAULT_COMPRESSION_THRESHOLD)
88+
else:
89+
threshold = None
90+
91+
if threshold is not None and (threshold <= 0 or threshold > 1):
6592
raise ValueError(
66-
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {compression_threshold}"
93+
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
6794
)
6895

6996
self.removed_message_count = 0
70-
self._compression_threshold = compression_threshold
97+
self._compression_threshold = threshold
7198
self._context_window_limit_warned = False
7299

73-
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
74-
"""Proactively reduce the conversation history before a model call.
75-
76-
Called when projected input tokens exceed the configured compression_threshold
77-
of the model's context window limit. Subclasses implement this to reduce
78-
context before the model call, avoiding overflow errors.
79-
80-
The base class catches any exceptions raised by this method and logs them
81-
at debug level, so subclass implementations do not need to defensively
82-
swallow errors — they can let them propagate. When an exception occurs,
83-
the return value is never observed by the caller.
84-
85-
The default implementation returns False. Subclasses that support proactive
86-
compression should override this method.
87-
88-
Args:
89-
agent: The agent whose conversation history will be reduced.
90-
The agent's messages list should be modified in-place.
91-
**kwargs: Additional keyword arguments for future extensibility.
92-
93-
Returns:
94-
True if the history was reduced, False otherwise. Only observed on success;
95-
if the method raises, the base class catches the exception and the return
96-
value is ignored.
97-
"""
98-
return False
99-
100100
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
101101
"""Register hooks for agent lifecycle events.
102102
103-
When ``compression_threshold`` is configured and the subclass overrides
104-
``reduce_on_threshold``, registers a ``BeforeModelCallEvent`` hook for
105-
proactive compression.
103+
Always registers a ``BeforeModelCallEvent`` hook for proactive compression.
104+
When ``proactive_compression`` is not configured, the handler is a no-op (early return).
106105
107106
Derived classes that override this method must call the base implementation to ensure proper hook
108107
registration chain.
@@ -111,36 +110,31 @@ def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
111110
registry: The hook registry to register callbacks with.
112111
**kwargs: Additional keyword arguments for future extensibility.
113112
"""
114-
if self._compression_threshold is None:
115-
return
116-
117-
# Check if the subclass actually overrides reduce_on_threshold
118-
has_override = type(self).reduce_on_threshold is not ConversationManager.reduce_on_threshold
119-
if not has_override:
120-
logger.warning(
121-
"conversation_manager=<%s> | compression_threshold is configured but reduce_on_threshold is not"
122-
" implemented, proactive compression is disabled",
123-
type(self).__name__,
124-
)
125-
return
126-
113+
# Always subscribe — the threshold check happens inside the handler
127114
registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
128115

129116
def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
130117
"""Handle BeforeModelCallEvent for proactive compression.
131118
119+
When proactive compression is not configured, this is a no-op.
120+
When configured, checks projected input tokens against the context window limit
121+
and calls reduce_context() without ``e`` (best-effort) when the threshold is exceeded.
122+
132123
Args:
133124
event: The before model call event.
134125
"""
126+
# Early return if proactive compression is not enabled
127+
if self._compression_threshold is None:
128+
return
129+
135130
context_window_limit = event.agent.model.context_window_limit
136131
if context_window_limit is None:
137132
context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
138133
if not self._context_window_limit_warned:
139134
self._context_window_limit_warned = True
140135
logger.warning(
141-
"context_window_limit=<None>, default=<%s>"
142-
" | context_window_limit is not set on the model, using default"
143-
" | set context_window_limit in your model config for accurate threshold checks",
136+
"context_window_limit is not set on the model, using default of %s"
137+
" | set context_window_limit in your model config for accurate proactive compression",
144138
DEFAULT_CONTEXT_WINDOW_LIMIT,
145139
)
146140

@@ -149,7 +143,7 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
149143
return
150144

151145
ratio = event.projected_input_tokens / context_window_limit
152-
if ratio >= self._compression_threshold: # type: ignore[operator]
146+
if ratio >= self._compression_threshold:
153147
logger.debug(
154148
"projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
155149
" | compression threshold exceeded, reducing context",
@@ -158,8 +152,9 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
158152
ratio,
159153
self._compression_threshold,
160154
)
155+
# Proactive compression is best-effort: swallow errors so the model call can still proceed.
161156
try:
162-
self.reduce_on_threshold(agent=event.agent)
157+
self.reduce_context(agent=event.agent)
163158
except Exception:
164159
logger.debug("proactive compression failed, will proceed with model call", exc_info=True)
165160

@@ -200,22 +195,24 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
200195

201196
@abstractmethod
202197
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
203-
"""Called when the model's context window is exceeded.
204-
205-
This method should implement the specific strategy for reducing the window size when a context overflow occurs.
206-
It is typically called after a ContextWindowOverflowException is caught.
198+
"""Reduce the conversation history.
207199
208-
Implementations might use strategies such as:
200+
Called in two scenarios:
201+
1. **Reactive** (e is set): A context window overflow occurred. The implementation
202+
MUST remove enough history for the next model call to succeed, or re-raise the error.
203+
2. **Proactive** (e is None): The compression threshold was exceeded. This is best-effort —
204+
returning without reduction or raising is acceptable; the model call proceeds regardless.
209205
210-
- Removing the N oldest messages
211-
- Summarizing older context
212-
- Applying importance-based filtering
213-
- Maintaining critical conversation markers
206+
Implementations should modify ``agent.messages`` in-place.
214207
215208
Args:
216209
agent: The agent whose conversation history will be reduced.
217210
The agent's messages list is modified in-place.
218211
e: The exception that triggered the context reduction, if any.
212+
When set, this is a reactive overflow recovery call — the implementation MUST
213+
reduce enough history for the next model call to succeed.
214+
When None, this is a proactive compression call — best-effort reduction to avoid
215+
hitting the context window limit.
219216
**kwargs: Additional keyword arguments for future extensibility.
220217
"""
221218
pass

src/strands/agent/conversation_manager/sliding_window_conversation_manager.py

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Sliding window conversation history management."""
22

33
import logging
4-
from typing import TYPE_CHECKING, Any
4+
from typing import TYPE_CHECKING, Any, Union
55

66
if TYPE_CHECKING:
77
from ...agent.agent import Agent
@@ -10,7 +10,7 @@
1010
from ...types.content import ContentBlock, Messages
1111
from ...types.exceptions import ContextWindowOverflowException
1212
from ...types.tools import ToolResultContent
13-
from .conversation_manager import ConversationManager
13+
from .conversation_manager import ConversationManager, ProactiveCompressionConfig
1414

1515
logger = logging.getLogger(__name__)
1616

@@ -37,7 +37,7 @@ def __init__(
3737
should_truncate_results: bool = True,
3838
*,
3939
per_turn: bool | int = False,
40-
compression_threshold: float | None = None,
40+
proactive_compression: Union[bool, ProactiveCompressionConfig, None] = None,
4141
):
4242
"""Initialize the sliding window conversation manager.
4343
@@ -55,8 +55,10 @@ def __init__(
5555
manage message history and prevent the agent loop from slowing down. Start with
5656
per_turn=True and adjust to a specific frequency (e.g., per_turn=5) if needed
5757
for performance tuning.
58-
compression_threshold: Ratio of context window usage that triggers proactive compression.
59-
See :class:`ConversationManager` for details.
58+
proactive_compression: Enable proactive context compression before the model call.
59+
- ``True``: compress when 70% of the context window is used (default threshold).
60+
- ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
61+
- ``False`` or ``None``: disabled, only reactive overflow recovery is used.
6062
6163
Raises:
6264
ValueError: If window_size is negative, or if per_turn is 0 or a negative integer.
@@ -66,7 +68,7 @@ def __init__(
6668
if isinstance(per_turn, int) and not isinstance(per_turn, bool) and per_turn <= 0:
6769
raise ValueError(f"per_turn must be a positive integer, True, or False, got {per_turn}")
6870

69-
super().__init__(compression_threshold=compression_threshold)
71+
super().__init__(proactive_compression=proactive_compression)
7072

7173
self.window_size = window_size
7274
self.should_truncate_results = should_truncate_results
@@ -158,23 +160,15 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
158160
return
159161
self.reduce_context(agent)
160162

161-
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
162-
"""Proactively reduce context by trimming oldest messages.
163-
164-
Args:
165-
agent: The agent whose conversation history will be reduced.
166-
**kwargs: Additional keyword arguments for future extensibility.
167-
168-
Returns:
169-
True if the history was reduced, False otherwise.
170-
"""
171-
initial_count = len(agent.messages)
172-
self.reduce_context(agent)
173-
return len(agent.messages) < initial_count
174-
175163
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
176164
"""Trim the oldest messages to reduce the conversation context size.
177165
166+
When ``e`` is set (reactive overflow recovery), attempts to truncate large tool results
167+
first before falling back to message trimming.
168+
169+
When ``e`` is None (proactive compression or routine management), only trims messages
170+
without attempting tool result truncation.
171+
178172
The method handles special cases where trimming the messages leads to:
179173
- toolResult with no corresponding toolUse
180174
- toolUse with no corresponding toolResult
@@ -183,12 +177,14 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
183177
agent: The agent whose messages will be reduced.
184178
This list is modified in-place.
185179
e: The exception that triggered the context reduction, if any.
180+
When set, this is a reactive overflow recovery call.
181+
When None, this is a proactive or routine management call.
186182
**kwargs: Additional keyword arguments for future extensibility.
187183
188184
Raises:
189185
ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
190-
error was provided (e is not None). When called during routine window management (e is None),
191-
logs a warning and returns without modification.
186+
error was provided (e is not None). When called during routine window management or
187+
proactive compression (e is None), logs a warning and returns without modification.
192188
"""
193189
messages = agent.messages
194190

0 commit comments

Comments
 (0)