Skip to content

Commit e8433c8

Browse files
agent-of-mkmeralopieter-aws
authored and committed
refactor: align proactive compression API with TypeScript SDK
Match the implementation and interfaces from strands-agents/sdk-typescript#965:

1. Rename parameter: compression_threshold → proactive_compression
   - Accepts bool | ProactiveCompressionConfig | None
   - True = default threshold (0.7), dict = custom, None/False = disabled
2. Remove reduce_on_threshold() method — unified into reduce_context()
   - reduce_context(agent, e=None) distinguishes reactive vs proactive:
     - e set: reactive overflow recovery, MUST reduce or rethrow
     - e None: proactive/routine, best-effort (errors swallowed)
3. Always register BeforeModelCallEvent hook
   - Check happens inside the handler (early return when not configured)
   - Removed subclass-override detection logic
4. SummarizingConversationManager.reduce_context() error handling:
   - Reactive (e set): rethrow on failure
   - Proactive (e None): log warning and return silently
5. Export ProactiveCompressionConfig from conversation_manager package
6. Add DEFAULT_COMPRESSION_THRESHOLD = 0.7 constant

Usage (matches TypeScript DX):

    SlidingWindowConversationManager(window_size=50, proactive_compression=True)
    SummarizingConversationManager(proactive_compression={'compression_threshold': 0.8})
1 parent 7e878db commit e8433c8

7 files changed

Lines changed: 280 additions & 221 deletions

File tree

src/strands/agent/conversation_manager/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
It includes:
44
55
- ConversationManager: Abstract base class defining the conversation management interface
6+
- ProactiveCompressionConfig: Configuration type for proactive compression settings
67
- NullConversationManager: A no-op implementation that does not modify conversation history
78
- SlidingWindowConversationManager: An implementation that maintains a sliding window of messages to control context
89
size while preserving conversation coherence
@@ -13,14 +14,15 @@
1314
is critical for effective agent interactions.
1415
"""
1516

16-
from .conversation_manager import ConversationManager
17+
from .conversation_manager import ConversationManager, ProactiveCompressionConfig
1718
from .null_conversation_manager import NullConversationManager
1819
from .sliding_window_conversation_manager import SlidingWindowConversationManager
1920
from .summarizing_conversation_manager import SummarizingConversationManager
2021

2122
__all__ = [
2223
"ConversationManager",
2324
"NullConversationManager",
25+
"ProactiveCompressionConfig",
2426
"SlidingWindowConversationManager",
2527
"SummarizingConversationManager",
2628
]

src/strands/agent/conversation_manager/conversation_manager.py

Lines changed: 72 additions & 75 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import logging
44
from abc import ABC, abstractmethod
5-
from typing import TYPE_CHECKING, Any
5+
from typing import TYPE_CHECKING, Any, TypedDict, Union
66

77
from ...hooks.events import BeforeModelCallEvent
88
from ...hooks.registry import HookProvider, HookRegistry
@@ -13,9 +13,22 @@
1313

1414
logger = logging.getLogger(__name__)
1515

16+
DEFAULT_COMPRESSION_THRESHOLD = 0.7
1617
DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
1718

1819

20+
class ProactiveCompressionConfig(TypedDict, total=False):
21+
"""Configuration for proactive compression when passed as an object.
22+
23+
Attributes:
24+
compression_threshold: Ratio of context window usage that triggers proactive compression.
25+
Value between 0 (exclusive) and 1 (inclusive).
26+
Defaults to 0.7 (compress when 70% of the context window is used).
27+
"""
28+
29+
compression_threshold: float
30+
31+
1932
class ConversationManager(ABC, HookProvider):
2033
"""Abstract base class for managing conversation history.
2134
@@ -28,30 +41,36 @@ class ConversationManager(ABC, HookProvider):
2841
2942
ConversationManager implements the HookProvider protocol, allowing derived classes to register hooks for agent
3043
lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
31-
hook registration.
44+
hook registration chain.
45+
46+
The primary responsibility of a ConversationManager is overflow recovery: when the model encounters a context
47+
window overflow, :meth:`reduce_context` is called with ``e`` set and MUST reduce the history enough for the next
48+
model call to succeed.
3249
33-
Optionally, a manager can enable proactive compression by setting ``compression_threshold``
34-
in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
35-
checks projected input tokens against the model's context window limit and calls
36-
:meth:`reduce_on_threshold` when the threshold is exceeded.
50+
Subclasses can enable proactive compression by passing ``proactive_compression`` in the constructor.
51+
When enabled, the base class registers a ``BeforeModelCallEvent`` hook that checks projected input tokens
52+
against the model's context window limit and calls :meth:`reduce_context` (without ``e``) when the
53+
threshold is exceeded. This is a best-effort operation — errors are swallowed so the model call can
54+
still proceed.
3755
3856
Example:
3957
```python
40-
class MyConversationManager(ConversationManager):
41-
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
42-
super().register_hooks(registry, **kwargs)
43-
# Register additional hooks here
58+
# Enable proactive compression with default threshold (0.7)
59+
SlidingWindowConversationManager(window_size=50, proactive_compression=True)
60+
61+
# Enable proactive compression with custom threshold
62+
SummarizingConversationManager(proactive_compression={"compression_threshold": 0.8})
4463
```
4564
"""
4665

47-
def __init__(self, *, compression_threshold: float | None = None) -> None:
66+
def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionConfig", None] = None) -> None:
4867
"""Initialize the ConversationManager.
4968
5069
Args:
51-
compression_threshold: Ratio of context window usage that triggers proactive compression.
52-
Value between 0 (exclusive) and 1 (inclusive). For example, 0.7 means compress when 70%
53-
of the context window is used. When not set, proactive compression is disabled and only
54-
reactive overflow recovery is used.
70+
proactive_compression: Enable proactive context compression before the model call.
71+
- ``True``: compress when 70% of the context window is used (default threshold).
72+
- ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
73+
- ``False`` or ``None``: disabled, only reactive overflow recovery is used.
5574
5675
Raises:
5776
ValueError: If compression_threshold is not in the valid range (0, 1].
@@ -61,48 +80,28 @@ def __init__(self, *, compression_threshold: float | None = None) -> None:
6180
These represent messages provided by the user or LLM that have been removed, not messages
6281
included by the conversation manager through something like summarization.
6382
"""
64-
if compression_threshold is not None and (compression_threshold <= 0 or compression_threshold > 1):
83+
# Resolve the threshold from proactive_compression parameter
84+
if proactive_compression is True:
85+
threshold: float | None = DEFAULT_COMPRESSION_THRESHOLD
86+
elif isinstance(proactive_compression, dict):
87+
threshold = proactive_compression.get("compression_threshold", DEFAULT_COMPRESSION_THRESHOLD)
88+
else:
89+
threshold = None
90+
91+
if threshold is not None and (threshold <= 0 or threshold > 1):
6592
raise ValueError(
66-
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {compression_threshold}"
93+
f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
6794
)
6895

6996
self.removed_message_count = 0
70-
self._compression_threshold = compression_threshold
97+
self._compression_threshold = threshold
7198
self._context_window_limit_warned = False
7299

73-
def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
74-
"""Proactively reduce the conversation history before a model call.
75-
76-
Called when projected input tokens exceed the configured compression_threshold
77-
of the model's context window limit. Subclasses implement this to reduce
78-
context before the model call, avoiding overflow errors.
79-
80-
The base class catches any exceptions raised by this method and logs them
81-
at debug level, so subclass implementations do not need to defensively
82-
swallow errors — they can let them propagate. When an exception occurs,
83-
the return value is never observed by the caller.
84-
85-
The default implementation returns False. Subclasses that support proactive
86-
compression should override this method.
87-
88-
Args:
89-
agent: The agent whose conversation history will be reduced.
90-
The agent's messages list should be modified in-place.
91-
**kwargs: Additional keyword arguments for future extensibility.
92-
93-
Returns:
94-
True if the history was reduced, False otherwise. Only observed on success;
95-
if the method raises, the base class catches the exception and the return
96-
value is ignored.
97-
"""
98-
return False
99-
100100
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
101101
"""Register hooks for agent lifecycle events.
102102
103-
When ``compression_threshold`` is configured and the subclass overrides
104-
``reduce_on_threshold``, registers a ``BeforeModelCallEvent`` hook for
105-
proactive compression.
103+
Always registers a ``BeforeModelCallEvent`` hook for proactive compression.
104+
When ``proactive_compression`` is not configured, the handler is a no-op (early return).
106105
107106
Derived classes that override this method must call the base implementation to ensure proper hook
108107
registration chain.
@@ -111,36 +110,31 @@ def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
111110
registry: The hook registry to register callbacks with.
112111
**kwargs: Additional keyword arguments for future extensibility.
113112
"""
114-
if self._compression_threshold is None:
115-
return
116-
117-
# Check if the subclass actually overrides reduce_on_threshold
118-
has_override = type(self).reduce_on_threshold is not ConversationManager.reduce_on_threshold
119-
if not has_override:
120-
logger.warning(
121-
"conversation_manager=<%s> | compression_threshold is configured but reduce_on_threshold is not"
122-
" implemented, proactive compression is disabled",
123-
type(self).__name__,
124-
)
125-
return
126-
113+
# Always subscribe — the threshold check happens inside the handler
127114
registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
128115

129116
def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
130117
"""Handle BeforeModelCallEvent for proactive compression.
131118
119+
When proactive compression is not configured, this is a no-op.
120+
When configured, checks projected input tokens against the context window limit
121+
and calls reduce_context() without error (best-effort) when threshold is exceeded.
122+
132123
Args:
133124
event: The before model call event.
134125
"""
126+
# Early return if proactive compression is not enabled
127+
if self._compression_threshold is None:
128+
return
129+
135130
context_window_limit = event.agent.model.context_window_limit
136131
if context_window_limit is None:
137132
context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
138133
if not self._context_window_limit_warned:
139134
self._context_window_limit_warned = True
140135
logger.warning(
141-
"context_window_limit=<None>, default=<%s>"
142-
" | context_window_limit is not set on the model, using default"
143-
" | set context_window_limit in your model config for accurate threshold checks",
136+
"context_window_limit=<%s> | context_window_limit not set on model, using default."
137+
" Set context_window_limit in your model config for accurate proactive compression",
144138
DEFAULT_CONTEXT_WINDOW_LIMIT,
145139
)
146140

@@ -149,7 +143,7 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
149143
return
150144

151145
ratio = event.projected_input_tokens / context_window_limit
152-
if ratio >= self._compression_threshold: # type: ignore[operator]
146+
if ratio >= self._compression_threshold:
153147
logger.debug(
154148
"projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
155149
" | compression threshold exceeded, reducing context",
@@ -158,8 +152,9 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
158152
ratio,
159153
self._compression_threshold,
160154
)
155+
# Proactive compression is best-effort: swallow errors so the model call can still proceed.
161156
try:
162-
self.reduce_on_threshold(agent=event.agent)
157+
self.reduce_context(agent=event.agent)
163158
except Exception:
164159
logger.debug("proactive compression failed, will proceed with model call", exc_info=True)
165160

@@ -200,22 +195,24 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
200195

201196
@abstractmethod
202197
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
203-
"""Called when the model's context window is exceeded.
204-
205-
This method should implement the specific strategy for reducing the window size when a context overflow occurs.
206-
It is typically called after a ContextWindowOverflowException is caught.
198+
"""Reduce the conversation history.
207199
208-
Implementations might use strategies such as:
200+
Called in two scenarios:
201+
1. **Reactive** (e is set): A context window overflow occurred. The implementation
202+
MUST remove enough history for the next model call to succeed, or re-raise the error.
203+
2. **Proactive** (e is None): The compression threshold was exceeded. This is best-effort —
204+
returning without reduction or raising is acceptable; the model call proceeds regardless.
209205
210-
- Removing the N oldest messages
211-
- Summarizing older context
212-
- Applying importance-based filtering
213-
- Maintaining critical conversation markers
206+
Implementations should modify ``agent.messages`` in-place.
214207
215208
Args:
216209
agent: The agent whose conversation history will be reduced.
217210
This list is modified in-place.
218211
e: The exception that triggered the context reduction, if any.
212+
When set, this is a reactive overflow recovery call — the implementation MUST
213+
reduce enough history for the next model call to succeed.
214+
When None, this is a proactive compression call — best-effort reduction to avoid
215+
hitting the context window limit.
219216
**kwargs: Additional keyword arguments for future extensibility.
220217
"""
221218
pass

src/strands/agent/conversation_manager/null_conversation_manager.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
if TYPE_CHECKING:
66
from ...agent.agent import Agent
77

8-
from ...types.exceptions import ContextWindowOverflowException
98
from .conversation_manager import ConversationManager
109

1110

@@ -29,18 +28,18 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
2928
pass
3029

3130
def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
32-
"""Does not reduce context and raises an exception.
31+
"""Does not reduce context.
32+
33+
When called reactively (e is not None), re-raises the overflow exception since this
34+
manager cannot reduce context. When called proactively (e is None), returns silently.
3335
3436
Args:
3537
agent: The agent whose conversation history will remain unmodified.
3638
e: The exception that triggered the context reduction, if any.
3739
**kwargs: Additional keyword arguments for future extensibility.
3840
3941
Raises:
40-
e: If provided.
41-
ContextWindowOverflowException: If e is None.
42+
e: If provided (reactive overflow).
4243
"""
4344
if e:
4445
raise e
45-
else:
46-
raise ContextWindowOverflowException("Context window overflowed!")

0 commit comments

Comments
 (0)