22
33import logging
44from abc import ABC , abstractmethod
5- from typing import TYPE_CHECKING , Any
5+ from typing import TYPE_CHECKING , Any , TypedDict , Union
66
77from ...hooks .events import BeforeModelCallEvent
88from ...hooks .registry import HookProvider , HookRegistry
1313
1414logger = logging .getLogger (__name__ )
1515
# Default ratio of context-window usage at which proactive compression triggers
# (used when proactive compression is enabled without an explicit threshold).
DEFAULT_COMPRESSION_THRESHOLD = 0.7
# Fallback context window size, in tokens, used for the threshold check when the
# model does not report its own context_window_limit.
DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
1718
1819
class ProactiveCompressionConfig(TypedDict, total=False):
    """Configuration for proactive compression when passed as an object.

    All keys are optional (``total=False``); an empty dict enables proactive
    compression with the default threshold.

    Attributes:
        compression_threshold: Ratio of context window usage that triggers proactive compression.
            Value between 0 (exclusive) and 1 (inclusive).
            Defaults to 0.7 (compress when 70% of the context window is used).
    """

    compression_threshold: float
30+
31+
1932class ConversationManager (ABC , HookProvider ):
2033 """Abstract base class for managing conversation history.
2134
@@ -28,30 +41,36 @@ class ConversationManager(ABC, HookProvider):
2841
2942 ConversationManager implements the HookProvider protocol, allowing derived classes to register hooks for agent
3043 lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
31- hook registration.
44+ hook registration chain.
45+
46+ The primary responsibility of a ConversationManager is overflow recovery: when the model encounters a context
47+ window overflow, :meth:`reduce_context` is called with ``e`` set and MUST reduce the history enough for the next
48+ model call to succeed.
3249
33- Optionally, a manager can enable proactive compression by setting ``compression_threshold``
34- in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
35- checks projected input tokens against the model's context window limit and calls
36- :meth:`reduce_on_threshold` when the threshold is exceeded.
50+ Subclasses can enable proactive compression by passing ``proactive_compression`` in the constructor.
51+ When enabled, the base class registers a ``BeforeModelCallEvent`` hook that checks projected input tokens
52+ against the model's context window limit and calls :meth:`reduce_context` (without ``e``) when the
53+ threshold is exceeded. This is a best-effort operation — errors are swallowed so the model call can
54+ still proceed.
3755
3856 Example:
3957 ```python
40- class MyConversationManager(ConversationManager):
41- def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
42- super().register_hooks(registry, **kwargs)
43- # Register additional hooks here
58+ # Enable proactive compression with default threshold (0.7)
59+ SlidingWindowConversationManager(window_size=50, proactive_compression=True)
60+
61+ # Enable proactive compression with custom threshold
62+ SummarizingConversationManager(proactive_compression={"compression_threshold": 0.8})
4463 ```
4564 """
4665
def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionConfig", None] = None) -> None:
    """Initialize the ConversationManager.

    Args:
        proactive_compression: Enable proactive context compression before the model call.
            - ``True``: compress when 70% of the context window is used (default threshold).
            - ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
              A dict without that key uses the default threshold.
            - ``False`` or ``None``: disabled, only reactive overflow recovery is used.

    Raises:
        ValueError: If compression_threshold is not in the valid range (0, 1].
            NaN is rejected as well.

    Attributes:
        removed_message_count: Initialized to 0. These represent messages provided by the user or LLM
            that have been removed, not messages included by the conversation manager through
            something like summarization.
    """
    # Resolve the effective threshold from the proactive_compression parameter.
    threshold: float | None
    if proactive_compression is True:
        threshold = DEFAULT_COMPRESSION_THRESHOLD
    elif isinstance(proactive_compression, dict):
        threshold = proactive_compression.get("compression_threshold", DEFAULT_COMPRESSION_THRESHOLD)
    else:
        # False / None (or anything else): proactive compression stays disabled.
        threshold = None

    # Expressed as a positive range check so that NaN, which fails every ordering
    # comparison, is rejected instead of silently disabling the threshold check later.
    if threshold is not None and not (0 < threshold <= 1):
        raise ValueError(
            f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
        )

    self.removed_message_count = 0
    self._compression_threshold = threshold
    # Ensures the "context_window_limit is not set" warning is logged only once per manager.
    self._context_window_limit_warned = False
7299
73- def reduce_on_threshold (self , agent : "Agent" , ** kwargs : Any ) -> bool :
74- """Proactively reduce the conversation history before a model call.
75-
76- Called when projected input tokens exceed the configured compression_threshold
77- of the model's context window limit. Subclasses implement this to reduce
78- context before the model call, avoiding overflow errors.
79-
80- The base class catches any exceptions raised by this method and logs them
81- at debug level, so subclass implementations do not need to defensively
82- swallow errors — they can let them propagate. When an exception occurs,
83- the return value is never observed by the caller.
84-
85- The default implementation returns False. Subclasses that support proactive
86- compression should override this method.
87-
88- Args:
89- agent: The agent whose conversation history will be reduced.
90- The agent's messages list should be modified in-place.
91- **kwargs: Additional keyword arguments for future extensibility.
92-
93- Returns:
94- True if the history was reduced, False otherwise. Only observed on success;
95- if the method raises, the base class catches the exception and the return
96- value is ignored.
97- """
98- return False
99-
def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
    """Register hooks for agent lifecycle events.

    Always registers a ``BeforeModelCallEvent`` hook for proactive compression.
    When ``proactive_compression`` is not configured, the handler is a no-op (early return).

    Derived classes that override this method must call the base implementation to ensure proper hook
    registration chain.

    Args:
        registry: The hook registry to register callbacks with.
        **kwargs: Additional keyword arguments for future extensibility.
    """
    # Always subscribe — the threshold check happens inside the handler, so no
    # configuration or subclass inspection is needed at registration time.
    registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
128115
129116 def _on_before_model_call_threshold (self , event : BeforeModelCallEvent ) -> None :
130117 """Handle BeforeModelCallEvent for proactive compression.
131118
119+ When proactive compression is not configured, this is a no-op.
120+ When configured, checks projected input tokens against the context window limit
121+ and calls reduce_context() without ``e`` (best-effort) when the threshold is exceeded.
122+
132123 Args:
133124 event: The before model call event.
134125 """
126+ # Early return if proactive compression is not enabled
127+ if self ._compression_threshold is None :
128+ return
129+
135130 context_window_limit = event .agent .model .context_window_limit
136131 if context_window_limit is None :
137132 context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
138133 if not self ._context_window_limit_warned :
139134 self ._context_window_limit_warned = True
140135 logger .warning (
141- "context_window_limit=<None>, default=<%s>"
142- " | context_window_limit is not set on the model, using default"
143- " | set context_window_limit in your model config for accurate threshold checks" ,
136+ "context_window_limit is not set on the model, using default of %s"
137+ " | set context_window_limit in your model config for accurate proactive compression" ,
144138 DEFAULT_CONTEXT_WINDOW_LIMIT ,
145139 )
146140
@@ -149,7 +143,7 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
149143 return
150144
151145 ratio = event .projected_input_tokens / context_window_limit
152- if ratio >= self ._compression_threshold : # type: ignore[operator]
146+ if ratio >= self ._compression_threshold :
153147 logger .debug (
154148 "projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
155149 " | compression threshold exceeded, reducing context" ,
@@ -158,8 +152,9 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
158152 ratio ,
159153 self ._compression_threshold ,
160154 )
155+ # Proactive compression is best-effort: swallow errors so the model call can still proceed.
161156 try :
162- self .reduce_on_threshold (agent = event .agent )
157+ self .reduce_context (agent = event .agent )
163158 except Exception :
164159 logger .debug ("proactive compression failed, will proceed with model call" , exc_info = True )
165160
@@ -200,22 +195,24 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
200195
201196 @abstractmethod
202197 def reduce_context (self , agent : "Agent" , e : Exception | None = None , ** kwargs : Any ) -> None :
203- """Called when the model's context window is exceeded.
204-
205- This method should implement the specific strategy for reducing the window size when a context overflow occurs.
206- It is typically called after a ContextWindowOverflowException is caught.
198+ """Reduce the conversation history.
207199
208- Implementations might use strategies such as:
200+ Called in two scenarios:
201+ 1. **Reactive** (e is set): A context window overflow occurred. The implementation
202+ MUST remove enough history for the next model call to succeed, or re-raise the error.
203+ 2. **Proactive** (e is None): The compression threshold was exceeded. This is best-effort —
204+ returning without reduction or raising is acceptable; the model call proceeds regardless.
209205
210- - Removing the N oldest messages
211- - Summarizing older context
212- - Applying importance-based filtering
213- - Maintaining critical conversation markers
206+ Implementations should modify ``agent.messages`` in-place.
214207
215208 Args:
216209 agent: The agent whose conversation history will be reduced.
217210 This list is modified in-place.
218211 e: The exception that triggered the context reduction, if any.
212+ When set, this is a reactive overflow recovery call — the implementation MUST
213+ reduce enough history for the next model call to succeed.
214+ When None, this is a proactive compression call — best-effort reduction to avoid
215+ hitting the context window limit.
219216 **kwargs: Additional keyword arguments for future extensibility.
220217 """
221218 pass
0 commit comments