strands-agents
diff --git a/‎src/strands/agent/conversation_manager/__init__.py‎
Lines changed: 3 additions & 1 deletion b/‎src/strands/agent/conversation_manager/__init__.py‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎src/strands/agent/conversation_manager/conversation_manager.py‎
Lines changed: 72 additions & 75 deletions b/‎src/strands/agent/conversation_manager/conversation_manager.py‎
Lines changed: 72 additions & 75 deletions
diff --git a/‎src/strands/agent/conversation_manager/sliding_window_conversation_manager.py‎
Lines changed: 18 additions & 22 deletions b/‎src/strands/agent/conversation_manager/sliding_window_conversation_manager.py‎
Lines changed: 18 additions & 22 deletions
@@ -3,6 +3,7 @@
 It includes:
 
 - ConversationManager: Abstract base class defining the conversation management interface
+- ProactiveCompressionConfig: Configuration type for proactive compression settings
 - NullConversationManager: A no-op implementation that does not modify conversation history
 - SlidingWindowConversationManager: An implementation that maintains a sliding window of messages to control context
   size while preserving conversation coherence
@@ -13,14 +14,15 @@
 is critical for effective agent interactions.
 """
 
-from .conversation_manager import ConversationManager
+from .conversation_manager import ConversationManager, ProactiveCompressionConfig
 from .null_conversation_manager import NullConversationManager
 from .sliding_window_conversation_manager import SlidingWindowConversationManager
 from .summarizing_conversation_manager import SummarizingConversationManager
 
 __all__ = [
     "ConversationManager",
     "NullConversationManager",
+    "ProactiveCompressionConfig",
     "SlidingWindowConversationManager",
     "SummarizingConversationManager",
 ]
@@ -2,7 +2,7 @@
 
 import logging
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, TypedDict, Union
 
 from ...hooks.events import BeforeModelCallEvent
 from ...hooks.registry import HookProvider, HookRegistry
@@ -13,9 +13,22 @@
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_COMPRESSION_THRESHOLD = 0.7
 DEFAULT_CONTEXT_WINDOW_LIMIT = 200_000
 
 
+class ProactiveCompressionConfig(TypedDict, total=False):
+    """Configuration for proactive compression when passed as an object.
+
+    Attributes:
+        compression_threshold: Ratio of context window usage that triggers proactive compression.
+            Value between 0 (exclusive) and 1 (inclusive).
+            Defaults to 0.7 (compress when 70% of the context window is used).
+    """
+
+    compression_threshold: float
+
+
 class ConversationManager(ABC, HookProvider):
     """Abstract base class for managing conversation history.
 
@@ -28,30 +41,36 @@ class ConversationManager(ABC, HookProvider):
 
     ConversationManager implements the HookProvider protocol, allowing derived classes to register hooks for agent
     lifecycle events. Derived classes that override register_hooks must call the base implementation to ensure proper
-    hook registration.
+    hook registration chain.
+
+    The primary responsibility of a ConversationManager is overflow recovery: when the model encounters a context
+    window overflow, :meth:`reduce_context` is called with ``e`` set and MUST reduce the history enough for the next
+    model call to succeed.
 
-    Optionally, a manager can enable proactive compression by setting ``compression_threshold``
-    in the constructor. When set, the base class registers a ``BeforeModelCallEvent`` hook that
-    checks projected input tokens against the model's context window limit and calls
-    :meth:`reduce_on_threshold` when the threshold is exceeded.
+    Subclasses can enable proactive compression by passing ``proactive_compression`` in the constructor.
+    When enabled, the base class registers a ``BeforeModelCallEvent`` hook that checks projected input tokens
+    against the model's context window limit and calls :meth:`reduce_context` (without ``e``) when the
+    threshold is exceeded. This is a best-effort operation — errors are swallowed so the model call can
+    still proceed.
 
     Example:
         ```python
-        class MyConversationManager(ConversationManager):
-            def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
-                super().register_hooks(registry, **kwargs)
-                # Register additional hooks here
+        # Enable proactive compression with default threshold (0.7)
+        SlidingWindowConversationManager(window_size=50, proactive_compression=True)
+
+        # Enable proactive compression with custom threshold
+        SummarizingConversationManager(proactive_compression={"compression_threshold": 0.8})
         ```
     """
 
-    def __init__(self, *, compression_threshold: float | None = None) -> None:
+    def __init__(self, *, proactive_compression: Union[bool, "ProactiveCompressionConfig", None] = None) -> None:
         """Initialize the ConversationManager.
 
         Args:
-            compression_threshold: Ratio of context window usage that triggers proactive compression.
-                Value between 0 (exclusive) and 1 (inclusive). For example, 0.7 means compress when 70%
-                of the context window is used. When not set, proactive compression is disabled and only
-                reactive overflow recovery is used.
+            proactive_compression: Enable proactive context compression before the model call.
+                - ``True``: compress when 70% of the context window is used (default threshold).
+                - ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
+                - ``False`` or ``None``: disabled, only reactive overflow recovery is used.
 
         Raises:
             ValueError: If compression_threshold is not in the valid range (0, 1].
@@ -61,48 +80,28 @@ def __init__(self, *, compression_threshold: float | None = None) -> None:
               These represent messages provided by the user or LLM that have been removed, not messages
               included by the conversation manager through something like summarization.
         """
-        if compression_threshold is not None and (compression_threshold <= 0 or compression_threshold > 1):
+        # Resolve the threshold from proactive_compression parameter
+        if proactive_compression is True:
+            threshold: float | None = DEFAULT_COMPRESSION_THRESHOLD
+        elif isinstance(proactive_compression, dict):
+            threshold = proactive_compression.get("compression_threshold", DEFAULT_COMPRESSION_THRESHOLD)
+        else:
+            threshold = None
+
+        if threshold is not None and (threshold <= 0 or threshold > 1):
             raise ValueError(
-                f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {compression_threshold}"
+                f"compression_threshold must be between 0 (exclusive) and 1 (inclusive), got {threshold}"
             )
 
         self.removed_message_count = 0
-        self._compression_threshold = compression_threshold
+        self._compression_threshold = threshold
         self._context_window_limit_warned = False
 
-    def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
-        """Proactively reduce the conversation history before a model call.
-
-        Called when projected input tokens exceed the configured compression_threshold
-        of the model's context window limit. Subclasses implement this to reduce
-        context before the model call, avoiding overflow errors.
-
-        The base class catches any exceptions raised by this method and logs them
-        at debug level, so subclass implementations do not need to defensively
-        swallow errors — they can let them propagate. When an exception occurs,
-        the return value is never observed by the caller.
-
-        The default implementation returns False. Subclasses that support proactive
-        compression should override this method.
-
-        Args:
-            agent: The agent whose conversation history will be reduced.
-                The agent's messages list should be modified in-place.
-            **kwargs: Additional keyword arguments for future extensibility.
-
-        Returns:
-            True if the history was reduced, False otherwise. Only observed on success;
-            if the method raises, the base class catches the exception and the return
-            value is ignored.
-        """
-        return False
-
     def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
         """Register hooks for agent lifecycle events.
 
-        When ``compression_threshold`` is configured and the subclass overrides
-        ``reduce_on_threshold``, registers a ``BeforeModelCallEvent`` hook for
-        proactive compression.
+        Always registers a ``BeforeModelCallEvent`` hook for proactive compression.
+        When ``proactive_compression`` is not configured, the handler is a no-op (early return).
 
         Derived classes that override this method must call the base implementation to ensure proper hook
         registration chain.
@@ -111,36 +110,31 @@ def register_hooks(self, registry: HookRegistry, **kwargs: Any) -> None:
             registry: The hook registry to register callbacks with.
             **kwargs: Additional keyword arguments for future extensibility.
         """
-        if self._compression_threshold is None:
-            return
-
-        # Check if the subclass actually overrides reduce_on_threshold
-        has_override = type(self).reduce_on_threshold is not ConversationManager.reduce_on_threshold
-        if not has_override:
-            logger.warning(
-                "conversation_manager=<%s> | compression_threshold is configured but reduce_on_threshold is not"
-                " implemented, proactive compression is disabled",
-                type(self).__name__,
-            )
-            return
-
+        # Always subscribe — the threshold check happens inside the handler
         registry.add_callback(BeforeModelCallEvent, self._on_before_model_call_threshold)
 
     def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
         """Handle BeforeModelCallEvent for proactive compression.
 
+        When proactive compression is not configured, this is a no-op.
+        When configured, checks projected input tokens against the context window limit
+        and calls reduce_context() without passing ``e`` (best-effort) when the threshold is exceeded.
+
         Args:
             event: The before model call event.
         """
+        # Early return if proactive compression is not enabled
+        if self._compression_threshold is None:
+            return
+
         context_window_limit = event.agent.model.context_window_limit
         if context_window_limit is None:
             context_window_limit = DEFAULT_CONTEXT_WINDOW_LIMIT
             if not self._context_window_limit_warned:
                 self._context_window_limit_warned = True
                 logger.warning(
-                    "context_window_limit=<None>, default=<%s>"
-                    " | context_window_limit is not set on the model, using default"
-                    " | set context_window_limit in your model config for accurate threshold checks",
+                    "context_window_limit is not set on the model, using default of %s"
+                    " | set context_window_limit in your model config for accurate proactive compression",
                     DEFAULT_CONTEXT_WINDOW_LIMIT,
                 )
 
@@ -149,7 +143,7 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
             return
 
         ratio = event.projected_input_tokens / context_window_limit
-        if ratio >= self._compression_threshold:  # type: ignore[operator]
+        if ratio >= self._compression_threshold:
             logger.debug(
                 "projected_tokens=<%s>, limit=<%s>, ratio=<%.2f>, compression_threshold=<%s>"
                 " | compression threshold exceeded, reducing context",
@@ -158,8 +152,9 @@ def _on_before_model_call_threshold(self, event: BeforeModelCallEvent) -> None:
                 ratio,
                 self._compression_threshold,
             )
+            # Proactive compression is best-effort: swallow errors so the model call can still proceed.
             try:
-                self.reduce_on_threshold(agent=event.agent)
+                self.reduce_context(agent=event.agent)
             except Exception:
                 logger.debug("proactive compression failed, will proceed with model call", exc_info=True)
 
@@ -200,22 +195,24 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
 
     @abstractmethod
     def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
-        """Called when the model's context window is exceeded.
-
-        This method should implement the specific strategy for reducing the window size when a context overflow occurs.
-        It is typically called after a ContextWindowOverflowException is caught.
+        """Reduce the conversation history.
 
-        Implementations might use strategies such as:
+        Called in two scenarios:
+        1. **Reactive** (e is set): A context window overflow occurred. The implementation
+           MUST remove enough history for the next model call to succeed, or re-raise the error.
+        2. **Proactive** (e is None): The compression threshold was exceeded. This is best-effort —
+           returning without reduction or raising is acceptable; the model call proceeds regardless.
 
-        - Removing the N oldest messages
-        - Summarizing older context
-        - Applying importance-based filtering
-        - Maintaining critical conversation markers
+        Implementations should modify ``agent.messages`` in-place.
 
         Args:
             agent: The agent whose conversation history will be reduced.
                 The agent's ``messages`` list is modified in-place.
             e: The exception that triggered the context reduction, if any.
+                When set, this is a reactive overflow recovery call — the implementation MUST
+                reduce enough history for the next model call to succeed.
+                When None, this is a proactive compression call — best-effort reduction to avoid
+                hitting the context window limit.
             **kwargs: Additional keyword arguments for future extensibility.
         """
         pass
@@ -1,7 +1,7 @@
 """Sliding window conversation history management."""
 
 import logging
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Union
 
 if TYPE_CHECKING:
     from ...agent.agent import Agent
@@ -10,7 +10,7 @@
 from ...types.content import ContentBlock, Messages
 from ...types.exceptions import ContextWindowOverflowException
 from ...types.tools import ToolResultContent
-from .conversation_manager import ConversationManager
+from .conversation_manager import ConversationManager, ProactiveCompressionConfig
 
 logger = logging.getLogger(__name__)
 
@@ -37,7 +37,7 @@ def __init__(
         should_truncate_results: bool = True,
         *,
         per_turn: bool | int = False,
-        compression_threshold: float | None = None,
+        proactive_compression: Union[bool, ProactiveCompressionConfig, None] = None,
     ):
         """Initialize the sliding window conversation manager.
 
@@ -55,8 +55,10 @@ def __init__(
                 manage message history and prevent the agent loop from slowing down. Start with
                 per_turn=True and adjust to a specific frequency (e.g., per_turn=5) if needed
                 for performance tuning.
-            compression_threshold: Ratio of context window usage that triggers proactive compression.
-                See :class:`ConversationManager` for details.
+            proactive_compression: Enable proactive context compression before the model call.
+                - ``True``: compress when 70% of the context window is used (default threshold).
+                - ``{"compression_threshold": float}``: compress at the specified ratio (0, 1].
+                - ``False`` or ``None``: disabled, only reactive overflow recovery is used.
 
         Raises:
             ValueError: If window_size is negative, or if per_turn is 0 or a negative integer.
@@ -66,7 +68,7 @@ def __init__(
         if isinstance(per_turn, int) and not isinstance(per_turn, bool) and per_turn <= 0:
             raise ValueError(f"per_turn must be a positive integer, True, or False, got {per_turn}")
 
-        super().__init__(compression_threshold=compression_threshold)
+        super().__init__(proactive_compression=proactive_compression)
 
         self.window_size = window_size
         self.should_truncate_results = should_truncate_results
@@ -158,23 +160,15 @@ def apply_management(self, agent: "Agent", **kwargs: Any) -> None:
             return
         self.reduce_context(agent)
 
-    def reduce_on_threshold(self, agent: "Agent", **kwargs: Any) -> bool:
-        """Proactively reduce context by trimming oldest messages.
-
-        Args:
-            agent: The agent whose conversation history will be reduced.
-            **kwargs: Additional keyword arguments for future extensibility.
-
-        Returns:
-            True if the history was reduced, False otherwise.
-        """
-        initial_count = len(agent.messages)
-        self.reduce_context(agent)
-        return len(agent.messages) < initial_count
-
     def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: Any) -> None:
         """Trim the oldest messages to reduce the conversation context size.
 
+        When ``e`` is set (reactive overflow recovery), attempts to truncate large tool results
+        first before falling back to message trimming.
+
+        When ``e`` is None (proactive compression or routine management), only trims messages
+        without attempting tool result truncation.
+
         The method handles special cases where trimming the messages leads to:
          - toolResult with no corresponding toolUse
          - toolUse with no corresponding toolResult
@@ -183,12 +177,14 @@ def reduce_context(self, agent: "Agent", e: Exception | None = None, **kwargs: A
             agent: The agent whose messages will be reduced.
                 This list is modified in-place.
             e: The exception that triggered the context reduction, if any.
+                When set, this is a reactive overflow recovery call.
+                When None, this is a proactive or routine management call.
             **kwargs: Additional keyword arguments for future extensibility.
 
         Raises:
             ContextWindowOverflowException: If the context cannot be reduced further and a context overflow
-                error was provided (e is not None). When called during routine window management (e is None),
-                logs a warning and returns without modification.
+                error was provided (e is not None). When called during routine window management or
+                proactive compression (e is None), logs a warning and returns without modification.
         """
         messages = agent.messages