6 changes: 6 additions & 0 deletions docs/ref/extensions/sandbox/litellm_compaction.md
@@ -0,0 +1,6 @@
# `LiteLLMCompaction`

::: agents.extensions.sandbox.litellm_compaction
options:
members:
- LiteLLMCompaction
2 changes: 2 additions & 0 deletions docs/sandbox/guide.md
@@ -202,6 +202,8 @@ Built-in capabilities include:

By default, `SandboxAgent.capabilities` uses `Capabilities.default()`, which includes `Filesystem()`, `Shell()`, and `Compaction()`. If you pass `capabilities=[...]`, that list replaces the default, so include any default capabilities you still want.

`Compaction` sizes its dynamic policy from a built-in OpenAI-only context-window registry. If you route requests through litellm to a non-OpenAI model (Anthropic, Bedrock, Vertex, custom proxy aliases, ...), the lookup misses and the policy falls back to a static 240k threshold regardless of the model's real input window. For those deployments use [`LiteLLMCompaction`][agents.extensions.sandbox.litellm_compaction.LiteLLMCompaction] instead — it resolves the cap through `litellm.get_model_info()` and exposes `LiteLLMCompaction.for_model(...)` and `LiteLLMCompaction.for_context_window(...)` factories. Requires the `openai-agents[litellm]` extra.

For skills, choose the source based on how you want them materialized:

- `Skills(lazy_from=LocalDirLazySkillSource(...))` is a good default for larger local skill directories because the model can discover the index first and load only what it needs.
2 changes: 2 additions & 0 deletions mkdocs.yml
@@ -186,6 +186,8 @@ plugins:
- Any-LLM provider: ref/extensions/models/any_llm_provider.md
- LiteLLM model: ref/extensions/models/litellm_model.md
- LiteLLM provider: ref/extensions/models/litellm_provider.md
- Sandbox capabilities:
- LiteLLM compaction: ref/extensions/sandbox/litellm_compaction.md
- Tool output trimmer: ref/extensions/tool_output_trimmer.md
- SQLAlchemySession: ref/extensions/memory/sqlalchemy_session.md
- Async SQLite session: ref/extensions/memory/async_sqlite_session.md
10 changes: 10 additions & 0 deletions src/agents/extensions/sandbox/__init__.py
@@ -109,6 +109,13 @@
except Exception: # pragma: no cover
_HAS_VERCEL = False

try:
from .litellm_compaction import LiteLLMCompaction as LiteLLMCompaction

_HAS_LITELLM_COMPACTION = True
except Exception: # pragma: no cover
_HAS_LITELLM_COMPACTION = False

__all__: list[str] = []

if _HAS_E2B:
@@ -207,3 +214,6 @@
"RunloopUserParameters",
]
)

if _HAS_LITELLM_COMPACTION:
__all__.append("LiteLLMCompaction")
155 changes: 155 additions & 0 deletions src/agents/extensions/sandbox/litellm_compaction.py
@@ -0,0 +1,155 @@
"""Compaction capability sized via the litellm model registry.

The default :class:`agents.sandbox.capabilities.Compaction` ships an
OpenAI-only context-window registry. When a litellm-routed model
(Anthropic, Bedrock, Vertex, custom proxy aliases, etc.) is used,
``Compaction.sampling_params`` cannot pick a
:class:`DynamicCompactionPolicy` -- the OpenAI lookup misses -- and
falls back to a hard-coded :class:`StaticCompactionPolicy` regardless
of the model's actual input window.

:class:`LiteLLMCompaction` short-circuits that by composing a
:class:`DynamicCompactionPolicy` whose
:class:`CompactionModelInfo` carries a context window resolved through
``litellm.get_model_info()``. Two factories cover the common call shapes:

* :meth:`LiteLLMCompaction.for_model` -- when the caller only knows the
model identifier. Performs the litellm lookup internally.
* :meth:`LiteLLMCompaction.for_context_window` -- when the caller has
already resolved the cap (for example, after applying a per-org
ceiling or test override). Useful when an external configuration
  layer wants to clamp the compaction threshold below the
  litellm-reported window.

Both factories accept an optional ``threshold`` fraction (default
``0.8``) -- triggering compaction this early leaves headroom for the
latest turn and the completion after older turns have been summarised.
"""

from __future__ import annotations

import logging

from typing_extensions import Self

from agents.sandbox.capabilities import (
Compaction,
CompactionModelInfo,
DynamicCompactionPolicy,
)

from ..memory._optional_imports import raise_optional_dependency_error

try:
import litellm
except ImportError as _e:
raise_optional_dependency_error(
"LiteLLMCompaction",
dependency_name="litellm",
extra_name="litellm",
cause=_e,
)


logger = logging.getLogger(__name__)


# Default threshold fraction. ``0.8`` triggers compaction with ~20%
# headroom for the latest turn plus completion after older turns have
# been summarised. Slightly more conservative than the upstream
# ``DynamicCompactionPolicy`` default of ``0.9`` so a tool response is
# less likely to be truncated mid-stream.
_DEFAULT_THRESHOLD_FRACTION = 0.8


# Conservative fallback when litellm has no entry for the model (brand
# new beta identifier, custom proxy alias, etc.). Sized to a 200k input
# window so a Claude-family deployment still benefits from compaction
# at a reasonable point; for a smaller model the operator will see the
# WARNING below and can pin the cap explicitly via
# :meth:`LiteLLMCompaction.for_context_window`.
_FALLBACK_CONTEXT_WINDOW = 200_000


def _litellm_context_window(model: str) -> int:
"""Resolve ``model``'s input context window via litellm.

Falls back to :data:`_FALLBACK_CONTEXT_WINDOW` with a WARNING when
litellm has not catalogued ``model``; the capability still
functions (compaction kicks in at the fallback threshold) instead
of failing the run.
"""

try:
info = litellm.get_model_info(model)
except Exception as exc: # noqa: BLE001 - litellm raises bare Exception.
logger.warning(
"litellm has no model info for %r (%s); LiteLLMCompaction "
"falling back to context_window=%d.",
model,
exc,
_FALLBACK_CONTEXT_WINDOW,
)
return _FALLBACK_CONTEXT_WINDOW

# litellm's TypedDict marks both caps optional (Bedrock embedding
# entries omit them, for example) so defend against None even on
# the happy path.
return int(info.get("max_input_tokens") or _FALLBACK_CONTEXT_WINDOW)


class LiteLLMCompaction(Compaction):
""":class:`Compaction` whose default policy is sized for litellm models.

Drop-in replacement for :class:`agents.sandbox.capabilities.Compaction`.
Subclassing keeps the parent's sampling-params serialiser and
:meth:`process_context` truncation behaviour intact; the only thing
this class changes is how the :class:`DynamicCompactionPolicy`'s
:class:`CompactionModelInfo` gets its ``context_window`` value --
via litellm rather than the OpenAI-only registry.

Construct via :meth:`for_model` or :meth:`for_context_window`;
direct ``LiteLLMCompaction(policy=...)`` construction is also
supported and behaves identically to the parent class (the
classmethods are convenience wrappers, not the only entry point).
"""

@classmethod
def for_model(
cls,
model: str,
*,
threshold: float = _DEFAULT_THRESHOLD_FRACTION,
) -> Self:
"""Build sized to ``model``'s litellm-reported context window.

Prefer this when the caller only carries the model identifier
(for example, inside a tool that does not see the application
configuration). If you need an external clamp (per-org
ceiling, test override, etc.) to flow through, use
:meth:`for_context_window` instead.
"""

return cls.for_context_window(_litellm_context_window(model), threshold=threshold)

@classmethod
def for_context_window(
cls,
context_window: int,
*,
threshold: float = _DEFAULT_THRESHOLD_FRACTION,
) -> Self:
"""Build sized to an explicitly-provided ``context_window``.

Use this when an external configuration layer has already
resolved the input cap (for example, after applying a per-org
ceiling that should clamp the compaction threshold below the
litellm-reported window).
"""

return cls(
policy=DynamicCompactionPolicy(
model_info=CompactionModelInfo(context_window=context_window),
threshold=threshold,
)
)
127 changes: 127 additions & 0 deletions tests/extensions/sandbox/test_litellm_compaction.py
@@ -0,0 +1,127 @@
from __future__ import annotations

from typing import Any

import pytest

from agents.extensions.sandbox.litellm_compaction import (
_FALLBACK_CONTEXT_WINDOW,
LiteLLMCompaction,
)
from agents.sandbox.capabilities import (
CompactionModelInfo,
DynamicCompactionPolicy,
StaticCompactionPolicy,
)


class TestLiteLLMCompactionForContextWindow:
def test_builds_dynamic_policy_with_default_threshold(self) -> None:
capability = LiteLLMCompaction.for_context_window(500_000)

assert isinstance(capability, LiteLLMCompaction)
policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.model_info == CompactionModelInfo(context_window=500_000)
# Default threshold is intentionally slightly more conservative than
# the upstream ``DynamicCompactionPolicy`` default of ``0.9``.
assert policy.threshold == pytest.approx(0.8)

def test_threshold_kwarg_is_propagated(self) -> None:
capability = LiteLLMCompaction.for_context_window(1_000_000, threshold=0.5)

policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.threshold == pytest.approx(0.5)

def test_sampling_params_uses_resolved_window(self) -> None:
capability = LiteLLMCompaction.for_context_window(1_000_000, threshold=0.5)

sampling_params = capability.sampling_params({"model": "anthropic/claude-3-5-sonnet"})

assert sampling_params == {
"context_management": [
{
"type": "compaction",
"compact_threshold": 500_000,
}
]
}


class TestLiteLLMCompactionForModel:
def test_uses_litellm_max_input_tokens(self, monkeypatch: pytest.MonkeyPatch) -> None:
captured: dict[str, Any] = {}

def fake_get_model_info(model: str) -> dict[str, Any]:
captured["model"] = model
return {"max_input_tokens": 200_000}

monkeypatch.setattr(
"agents.extensions.sandbox.litellm_compaction.litellm.get_model_info",
fake_get_model_info,
)

capability = LiteLLMCompaction.for_model("anthropic/claude-3-5-sonnet")

assert captured["model"] == "anthropic/claude-3-5-sonnet"
policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.model_info.context_window == 200_000

def test_threshold_kwarg_is_propagated(self, monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.setattr(
"agents.extensions.sandbox.litellm_compaction.litellm.get_model_info",
lambda model: {"max_input_tokens": 400_000},
)

capability = LiteLLMCompaction.for_model("vertex_ai/gemini-1.5-pro", threshold=0.6)

policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.threshold == pytest.approx(0.6)

def test_falls_back_when_litellm_raises(self, monkeypatch: pytest.MonkeyPatch) -> None:
def boom(model: str) -> dict[str, Any]:
raise Exception(f"no model info for {model}")

monkeypatch.setattr(
"agents.extensions.sandbox.litellm_compaction.litellm.get_model_info",
boom,
)

capability = LiteLLMCompaction.for_model("custom-proxy/some-alias")

policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.model_info.context_window == _FALLBACK_CONTEXT_WINDOW

def test_falls_back_when_max_input_tokens_is_missing(
self, monkeypatch: pytest.MonkeyPatch
) -> None:
monkeypatch.setattr(
"agents.extensions.sandbox.litellm_compaction.litellm.get_model_info",
lambda model: {"max_input_tokens": None},
)

capability = LiteLLMCompaction.for_model("bedrock/embedding-model")

policy = capability.policy
assert isinstance(policy, DynamicCompactionPolicy)
assert policy.model_info.context_window == _FALLBACK_CONTEXT_WINDOW


class TestLiteLLMCompactionDirectConstruction:
def test_accepts_explicit_policy_like_parent(self) -> None:
capability = LiteLLMCompaction(policy=StaticCompactionPolicy(threshold=42))

sampling_params = capability.sampling_params({})

assert sampling_params == {
"context_management": [
{
"type": "compaction",
"compact_threshold": 42,
}
]
}