google-gemini
diff --git a/docs/cli/settings.md b/docs/cli/settings.md
Lines changed: 12 additions & 15 deletions
diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
Lines changed: 45 additions & 19 deletions
diff --git a/packages/a2a-server/src/utils/testing_utils.ts b/packages/a2a-server/src/utils/testing_utils.ts
Lines changed: 2 additions & 6 deletions
diff --git a/packages/cli/src/config/config.ts b/packages/cli/src/config/config.ts
Lines changed: 4 additions & 8 deletions
diff --git a/packages/cli/src/config/settingsSchema.ts b/packages/cli/src/config/settingsSchema.ts
Lines changed: 115 additions & 34 deletions
@@ -155,21 +155,18 @@ they appear in the UI.
 
 ### Experimental
 
-| UI Label                           | Setting                                        | Description                                                                                                                                               | Default |
-| ---------------------------------- | ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
-| Enable Tool Output Masking         | `experimental.toolOutputMasking.enabled`       | Enables tool output masking to save tokens.                                                                                                               | `true`  |
-| Enable Git Worktrees               | `experimental.worktrees`                       | Enable automated Git worktree management for parallel work.                                                                                               | `false` |
-| Use OSC 52 Paste                   | `experimental.useOSC52Paste`                   | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
-| Use OSC 52 Copy                    | `experimental.useOSC52Copy`                    | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
-| Plan                               | `experimental.plan`                            | Enable Plan Mode.                                                                                                                                         | `true`  |
-| Model Steering                     | `experimental.modelSteering`                   | Enable model steering (user hints) to guide the model during tool execution.                                                                              | `false` |
-| Direct Web Fetch                   | `experimental.directWebFetch`                  | Enable web fetch behavior that bypasses LLM summarization.                                                                                                | `false` |
-| Memory Manager Agent               | `experimental.memoryManager`                   | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.             | `false` |
-| Agent History Truncation           | `experimental.agentHistoryTruncation`          | Enable truncation window logic for the Agent History Provider.                                                                                            | `false` |
-| Agent History Truncation Threshold | `experimental.agentHistoryTruncationThreshold` | The maximum number of messages before history is truncated.                                                                                               | `30`    |
-| Agent History Retained Messages    | `experimental.agentHistoryRetainedMessages`    | The number of recent messages to retain after truncation.                                                                                                 | `15`    |
-| Agent History Summarization        | `experimental.agentHistorySummarization`       | Enable summarization of truncated content via a small model for the Agent History Provider.                                                               | `false` |
-| Topic & Update Narration           | `experimental.topicUpdateNarration`            | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.                                      | `false` |
+| UI Label                   | Setting                                  | Description                                                                                                                                               | Default |
+| -------------------------- | ---------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
+| Enable Tool Output Masking | `experimental.toolOutputMasking.enabled` | Enables tool output masking to save tokens.                                                                                                               | `true`  |
+| Enable Git Worktrees       | `experimental.worktrees`                 | Enable automated Git worktree management for parallel work.                                                                                               | `false` |
+| Use OSC 52 Paste           | `experimental.useOSC52Paste`             | Use OSC 52 for pasting. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
+| Use OSC 52 Copy            | `experimental.useOSC52Copy`              | Use OSC 52 for copying. This may be more robust than the default system when using remote terminal sessions (if your terminal is configured to allow it). | `false` |
+| Plan                       | `experimental.plan`                      | Enable Plan Mode.                                                                                                                                         | `true`  |
+| Model Steering             | `experimental.modelSteering`             | Enable model steering (user hints) to guide the model during tool execution.                                                                              | `false` |
+| Direct Web Fetch           | `experimental.directWebFetch`            | Enable web fetch behavior that bypasses LLM summarization.                                                                                                | `false` |
+| Memory Manager Agent       | `experimental.memoryManager`             | Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.             | `false` |
+| Enable Context Management  | `experimental.contextManagement`         | Enable logic for context management.                                                                                                                      | `false` |
+| Topic & Update Narration   | `experimental.topicUpdateNarration`      | Enable the experimental Topic & Update communication model for reduced chattiness and structured progress reporting.                                      | `false` |
 
 ### Skills
 
 
@@ -1702,25 +1702,8 @@ their corresponding top-level category object in your `settings.json` file.
   - **Default:** `false`
   - **Requires restart:** Yes
 
-- **`experimental.agentHistoryTruncation`** (boolean):
-  - **Description:** Enable truncation window logic for the Agent History
-    Provider.
-  - **Default:** `false`
-  - **Requires restart:** Yes
-
-- **`experimental.agentHistoryTruncationThreshold`** (number):
-  - **Description:** The maximum number of messages before history is truncated.
-  - **Default:** `30`
-  - **Requires restart:** Yes
-
-- **`experimental.agentHistoryRetainedMessages`** (number):
-  - **Description:** The number of recent messages to retain after truncation.
-  - **Default:** `15`
-  - **Requires restart:** Yes
-
-- **`experimental.agentHistorySummarization`** (boolean):
-  - **Description:** Enable summarization of truncated content via a small model
-    for the Agent History Provider.
+- **`experimental.contextManagement`** (boolean):
+  - **Description:** Enable logic for context management.
   - **Default:** `false`
   - **Requires restart:** Yes
 
@@ -1815,6 +1798,49 @@ their corresponding top-level category object in your `settings.json` file.
     prioritize available tools dynamically.
   - **Default:** `[]`
 
+#### `contextManagement`
+
+- **`contextManagement.historyWindow.maxTokens`** (number):
+  - **Description:** The number of tokens to allow before triggering
+    compression.
+  - **Default:** `150000`
+  - **Requires restart:** Yes
+
+- **`contextManagement.historyWindow.retainedTokens`** (number):
+  - **Description:** The number of tokens to always retain.
+  - **Default:** `40000`
+  - **Requires restart:** Yes
+
+- **`contextManagement.messageLimits.normalMaxTokens`** (number):
+  - **Description:** The target number of tokens to budget for a normal
+    conversation turn.
+  - **Default:** `2500`
+  - **Requires restart:** Yes
+
+- **`contextManagement.messageLimits.retainedMaxTokens`** (number):
+  - **Description:** The maximum number of tokens a single conversation turn can
+    consume before truncation.
+  - **Default:** `12000`
+  - **Requires restart:** Yes
+
+- **`contextManagement.messageLimits.normalizationHeadRatio`** (number):
+  - **Description:** The ratio of tokens to retain from the beginning of a
+    truncated message (0.0 to 1.0).
+  - **Default:** `0.25`
+  - **Requires restart:** Yes
+
+- **`contextManagement.toolDistillation.maxOutputTokens`** (number):
+  - **Description:** Maximum tokens to show when truncating large tool outputs.
+  - **Default:** `10000`
+  - **Requires restart:** Yes
+
+- **`contextManagement.toolDistillation.summarizationThresholdTokens`**
+  (number):
+  - **Description:** Threshold above which truncated tool outputs will be
+    summarized by an LLM.
+  - **Default:** `20000`
+  - **Requires restart:** Yes
+
 #### `admin`
 
 - **`admin.secureModeEnabled`** (boolean):
 
@@ -109,12 +109,8 @@ export function createMockConfig(
         enableEnvironmentVariableRedaction: false,
       },
     }),
-    isExperimentalAgentHistoryTruncationEnabled: vi.fn().mockReturnValue(false),
-    getExperimentalAgentHistoryTruncationThreshold: vi.fn().mockReturnValue(50),
-    getExperimentalAgentHistoryRetainedMessages: vi.fn().mockReturnValue(30),
-    isExperimentalAgentHistorySummarizationEnabled: vi
-      .fn()
-      .mockReturnValue(false),
+    isAutoDistillationEnabled: vi.fn().mockReturnValue(false),
+    getContextManagementConfig: vi.fn().mockReturnValue({ enabled: false }),
     ...overrides,
   } as unknown as Config;
 
 
@@ -977,14 +977,10 @@ export async function loadCliConfig(
     disabledSkills: settings.skills?.disabled,
     experimentalJitContext: settings.experimental?.jitContext,
     experimentalMemoryManager: settings.experimental?.memoryManager,
-    experimentalAgentHistoryTruncation:
-      settings.experimental?.agentHistoryTruncation,
-    experimentalAgentHistoryTruncationThreshold:
-      settings.experimental?.agentHistoryTruncationThreshold,
-    experimentalAgentHistoryRetainedMessages:
-      settings.experimental?.agentHistoryRetainedMessages,
-    experimentalAgentHistorySummarization:
-      settings.experimental?.agentHistorySummarization,
+    contextManagement: {
+      enabled: settings.experimental?.contextManagement,
+      ...settings?.contextManagement,
+    },
     modelSteering: settings.experimental?.modelSteering,
     topicUpdateNarration: settings.experimental?.topicUpdateNarration,
     toolOutputMasking: settings.experimental?.toolOutputMasking,
 
@@ -2169,44 +2169,13 @@ const SETTINGS_SCHEMA = {
           'Replace the built-in save_memory tool with a memory manager subagent that supports adding, removing, de-duplicating, and organizing memories.',
         showInDialog: true,
       },
-      agentHistoryTruncation: {
+      contextManagement: {
         type: 'boolean',
-        label: 'Agent History Truncation',
+        label: 'Enable Context Management',
         category: 'Experimental',
         requiresRestart: true,
         default: false,
-        description:
-          'Enable truncation window logic for the Agent History Provider.',
-        showInDialog: true,
-      },
-      agentHistoryTruncationThreshold: {
-        type: 'number',
-        label: 'Agent History Truncation Threshold',
-        category: 'Experimental',
-        requiresRestart: true,
-        default: 30,
-        description:
-          'The maximum number of messages before history is truncated.',
-        showInDialog: true,
-      },
-      agentHistoryRetainedMessages: {
-        type: 'number',
-        label: 'Agent History Retained Messages',
-        category: 'Experimental',
-        requiresRestart: true,
-        default: 15,
-        description:
-          'The number of recent messages to retain after truncation.',
-        showInDialog: true,
-      },
-      agentHistorySummarization: {
-        type: 'boolean',
-        label: 'Agent History Summarization',
-        category: 'Experimental',
-        requiresRestart: true,
-        default: false,
-        description:
-          'Enable summarization of truncated content via a small model for the Agent History Provider.',
+        description: 'Enable logic for context management.',
         showInDialog: true,
       },
       topicUpdateNarration: {
@@ -2485,6 +2454,118 @@ const SETTINGS_SCHEMA = {
     },
   },
 
+  contextManagement: {
+    type: 'object',
+    label: 'Context Management',
+    category: 'Experimental',
+    requiresRestart: true,
+    default: {},
+    description:
+      'Settings for agent history and tool distillation context management.',
+    showInDialog: false,
+    properties: {
+      historyWindow: {
+        type: 'object',
+        label: 'History Window Settings',
+        category: 'Context Management',
+        requiresRestart: true,
+        default: {},
+        showInDialog: false,
+        properties: {
+          maxTokens: {
+            type: 'number',
+            label: 'Max Tokens',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 150_000,
+            description:
+              'The number of tokens to allow before triggering compression.',
+            showInDialog: false,
+          },
+          retainedTokens: {
+            type: 'number',
+            label: 'Retained Tokens',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 40_000,
+            description: 'The number of tokens to always retain.',
+            showInDialog: false,
+          },
+        },
+      },
+      messageLimits: {
+        type: 'object',
+        label: 'Message Limits',
+        category: 'Context Management',
+        requiresRestart: true,
+        default: {},
+        showInDialog: false,
+        properties: {
+          normalMaxTokens: {
+            type: 'number',
+            label: 'Normal Maximum Tokens',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 2500,
+            description:
+              'The target number of tokens to budget for a normal conversation turn.',
+            showInDialog: false,
+          },
+          retainedMaxTokens: {
+            type: 'number',
+            label: 'Retained Maximum Tokens',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 12000,
+            description:
+              'The maximum number of tokens a single conversation turn can consume before truncation.',
+            showInDialog: false,
+          },
+          normalizationHeadRatio: {
+            type: 'number',
+            label: 'Normalization Head Ratio',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 0.25,
+            description:
+              'The ratio of tokens to retain from the beginning of a truncated message (0.0 to 1.0).',
+            showInDialog: false,
+          },
+        },
+      },
+      toolDistillation: {
+        type: 'object',
+        label: 'Tool Distillation',
+        category: 'Context Management',
+        requiresRestart: true,
+        default: {},
+        showInDialog: false,
+        properties: {
+          maxOutputTokens: {
+            type: 'number',
+            label: 'Max Output Tokens',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 10_000,
+            description:
+              'Maximum tokens to show when truncating large tool outputs.',
+            showInDialog: false,
+          },
+          summarizationThresholdTokens: {
+            type: 'number',
+            label: 'Tool Summarization Threshold',
+            category: 'Context Management',
+            requiresRestart: true,
+            default: 20_000,
+            description:
+              'Threshold above which truncated tool outputs will be summarized by an LLM.',
+            showInDialog: false,
+          },
+        },
+      },
+    },
+  },
+
   admin: {
     type: 'object',
     label: 'Admin',