22 changes: 22 additions & 0 deletions packages/core/src/agents/runtime/agent-core.ts
@@ -1349,6 +1349,28 @@ export class AgentCore {
return this.messages;
}

/**
* Prune oldest messages from the message history when `heapUsed` exceeds
* 1.5 GB. Returns the number of messages pruned (0 if no pruning was
* needed). Call this after each reasoning round to prevent unbounded
* memory growth in long-lived interactive sessions.
*
* Messages are pruned in chunks — removing ~20% of the oldest messages
* each time, which naturally scales with memory pressure rather than
* a hard count cap. This catches the actual root cause (heap usage)
* regardless of how the memory ballooned (many small entries vs few huge).
*/
pruneMessages(): number {

[Critical] pruneMessages() has two structural issues:

  1. Never called for headless agents. The sole call site is agent-interactive.ts:129. All subagents, background tasks, and AgentHeadless usage never prune — exactly the long-running scenarios that need it most.
  2. Drops system prompts indiscriminately. Removing the oldest 20% of messages also removes system prompts and tool definitions (at the front of history). After enough rounds, the agent loses its identity and tools.
Suggested change:
pruneMessages(): number {
  const threshold = 1.5 * 1024 * 1024 * 1024;
  if (process.memoryUsage().heapUsed < threshold) {
    return 0;
  }
  const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
  // Protect system/tool messages at the head of history.
  const pruneFrom = Math.min(2, this.messages.length);
  const actualPrune = Math.min(toPrune, this.messages.length - pruneFrom);
  this.messages.splice(pruneFrom, actualPrune);
  return actualPrune;
}

Also add this.core.pruneMessages() in AgentHeadless.execute() after each reasoning round.

— DeepSeek/deepseek-v4-pro via Qwen Code /review

const threshold = 1.5 * 1024 * 1024 * 1024; // 1.5 GB
if (process.memoryUsage().heapUsed < threshold) {
return 0;
}
// Remove ~20% of the oldest messages to relieve heap pressure.

[Critical] pruneMessages() operates on AgentCore.messages (UI display log, AgentMessage[]), but the actual memory-heavy data structure is GeminiChat.history (Content[] with full Part[] arrays). These are two independent arrays — pruneMessages() never touches GeminiChat.history, so the pruning is completely ineffective at reducing heap pressure. The method will return positive counts while the real memory consumer continues to grow unchecked.

Suggested change:
pruneMessages(): number {
  const threshold = 1.5 * 1024 * 1024 * 1024;
  if (process.memoryUsage().heapUsed < threshold) {
    return 0;
  }
  const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
  // Also prune the corresponding GeminiChat history entries to actually
  // reduce heap pressure. Without this, only the UI log shrinks while
  // the real memory hog (Content[] with Part[]) keeps growing.
  this.chat?.truncateHistory(toPrune);
  this.messages.splice(0, toPrune);
  return toPrune;
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review


[Suggestion] pruneMessages() silently splices messages from the UI transcript array (getMessages()) without emitting any event, log, or user notification. Messages are permanently removed with no recovery path and no indication that context was trimmed. Combined with the fact that this pruning doesn't reduce heap (see Critical issue above), the only effect is silently degrading the user's scrollback history.

Suggested change:
pruneMessages(): number {
  // ... existing threshold check ...
  const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
  this.messages.splice(0, toPrune);
  // Notify the UI so users understand context was trimmed
  this.pushMessage?.({
    role: 'info',
    content: `Trimmed ${toPrune} oldest messages to manage memory.`,
  });
  return toPrune;
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review

const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
this.messages.splice(0, toPrune);
return toPrune;
}
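Read in isolation, the chunked-pruning arithmetic in the hunk above is easy to sanity-check; a minimal standalone sketch (plain array, not the class method, helper name hypothetical):

```typescript
// Standalone illustration of the chunked-pruning arithmetic used by
// pruneMessages(): remove ~20% of the oldest entries in one splice.
function pruneOldest<T>(messages: T[]): number {
  if (messages.length === 0) return 0;
  // Math.ceil guarantees at least one entry is removed once we decide
  // to prune, even for very short histories.
  const toPrune = Math.max(1, Math.ceil(messages.length * 0.2));
  messages.splice(0, toPrune); // drop the oldest entries in place
  return toPrune;
}

const history = Array.from({ length: 10 }, (_, i) => `msg-${i}`);
const pruned = pruneOldest(history); // 20% of 10 entries
console.log(pruned, history.length); // 2 8
```

Because the chunk size is proportional to the current length, repeated calls shrink the history geometrically rather than by a fixed count, which is the scaling behavior the doc comment describes.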

/**
* Tool calls currently awaiting user approval. Mutated by
* AgentInteractive's TOOL_WAITING_APPROVAL handler; headless agents
Expand Down
4 changes: 4 additions & 0 deletions packages/core/src/agents/runtime/agent-interactive.ts
@@ -123,6 +123,10 @@ export class AgentInteractive {
while (message !== null && !this.masterAbortController.signal.aborted) {
this.addMessage('user', message);
await this.runOneRound(message);
// Prune old messages to prevent unbounded memory growth in
// long-lived interactive sessions (81+ minute sessions with
// hundreds of rounds can hit 4 GB without pruning).
this.core.pruneMessages();

[Critical] pruneMessages() runs AFTER memory-intensive operations — fails to prevent OOM in the exact scenario it targets.

The sequence is: addMessage → runOneRound (model API + large tool outputs) → pruneMessages. If runOneRound pushes heap past 4GB, the process OOMs before pruneMessages executes. The safety net only helps if heap is 1.5–4GB BEFORE the round — but the dangerous case is a round that itself triggers the OOM.

Suggested change:
while (message !== null && !this.masterAbortController.signal.aborted) {
  // Prune BEFORE the round to ensure headroom for tool outputs.
  this.core.pruneMessages();
  this.addMessage('user', message);
  await this.runOneRound(message);
  message = this.queue.dequeue();
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review


[Critical] 12 tests fail because createMockCore() is missing the new pruneMessages method. The call this.core.pruneMessages() added at this line throws TypeError: this.core.pruneMessages is not a function in all agent-interactive tests.

Suggested change:
// In the test file's createMockCore():
pruneMessages: vi.fn().mockReturnValue(0),

— DeepSeek/deepseek-v4-pro via Qwen Code /review

message = this.queue.dequeue();
}

Expand Down
25 changes: 25 additions & 0 deletions packages/core/src/core/geminiChat.ts
@@ -388,6 +388,19 @@ export class GeminiChat {
// model.
private sendPromise: Promise<void> = Promise.resolve();

/**
* Heap memory threshold (2 GB). When `heapUsed` exceeds this, chat
* compression is forced regardless of `hasFailedCompressionAttempt`.
* This is a memory safety net independent of the 70% token compaction
* threshold — catching the actual root cause (heap pressure) rather than
* a proxy (entry count). Protects against both many small entries AND
* few huge entries (large file reads, shell outputs).
*
* Note: set below Node's default 4 GB heap limit so there is headroom
* for one more GC cycle before the process is killed.
*/
private static readonly HEAP_MEMORY_THRESHOLD = 2 * 1024 * 1024 * 1024; // 2 GB


[Critical] Hardcoded 2GB threshold is a no-op on small-memory containers, and the safety net has zero observability.

In Docker/K8s with --max-old-space-size=512, heapUsed can never reach 2GB — the process is OOM-killed first. This also applies to the 1.5GB threshold in agent-core.ts. Derive thresholds from the actual heap limit:

Suggested change:
private static readonly HEAP_MEMORY_THRESHOLD = (() => {
  try {
    // Math.floor, not `| 0`: 70% of a multi-gigabyte limit exceeds the
    // 32-bit range that bitwise coercion wraps into.
    return Math.floor(v8.getHeapStatistics().heap_size_limit * 0.7);
  } catch {
    return 2 * 1024 * 1024 * 1024;
  }
})();

Additionally, neither this heap trigger nor pruneMessages() produces any log output. Add debugLogger.warn(...) when these triggers activate so operators can tell if the safety nets fired.

— DeepSeek/deepseek-v4-pro via Qwen Code /review
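The limit-derived threshold the comment proposes can be exercised as a standalone helper (sketch; `heapThresholdBytes` is a hypothetical name, the 0.7 fraction and 2 GB fallback follow the suggestion, and `Math.floor` is used because a 32-bit `| 0` coercion overflows on limits above ~3 GB):

```typescript
import v8 from 'node:v8';

// Derive a heap-pressure threshold from the actual V8 heap limit
// rather than hardcoding 2 GB, so small-memory containers still get
// a reachable threshold. Falls back to 2 GB if stats are unavailable.
function heapThresholdBytes(fraction = 0.7): number {
  try {
    return Math.floor(v8.getHeapStatistics().heap_size_limit * fraction);
  } catch {
    return 2 * 1024 * 1024 * 1024; // 2 GB fallback
  }
}

const threshold = heapThresholdBytes();
console.log(`threshold: ${(threshold / 1024 ** 3).toFixed(2)} GB`);
```

On a container started with --max-old-space-size=512, this yields roughly 0.35 GB instead of an unreachable 2 GB, which is the point of the review comment.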

/**
* Per-chat last-prompt-token-count, populated from `usageMetadata` on each
* model response. Used by the compaction threshold check so that subagents
@@ -464,6 +477,18 @@
signal?: AbortSignal,
options?: TryCompressOptions,
): Promise<ChatCompressionInfo> {
// Force compression when heapUsed exceeds the memory threshold,
// regardless of `hasFailedCompressionAttempt`. This is a memory
// safety net — catches the actual root cause (heap pressure) rather
// than a proxy (entry count). Protects against both many small entries
// AND few huge entries (large file reads, shell outputs).

[Critical] Force compression death loop: when heap > 2GB, force is elevated to true here. But the hasFailedCompressionAttempt latch (line ~530) is only set when !force. Since the heap check already changed force to true, the latch is never set on failure — every subsequent sendMessageStream call sees heap > 2GB, elevates force again, compresses again, fails again, and the latch is skipped again. Each failed compression burns a model API call. This loops indefinitely until heap pressure subsides or the user's API quota is exhausted.

Suggested change:
const originalForce = force;
if (
  !force &&
  process.memoryUsage().heapUsed > GeminiChat.HEAP_MEMORY_THRESHOLD
) {
  force = true;
}
// ... later, in the failure path:
// Use originalForce instead of force for the latch decision.
if (!originalForce) {
  this.hasFailedCompressionAttempt = true;
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review


[Suggestion] Heap-triggered force=true leaves trigger as undefined. This causes ChatCompressionService.compress() to classify the compression as 'manual' (user-initiated) via trigger ?? (force ? 'manual' : 'auto'), when it's actually automated memory-pressure response. Downstream hooks and telemetry will be misclassified.

Suggested change:
if (
  !force &&
  process.memoryUsage().heapUsed > GeminiChat.HEAP_MEMORY_THRESHOLD
) {
  force = true;
  options = { ...options, trigger: 'auto' };
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review
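The misclassification described above stems from a fallback of the form `trigger ?? (force ? 'manual' : 'auto')`; a toy reproduction (function and type names hypothetical, not the real ChatCompressionService API):

```typescript
type CompressionTrigger = 'manual' | 'auto';

// Hypothetical reproduction of the classification fallback quoted in
// the review comment: when `trigger` is undefined, a force-compression
// is labeled 'manual' even if it was an automated heap-pressure response.
function classifyTrigger(
  force: boolean,
  trigger?: CompressionTrigger,
): CompressionTrigger {
  return trigger ?? (force ? 'manual' : 'auto');
}

console.log(classifyTrigger(true)); // 'manual', misclassified when heap-triggered
console.log(classifyTrigger(true, 'auto')); // 'auto', fixed by passing trigger explicitly
```

Passing `trigger: 'auto'` alongside the heap-driven `force = true`, as the suggestion does, short-circuits the `??` fallback and keeps telemetry accurate.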

if (
!force &&
process.memoryUsage().heapUsed > GeminiChat.HEAP_MEMORY_THRESHOLD

[Critical] Heap-based force races with compression's own memory allocation (OOM risk), AND bypasses the hasFailedCompressionAttempt circuit breaker (API cost amplification).

(1) OOM race: heapUsed is read BEFORE service.compress(). If heap is 1.95GB (below 2GB), force is NOT set and compression proceeds. During compression, serializing chat history + response parsing can push heap past 4GB before GC — killing the process. Lower the threshold to 1.5GB for genuine headroom.

(2) API retry loop: force=true bypasses hasFailedCompressionAttempt. If compression fails, the flag is set — but the next call still triggers force=true because heap remains >2GB. Result: unlimited compression API calls. Even successful compression may not reduce heap (if pressure is from cached files), causing redundant re-compression.

Suggested change:
if (
  !force &&
  !this.hasFailedHeapCompressionAttempt &&
  process.memoryUsage().heapUsed > 1.5 * 1024 * 1024 * 1024
) {
  force = true;
  this.hasFailedHeapCompressionAttempt = true;
  debugLogger.warn(
    `Heap pressure (${(process.memoryUsage().heapUsed / 1024**3).toFixed(1)} GB) — forcing compression`,
  );
}

— DeepSeek/deepseek-v4-pro via Qwen Code /review

) {
force = true;
}

const service = new ChatCompressionService();
const { newHistory, info } = await service.compress(this, {
promptId,
Expand Down