From 58a82038fd3e3701c6b15cba8c6f9967e452947c Mon Sep 17 00:00:00 2001
From: Tyler
Date: Wed, 13 May 2026 11:32:38 -0700
Subject: [PATCH] fix(core): memory-based chat compression to prevent heap OOM

Long-lived interactive sessions (80+ minutes) can accumulate enough
conversation history to hit Node's 4 GB heap limit. The existing 70% token
compaction threshold can fail permanently (a failed compression attempt is
not retried) when large file reads or shell outputs create a few huge
entries.

This replaces entry-count caps with memory-based monitoring:

- geminiChat.ts: force chat compaction when heapUsed exceeds 2 GB (a hard
  safety net independent of the 70% token threshold).
- agent-core.ts: prune the oldest agent messages when heapUsed exceeds
  1.5 GB (pruning ~20% of the oldest messages per round).

Both mechanisms check actual heap pressure rather than an arbitrary proxy
(message count), catching the root cause regardless of whether memory
ballooned from many small entries or a few huge ones.
---
 .../core/src/agents/runtime/agent-core.ts    | 22 ++++++++++++++++
 .../src/agents/runtime/agent-interactive.ts  |  4 +++
 packages/core/src/core/geminiChat.ts         | 25 +++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts
index d0c7704ce1..30cb199ca2 100644
--- a/packages/core/src/agents/runtime/agent-core.ts
+++ b/packages/core/src/agents/runtime/agent-core.ts
@@ -1349,6 +1349,28 @@ export class AgentCore {
     return this.messages;
   }
 
+  /**
+   * Prune oldest messages from the message history when `heapUsed` exceeds
+   * 1.5 GB. Returns the number of messages pruned (0 if no pruning was
+   * needed). Call this after each reasoning round to prevent unbounded
+   * memory growth in long-lived interactive sessions.
+   *
+   * Messages are pruned in chunks — removing ~20% of the oldest messages
+   * each time, which naturally scales with memory pressure rather than
+   * a hard count cap. This catches the actual root cause (heap usage)
+   * regardless of how the memory ballooned (many small entries vs few huge).
+   */
+  pruneMessages(): number {
+    const threshold = 1.5 * 1024 * 1024 * 1024; // 1.5 GB
+    if (process.memoryUsage().heapUsed < threshold) {
+      return 0;
+    }
+    // Remove ~20% of the oldest messages to relieve heap pressure.
+    const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
+    this.messages.splice(0, toPrune);
+    return toPrune;
+  }
+
   /**
    * Tool calls currently awaiting user approval. Mutated by
    * AgentInteractive's TOOL_WAITING_APPROVAL handler; headless agents
diff --git a/packages/core/src/agents/runtime/agent-interactive.ts b/packages/core/src/agents/runtime/agent-interactive.ts
index b7fbba1df0..0b99dc4799 100644
--- a/packages/core/src/agents/runtime/agent-interactive.ts
+++ b/packages/core/src/agents/runtime/agent-interactive.ts
@@ -123,6 +123,10 @@ export class AgentInteractive {
     while (message !== null && !this.masterAbortController.signal.aborted) {
       this.addMessage('user', message);
       await this.runOneRound(message);
+      // Prune old messages to prevent unbounded memory growth in
+      // long-lived interactive sessions (81+ minute sessions with
+      // hundreds of rounds can hit 4 GB without pruning).
+      this.core.pruneMessages();
       message = this.queue.dequeue();
     }
 
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 4f06666c40..790909cf94 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -388,6 +388,19 @@ export class GeminiChat {
   // model.
   private sendPromise: Promise<void> = Promise.resolve();
 
+  /**
+   * Heap memory threshold (2 GB). When `heapUsed` exceeds this, chat
+   * compression is forced regardless of `hasFailedCompressionAttempt`.
+   * This is a memory safety net independent of the 70% token compaction
+   * threshold — catching the actual root cause (heap pressure) rather than
+   * a proxy (entry count). Protects against both many small entries AND
+   * few huge entries (large file reads, shell outputs).
+   *
+   * Note: set below Node's default 4 GB heap limit so there is headroom
+   * for one more GC cycle before the process is killed.
+   */
+  private static readonly HEAP_MEMORY_THRESHOLD = 2 * 1024 * 1024 * 1024; // 2 GB
+
   /**
    * Per-chat last-prompt-token-count, populated from `usageMetadata` on each
    * model response. Used by the compaction threshold check so that subagents
...
@@ -464,6 +477,18 @@ export class GeminiChat {
     signal?: AbortSignal,
     options?: TryCompressOptions,
   ): Promise {
+    // Force compression when heapUsed exceeds the memory threshold,
+    // regardless of `hasFailedCompressionAttempt`. This is a memory
+    // safety net — catches the actual root cause (heap pressure) rather
+    // than a proxy (entry count). Protects against both many small entries
+    // AND few huge entries (large file reads, shell outputs).
+    if (
+      !force &&
+      process.memoryUsage().heapUsed > GeminiChat.HEAP_MEMORY_THRESHOLD
+    ) {
+      force = true;
+    }
+
     const service = new ChatCompressionService();
     const { newHistory, info } = await service.compress(this, {
       promptId,
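
Note (not part of the patch): a standalone sketch of the pattern both changes
share, namely reading process.memoryUsage().heapUsed, comparing it against a
fixed byte threshold, and shedding the oldest entries when the threshold is
exceeded. The file name, identifiers, and demo-sized numbers (256 MB
threshold, 4 MB entries) are illustrative stand-ins, not taken from the
codebase; the patch itself uses 1.5 GB and 2 GB thresholds.

    // demo-heap-pruning.ts (hypothetical file): run with `npx tsx demo-heap-pruning.ts`
    import process from 'node:process';

    const MB = 1024 * 1024;
    const DEMO_THRESHOLD = 256 * MB; // stand-in for the patch's 1.5 GB limit

    const history: string[] = [];

    for (let round = 0; round < 200; round++) {
      history.push('x'.repeat(4 * MB)); // simulate a large tool output / file read
      if (process.memoryUsage().heapUsed > DEMO_THRESHOLD) {
        // Same shape as pruneMessages(): drop ~20% of the oldest entries,
        // so the amount shed scales with how large the history has grown.
        const toPrune = Math.max(1, Math.ceil(history.length * 0.2));
        history.splice(0, toPrune);
        console.log(`round ${round}: pruned ${toPrune}, kept ${history.length}`);
      }
    }

Because the pruning is fractional rather than a fixed count, repeated rounds
keep shrinking the history until heapUsed falls back under the threshold,
which is what lets one check handle both many small entries and a few huge
ones.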