diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts
index d0c7704ce1..30cb199ca2 100644
--- a/packages/core/src/agents/runtime/agent-core.ts
+++ b/packages/core/src/agents/runtime/agent-core.ts
@@ -1349,6 +1349,28 @@ export class AgentCore {
     return this.messages;
   }
 
+  /**
+   * Prune the oldest messages from the message history when `heapUsed` exceeds
+   * 1.5 GB. Returns the number of messages pruned (0 if no pruning was needed).
+   * Call this after each reasoning round to prevent unbounded memory growth in
+   * long-lived interactive sessions.
+   *
+   * Messages are pruned in chunks, removing ~20% of the oldest messages each
+   * time, which naturally scales with memory pressure rather than imposing a
+   * hard count cap. This targets the actual root cause (heap usage) no matter
+   * how memory ballooned (many small entries vs. a few huge ones).
+   */
+  pruneMessages(): number {
+    const threshold = 1.5 * 1024 * 1024 * 1024; // 1.5 GB
+    if (process.memoryUsage().heapUsed < threshold) {
+      return 0;
+    }
+    // Remove ~20% of the oldest messages to relieve heap pressure.
+    const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
+    this.messages.splice(0, toPrune);
+    return toPrune;
+  }
+
   /**
    * Tool calls currently awaiting user approval. Mutated by
    * AgentInteractive's TOOL_WAITING_APPROVAL handler; headless agents
diff --git a/packages/core/src/agents/runtime/agent-interactive.ts b/packages/core/src/agents/runtime/agent-interactive.ts
index b7fbba1df0..0b99dc4799 100644
--- a/packages/core/src/agents/runtime/agent-interactive.ts
+++ b/packages/core/src/agents/runtime/agent-interactive.ts
@@ -123,6 +123,10 @@ export class AgentInteractive {
     while (message !== null && !this.masterAbortController.signal.aborted) {
       this.addMessage('user', message);
       await this.runOneRound(message);
+      // Prune old messages to prevent unbounded memory growth in
+      // long-lived interactive sessions (sessions running 81+ minutes
+      // with hundreds of rounds can hit 4 GB without pruning).
+      this.core.pruneMessages();
       message = this.queue.dequeue();
     }
 
diff --git a/packages/core/src/core/geminiChat.ts b/packages/core/src/core/geminiChat.ts
index 4f06666c40..790909cf94 100644
--- a/packages/core/src/core/geminiChat.ts
+++ b/packages/core/src/core/geminiChat.ts
@@ -388,6 +388,19 @@ export class GeminiChat {
   // model.
   private sendPromise: Promise<void> = Promise.resolve();
 
+  /**
+   * Heap memory threshold (2 GB). When `heapUsed` exceeds this, chat
+   * compression is forced regardless of `hasFailedCompressionAttempt`.
+   * This is a memory safety net independent of the 70% token compaction
+   * threshold, catching the actual root cause (heap pressure) rather than
+   * a proxy (entry count). It protects against both many small entries
+   * and a few huge ones (large file reads, shell outputs).
+   *
+   * Note: set below Node's default 4 GB heap limit so there is headroom
+   * for one more GC cycle before the process is killed.
+   */
+  private static readonly HEAP_MEMORY_THRESHOLD = 2 * 1024 * 1024 * 1024; // 2 GB
+
   /**
    * Per-chat last-prompt-token-count, populated from `usageMetadata` on each
    * model response. Used by the compaction threshold check so that subagents
@@ -464,6 +477,18 @@
     signal?: AbortSignal,
     options?: TryCompressOptions,
   ): Promise<ChatCompressionInfo> {
+    // Force compression when heapUsed exceeds the memory threshold,
+    // regardless of `hasFailedCompressionAttempt`. This is a memory
+    // safety net: it catches the actual root cause (heap pressure)
+    // rather than a proxy (entry count), protecting against both many
+    // small entries and a few huge ones (large file reads, shell outputs).
+    if (
+      !force &&
+      process.memoryUsage().heapUsed > GeminiChat.HEAP_MEMORY_THRESHOLD
+    ) {
+      force = true;
+    }
+
     const service = new ChatCompressionService();
     const { newHistory, info } = await service.compress(this, {
       promptId,
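
Illustrative sketches (not part of the diff):

The first sketch shows how the ~20% chunked pruning in pruneMessages()
behaves under sustained heap pressure. MessageStore is a hypothetical
stand-in for AgentCore, and the injected heapUsed() callback replaces
process.memoryUsage().heapUsed so the logic can be exercised without
allocating real memory.

// prune-sketch.ts: chunked pruning under simulated heap pressure.
const PRUNE_THRESHOLD = 1.5 * 1024 * 1024 * 1024; // 1.5 GB, as in the patch

class MessageStore {
  messages: string[] = [];

  // heapUsed is a test seam; the real code reads process.memoryUsage().
  constructor(private heapUsed: () => number) {}

  // Mirrors AgentCore.pruneMessages(): no-op below the threshold,
  // otherwise drop ~20% of the oldest messages and report the count.
  pruneMessages(): number {
    if (this.heapUsed() < PRUNE_THRESHOLD) {
      return 0;
    }
    const toPrune = Math.max(1, Math.ceil(this.messages.length * 0.2));
    this.messages.splice(0, toPrune);
    return toPrune;
  }
}

// Each round removes 20% of whatever remains, so the history shrinks
// geometrically instead of being truncated to a fixed cap.
const store = new MessageStore(() => 2 * 1024 * 1024 * 1024); // pretend 2 GB
store.messages = Array.from({ length: 1000 }, (_, i) => `msg-${i}`);
for (let round = 1; round <= 3; round++) {
  const pruned = store.pruneMessages();
  console.log(`round ${round}: pruned ${pruned}, ${store.messages.length} left`);
}
// round 1: pruned 200, 800 left
// round 2: pruned 160, 640 left
// round 3: pruned 128, 512 left

The second sketch isolates the forced-compression condition added to
tryCompressChat(). shouldForceCompression() is a hypothetical helper
extracted here for illustration; it is not a function in GeminiChat.

// compress-sketch.ts: the heap safety-net check in isolation.
const HEAP_MEMORY_THRESHOLD = 2 * 1024 * 1024 * 1024; // 2 GB, as in the patch

function shouldForceCompression(force: boolean, heapUsed: number): boolean {
  // An already-forced call passes through unchanged; otherwise heap
  // pressure alone forces compression, bypassing hasFailedCompressionAttempt.
  return force || heapUsed > HEAP_MEMORY_THRESHOLD;
}

console.log(shouldForceCompression(false, 1 * 1024 * 1024 * 1024)); // false: under 2 GB
console.log(shouldForceCompression(false, 3 * 1024 * 1024 * 1024)); // true: heap pressure
console.log(shouldForceCompression(true, 0)); // true: explicit force wins

The two thresholds are deliberately staggered: pruning trims the agent's
message list at 1.5 GB, and if heap usage still climbs past 2 GB, chat
compression is forced, leaving headroom below the 4 GB limit noted in the
patch.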