From 4a597bce3db7cab1dfcef53a6dccc0ef0478ef0d Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:44:37 +0800 Subject: [PATCH 01/15] docs: add async memory recall design spec and implementation plan --- .../2026-05-15-async-memory-recall-design.md | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 docs/design/2026-05-15-async-memory-recall-design.md diff --git a/docs/design/2026-05-15-async-memory-recall-design.md b/docs/design/2026-05-15-async-memory-recall-design.md new file mode 100644 index 00000000000..b078d52c97a --- /dev/null +++ b/docs/design/2026-05-15-async-memory-recall-design.md @@ -0,0 +1,172 @@ +# Async Memory Recall — Design Spec + +**Date:** 2026-05-15 +**Status:** Approved +**Related issues:** #3761, #3759 +**Related PRs:** #3814, #3866 + +--- + +## Problem + +`relevanceSelector.ts` uses `AbortSignal.timeout(1_000)` (introduced by #3866). On first-session cold starts, qwen3.5-flash averages ~908 ms — consistently hitting the 1 s threshold. The outer 2.5 s deadline in `resolveAutoMemoryWithDeadline` means every UserQuery can block for up to 2.5 s even when recall always fails. + +Root cause: the main-agent request path `await`s the recall result before sending to the model. Any slowness in the recall side-query directly adds to user-visible latency. + +--- + +## Design + +### Core idea + +Fire recall on UserQuery and never await it. Consume the result at two opportunistic points — whichever fires first: + +1. **UserQuery consume point** — synchronous `settledAt !== null` check just before `turn.run()`. Zero-wait: if already settled, use it; if not, skip. +2. **ToolResult inject point** — same check on every ToolResult turn. Injects memory as a `system-reminder` prepended to the tool-result message, giving the model memory context before its next response. + +This matches the pattern used by Claude Code upstream (`startRelevantMemoryPrefetch` / `settledAt` polling in `query.ts`). + +--- + +## Data structures + +### New type `MemoryPrefetchHandle` (in `client.ts`) + +```typescript +type MemoryPrefetchHandle = { + promise: Promise; + /** Set by promise.finally(). null until the promise settles. */ + settledAt: number | null; + /** True after memory has been injected — prevents double-inject. */ + consumed: boolean; + controller: AbortController; +}; +``` + +### Field change on `GeminiClient` + +| Remove | Add | +| ------------------------------------------------------------ | ---------------------------------------------------------- | +| `pendingRecallAbortController: AbortController \| undefined` | `pendingMemoryPrefetch: MemoryPrefetchHandle \| undefined` | + +--- + +## Changes + +### 1. `client.ts` — remove `resolveAutoMemoryWithDeadline` + +Delete the function entirely. It is replaced by the `settledAt` flag mechanism. + +### 2. `client.ts` — UserQuery fire path + +Replace the `resolveAutoMemoryWithDeadline` call with: + +```typescript +const controller = new AbortController(); +const promise = this.config + .getMemoryManager() + .recall(projectRoot, partToString(request), { + config: this.config, + excludedFilePaths: this.surfacedRelevantAutoMemoryPaths, + abortSignal: controller.signal, + }) + .catch((error: unknown) => { + if (!(error instanceof DOMException && error.name === 'AbortError')) { + debugLogger.warn('Managed auto-memory recall prefetch failed.', error); + } + return EMPTY_RELEVANT_AUTO_MEMORY_RESULT; + }); + +const handle: MemoryPrefetchHandle = { + promise, + settledAt: null, + consumed: false, + controller, +}; +void promise.finally(() => { + handle.settledAt = Date.now(); +}); +this.pendingMemoryPrefetch = handle; +// no await — continue immediately +``` + +### 3. `client.ts` — UserQuery consume point (replaces `await relevantAutoMemoryPromise`) + +```typescript +const prefetchHandle = this.pendingMemoryPrefetch; +if ( + prefetchHandle && + prefetchHandle.settledAt !== null && + !prefetchHandle.consumed +) { + prefetchHandle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await prefetchHandle.promise; // already settled, returns immediately + if (result.prompt) { + systemReminders.push(result.prompt); + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } + } +} +``` + +### 4. `client.ts` — ToolResult inject point (new) + +After `requestToSend` is assembled, before `turn.run()`, add: + +```typescript +if (messageType === SendMessageType.ToolResult) { + const prefetchHandle = this.pendingMemoryPrefetch; + if ( + prefetchHandle && + prefetchHandle.settledAt !== null && + !prefetchHandle.consumed + ) { + prefetchHandle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await prefetchHandle.promise; + if (result.prompt) { + requestToSend = [result.prompt, ...requestToSend]; + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } + } + } +} +``` + +### 5. `client.ts` — cleanup paths (6 locations) + +Replace all `pendingRecallAbortController?.abort()` + `= undefined` with: + +```typescript +this.pendingMemoryPrefetch?.controller.abort(); +this.pendingMemoryPrefetch = undefined; +``` + +Locations: `resetChat()`, MaxSessionTurns early-return, boundedTurns=0 early-return, SessionTokenLimitExceeded early-return, Arena control signal early-return, post-consume clear. + +### 6. `relevanceSelector.ts` — remove `AbortSignal.timeout(1_000)` + +Remove the combined `AbortSignal.any([AbortSignal.timeout(1_000), callerAbortSignal])` and pass `callerAbortSignal` directly. + +--- + +## Behaviour comparison + +| Scenario | Before | After | +| -------------------------------------------- | ------------------------------ | ------------------------------------------------------ | +| recall completes before model prep | inject on UserQuery, ~0 wait | inject on UserQuery, ~0 wait | +| recall slow (cold start) | block up to 2.5 s | skip UserQuery, inject on first ToolResult | +| recall times out (1 s) | abort, empty result, no memory | no hard timeout; inject whenever settled | +| no tool calls, recall slow | block up to 2.5 s, then skip | skip UserQuery, no ToolResult opportunity — miss | +| user sends 2nd message before recall settles | 2nd recall races 1st handle | 1st handle aborted when 2nd UserQuery fires new handle | + +--- + +## Out of scope + +- Changing the memory injection format from `system-reminder` to `tool-result` attachment (CC style) +- Per-session byte budget skip gate +- Single-word prompt skip gate From 2d0b4151df3fa73d1792e72c77d7e08ce64d5128 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:50:04 +0800 Subject: [PATCH 02/15] refactor(core): introduce MemoryPrefetchHandle, replace pendingRecallAbortController field --- packages/core/src/core/client.ts | 46 +++++++------------------------- 1 file changed, 9 insertions(+), 37 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 69447064c2c..d29316b24cb 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -149,42 +149,14 @@ function wrapIdeContext(contextText: string): string { return `\n${safeContextText}\n`; } -/** - * Resolve the auto-memory recall promise with a hard deadline. - * If the recall (model-driven selection + heuristic fallback) does not complete - * within the deadline, return an empty result so the main request is not delayed. - * - * The deadline is set slightly above the model-driven selector's own - * AbortSignal.timeout (2s) to give the heuristic fallback time to complete, - * but low enough that the user does not perceive a delay on every turn. - */ -async function resolveAutoMemoryWithDeadline( - promise: Promise | undefined, - onDeadline: () => void, -): Promise { - if (!promise) { - return EMPTY_RELEVANT_AUTO_MEMORY_RESULT; - } - - let timer: ReturnType | undefined; - const deadline = new Promise((resolve) => { - timer = setTimeout(() => { - try { - onDeadline(); - } finally { - resolve(EMPTY_RELEVANT_AUTO_MEMORY_RESULT); - } - }, 2_500); - }); - - try { - return await Promise.race([promise, deadline]); - } finally { - if (timer !== undefined) { - clearTimeout(timer); - } - } -} +type MemoryPrefetchHandle = { + promise: Promise; + /** Set by promise.finally(). null until the promise settles. */ + settledAt: number | null; + /** True after memory has been injected — prevents double-inject. */ + consumed: boolean; + controller: AbortController; +}; /** Tools that can write to the skills directory, used to detect skillsModifiedInSession. */ const SKILL_WRITE_TOOL_NAMES: ReadonlySet = new Set([ @@ -204,7 +176,7 @@ export class GeminiClient { private lastPromptId: string | undefined = undefined; private lastSentIdeContext: IdeContext | undefined; private forceFullIdeContext = true; - private pendingRecallAbortController: AbortController | undefined; + private pendingMemoryPrefetch: MemoryPrefetchHandle | undefined; private lastSessionStartContext: string | undefined; private lastSessionStartSource: SessionStartSource | undefined; From 069045e7cf064a63d71e50dcbf0bc3fbbbe3d1c0 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:51:02 +0800 Subject: [PATCH 03/15] refactor(core): fire memory recall as non-blocking prefetch with settledAt flag --- packages/core/src/core/client.ts | 35 +++++++++++++------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index d29316b24cb..9f013bc0b99 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -974,9 +974,6 @@ export class GeminiClient { turns: number = MAX_TURNS, ): AsyncGenerator { const messageType = options?.type ?? SendMessageType.UserQuery; - let relevantAutoMemoryPromise: - | Promise - | undefined; if (messageType === SendMessageType.Retry) { this.stripOrphanedUserEntriesFromHistory(); @@ -1062,24 +1059,18 @@ export class GeminiClient { messageType === SendMessageType.Cron ) { if (this.config.getManagedAutoMemoryEnabled()) { - const recallAbortController = new AbortController(); - const rawRecallPromise = this.config + const controller = new AbortController(); + const promise = this.config .getMemoryManager() .recall(this.config.getProjectRoot(), partToString(request), { config: this.config, excludedFilePaths: this.surfacedRelevantAutoMemoryPaths, - abortSignal: recallAbortController.signal, + abortSignal: controller.signal, }) .catch((error: unknown) => { if ( - error instanceof DOMException && - error.name === 'AbortError' + !(error instanceof DOMException && error.name === 'AbortError') ) { - debugLogger.debug( - 'Auto-memory recall aborted by deadline.', - error, - ); - } else { debugLogger.warn( 'Managed auto-memory recall prefetch failed.', error, @@ -1087,14 +1078,16 @@ export class GeminiClient { } return EMPTY_RELEVANT_AUTO_MEMORY_RESULT; }); - this.pendingRecallAbortController = recallAbortController; - // Race the recall against the deadline at initiation time so the 2.5s - // budget is not consumed by intermediate work (microcompact, compression, - // token checks, IDE context) between initiation and consumption. - relevantAutoMemoryPromise = resolveAutoMemoryWithDeadline( - rawRecallPromise, - () => recallAbortController.abort(), - ); + const handle: MemoryPrefetchHandle = { + promise, + settledAt: null, + consumed: false, + controller, + }; + void promise.finally(() => { + handle.settledAt = Date.now(); + }); + this.pendingMemoryPrefetch = handle; } // Track prompt count for commit attribution. Only the user typing a From 347091bc380322f01545b82aad2bba37dabe547d Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:53:44 +0800 Subject: [PATCH 04/15] refactor(core): replace blocking await with zero-wait settledAt poll at UserQuery consume point Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.ts | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 9f013bc0b99..d29712b9a88 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1276,18 +1276,22 @@ export class GeminiClient { messageType === SendMessageType.Cron ) { const systemReminders = []; - // The recall promise was already raced against the 2.5s deadline at - // initiation time; this await just collects the result. - this.pendingRecallAbortController = undefined; - const relevantAutoMemory = relevantAutoMemoryPromise - ? await relevantAutoMemoryPromise - : EMPTY_RELEVANT_AUTO_MEMORY_RESULT; - const relevantAutoMemoryPrompt = relevantAutoMemory.prompt; - - if (relevantAutoMemoryPrompt) { - systemReminders.push(relevantAutoMemoryPrompt); - for (const doc of relevantAutoMemory.selectedDocs) { - this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + // Zero-wait poll: consume only if the prefetch has already settled. + // If not settled yet, skip — the ToolResult inject point will retry. + const prefetchHandle = this.pendingMemoryPrefetch; + if ( + prefetchHandle && + prefetchHandle.settledAt !== null && + !prefetchHandle.consumed + ) { + prefetchHandle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await prefetchHandle.promise; // already settled, returns immediately + if (result.prompt) { + systemReminders.push(result.prompt); + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } } } From 5a0b5fe9d73052c27e4c13f63bdc385c789ce4b2 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:54:28 +0800 Subject: [PATCH 05/15] feat(core): inject recalled memory on first ToolResult when UserQuery consume point misses Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.ts | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index d29712b9a88..e15b323be88 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1331,6 +1331,25 @@ export class GeminiClient { requestToSend = [...systemReminders, ...requestToSend]; } + if (messageType === SendMessageType.ToolResult) { + const prefetchHandle = this.pendingMemoryPrefetch; + if ( + prefetchHandle && + prefetchHandle.settledAt !== null && + !prefetchHandle.consumed + ) { + prefetchHandle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await prefetchHandle.promise; + if (result.prompt) { + requestToSend = [result.prompt, ...requestToSend]; + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } + } + } + } + const resultStream = turn.run(model, requestToSend, signal); let didUpdateIdeContextState = false; for await (const event of resultStream) { From 4bd5905c03d96cec67158d00d5bff8e49c09198f Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:55:38 +0800 Subject: [PATCH 06/15] refactor(core): replace pendingRecallAbortController with pendingMemoryPrefetch in all cleanup paths Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.ts | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index e15b323be88..6e4a50bc864 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -386,10 +386,8 @@ export class GeminiClient { this.config.getBaseLlmClient().clearPerModelGeneratorCache(); // Abort any in-flight auto-memory recall so the stale controller // does not leak into the next session. - if (this.pendingRecallAbortController) { - this.pendingRecallAbortController.abort(); - this.pendingRecallAbortController = undefined; - } + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; // Drop any deferred tools revealed this session so /clear really gives // a clean slate. We don't clear inside startChat itself because that path // is also taken by compression (which preserves the session), and @@ -1158,8 +1156,8 @@ export class GeminiClient { this.config.getMaxSessionTurns() > 0 && this.sessionTurnCount > this.config.getMaxSessionTurns() ) { - this.pendingRecallAbortController?.abort(); - this.pendingRecallAbortController = undefined; + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; yield { type: GeminiEventType.MaxSessionTurns }; if (isTopLevelInteraction) endInteractionSpan('error', { @@ -1172,8 +1170,8 @@ export class GeminiClient { // Ensure turns never exceeds MAX_TURNS to prevent infinite loops const boundedTurns = Math.min(turns, MAX_TURNS); if (!boundedTurns) { - this.pendingRecallAbortController?.abort(); - this.pendingRecallAbortController = undefined; + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; if (isTopLevelInteraction) endInteractionSpan('error', { errorMessage: 'max turns exhausted' }); return new Turn(this.getChat(), prompt_id); @@ -1188,8 +1186,8 @@ export class GeminiClient { const lastPromptTokenCount = uiTelemetryService.getLastPromptTokenCount(); if (lastPromptTokenCount > sessionTokenLimit) { - this.pendingRecallAbortController?.abort(); - this.pendingRecallAbortController = undefined; + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; yield { type: GeminiEventType.SessionTokenLimitExceeded, value: { @@ -1249,8 +1247,8 @@ export class GeminiClient { `Arena control signal received: ${controlSignal.type} - ${controlSignal.reason}`, ); await arenaAgentClient.reportCancelled(); - this.pendingRecallAbortController?.abort(); - this.pendingRecallAbortController = undefined; + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; if (isTopLevelInteraction) endInteractionSpan('cancelled'); return new Turn(this.getChat(), prompt_id); } From 65bd06176dc687c04df1219736c802e036ca2f8f Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 15:56:40 +0800 Subject: [PATCH 07/15] =?UTF-8?q?refactor(memory):=20remove=201s=20AbortSi?= =?UTF-8?q?gnal.timeout=20from=20relevanceSelector=20=E2=80=94=20caller=20?= =?UTF-8?q?controls=20lifetime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/memory/relevanceSelector.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/core/src/memory/relevanceSelector.ts b/packages/core/src/memory/relevanceSelector.ts index 8962eb94d0c..6fa8bed6d53 100644 --- a/packages/core/src/memory/relevanceSelector.ts +++ b/packages/core/src/memory/relevanceSelector.ts @@ -91,9 +91,7 @@ export async function selectRelevantAutoMemoryDocumentsByModel( purpose: 'auto-memory-recall', contents, schema: RESPONSE_SCHEMA, - abortSignal: callerAbortSignal - ? AbortSignal.any([AbortSignal.timeout(1_000), callerAbortSignal]) - : AbortSignal.timeout(1_000), + abortSignal: callerAbortSignal ?? new AbortController().signal, // Uses runSideQuery's default side-query model policy: fast model first, // then main session model when no fast model is configured. From 064427c4c2db15c78325c433d5ed881005e8c27e Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 16:01:02 +0800 Subject: [PATCH 08/15] =?UTF-8?q?test(core):=20update=20auto-memory=20test?= =?UTF-8?q?s=20for=20async=20prefetch=20pattern=20=E2=80=94=20drop=20fake?= =?UTF-8?q?=20timers=20and=20deadline=20references?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.test.ts | 65 +++++++++------------------ 1 file changed, 22 insertions(+), 43 deletions(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 682853d1e3b..39e215f6ec2 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2303,20 +2303,9 @@ hello }); it('should not block the main request when auto-memory recall is slow', async () => { - // Simulate a recall that takes longer than the 2.5s deadline - mockMemoryManager.recall.mockReturnValue( - new Promise((resolve) => - setTimeout( - () => - resolve({ - prompt: '## Relevant memory\n\nSlow memory result.', - selectedDocs: [], - strategy: 'model', - }), - 10_000, - ), - ), - ); + // Recall never settles — settledAt stays null so the UserQuery consume + // point skips it and turn.run() is called immediately without memory. + mockMemoryManager.recall.mockReturnValue(new Promise(() => {})); const mockStream = (async function* () { yield { type: 'content', value: 'Hello' }; @@ -2329,38 +2318,28 @@ hello }; client['chat'] = mockChat as GeminiChat; - vi.useFakeTimers(); - try { - const streamPromise = (async () => { - const stream = client.sendMessageStream( - [{ text: 'Quick question' }], - new AbortController().signal, - 'prompt-id-slow-memory', - ); - for await (const _ of stream) { - // consume stream - } - })(); - - // Advance past the 2.5s deadline — the main request should proceed - await vi.advanceTimersByTimeAsync(3_000); - await streamPromise; - - // The main request should have been called without the slow memory - expect(mockTurnRunFn).toHaveBeenCalledWith( - 'test-model', - expect.not.arrayContaining([ - expect.stringContaining('Slow memory result'), - ]), - expect.any(AbortSignal), - ); - } finally { - vi.useRealTimers(); + const stream = client.sendMessageStream( + [{ text: 'Quick question' }], + new AbortController().signal, + 'prompt-id-slow-memory', + ); + for await (const _ of stream) { + // consume stream } + + // turn.run() must have been called without the slow memory + expect(mockTurnRunFn).toHaveBeenCalledWith( + 'test-model', + expect.not.arrayContaining([ + expect.stringContaining('Slow memory result'), + ]), + expect.any(AbortSignal), + ); }); - it('should include auto-memory prompt when recall completes within deadline', async () => { - // Simulate a fast recall that completes well within the deadline + it('should inject auto-memory at UserQuery consume point when recall already settled', async () => { + // mockResolvedValue settles synchronously; by the time the consume-point + // check runs (after at least one await), settledAt is set. mockMemoryManager.recall.mockResolvedValue({ prompt: '## Relevant memory\n\nFast memory result.', selectedDocs: [], From d9946a395409703b323ff874574c37def7208ff7 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 16:04:30 +0800 Subject: [PATCH 09/15] =?UTF-8?q?test(core):=20add=20ToolResult=20inject?= =?UTF-8?q?=20test=20=E2=80=94=20memory=20injected=20on=20first=20ToolResu?= =?UTF-8?q?lt=20when=20recall=20settles=20after=20UserQuery?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.test.ts | 81 +++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 39e215f6ec2..e6bb73b56e7 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2373,6 +2373,87 @@ hello ); }); + it('should inject auto-memory on first ToolResult when recall settles after UserQuery', async () => { + // Controllable promise — recall stays pending across the UserQuery turn + // and only settles before the ToolResult turn runs. + let resolveRecall: + | ((value: { + prompt: string; + selectedDocs: never[]; + strategy: 'model'; + }) => void) + | undefined; + mockMemoryManager.recall.mockReturnValue( + new Promise((resolve) => { + resolveRecall = resolve; + }), + ); + + const mockStream = (async function* () { + yield { type: 'content', value: 'Hello' }; + })(); + mockTurnRunFn.mockReturnValue(mockStream); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + + // Turn 1: UserQuery — recall still pending, no injection + const userStream = client.sendMessageStream( + [{ text: 'What is my name?' }], + new AbortController().signal, + 'prompt-id-user-query', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of userStream) { + // consume + } + + expect(mockTurnRunFn).toHaveBeenLastCalledWith( + 'test-model', + expect.not.arrayContaining([ + expect.stringContaining('Deferred memory result'), + ]), + expect.any(AbortSignal), + ); + + // Recall settles between turns + resolveRecall!({ + prompt: '## Relevant memory\n\nDeferred memory result.', + selectedDocs: [], + strategy: 'model', + }); + // Drain microtasks so the settledAt finally() callback runs + await Promise.resolve(); + await Promise.resolve(); + + // Turn 2: ToolResult — settledAt is now non-null, memory should inject + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'world' }; + })(), + ); + const toolStream = client.sendMessageStream( + [{ functionResponse: { name: 'foo', response: { ok: true } } }], + new AbortController().signal, + 'prompt-id-tool-result', + { type: SendMessageType.ToolResult }, + ); + for await (const _ of toolStream) { + // consume + } + + expect(mockTurnRunFn).toHaveBeenLastCalledWith( + 'test-model', + expect.arrayContaining([ + '## Relevant memory\n\nDeferred memory result.', + ]), + expect.any(AbortSignal), + ); + }); + it('should proceed without auto-memory when managed auto-memory is disabled', async () => { // When getManagedAutoMemoryEnabled returns false, no recall is initiated // and sendMessageStream completes without memory content From ce02bd001d7f858b7a5432d5ba027391c6dfaeec Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 17:10:50 +0800 Subject: [PATCH 10/15] fix(core): address codex review findings on async memory recall Three findings fixed: 1. Abort previous prefetch before installing a new one (line 1059): A new UserQuery/Cron used to overwrite pendingMemoryPrefetch without aborting the old controller, leaking an unbounded background recall now that the 1s side-query timeout is gone. 2. Move the UserQuery consume poll AFTER the async reminder setup: ensureTool + listSubagents are awaited between the old poll location and the final assembly, so recalls that settled during those awaits used to be missed (and a tool-less turn never got a ToolResult retry). The poll now runs immediately before requestToSend assembly, and unshifts memory to the front of systemReminders to preserve ordering. 3. Append memory after functionResponse on ToolResult turns: The Qwen API requires the functionResponse part to immediately follow the model's functionCall (see lines 1209-1213). Prepending memory text risked breaking that pairing on the native Gemini path. Appending keeps the pair intact on Gemini and produces the same OpenAI output (text becomes a separate user message after the tool messages). Tests: - Updated ToolResult inject test to assert memory index > functionResponse - Added abort-previous-prefetch test (mid-flight UserQuery aborts old handle) 224/224 tests pass; tsc clean on changed files. Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.test.ts | 70 ++++++++++++++++++++++++--- packages/core/src/core/client.ts | 53 ++++++++++++-------- 2 files changed, 98 insertions(+), 25 deletions(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index e6bb73b56e7..0ae77516d86 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2445,13 +2445,71 @@ hello // consume } - expect(mockTurnRunFn).toHaveBeenLastCalledWith( - 'test-model', - expect.arrayContaining([ - '## Relevant memory\n\nDeferred memory result.', - ]), - expect.any(AbortSignal), + // Memory must come AFTER the functionResponse part so the Qwen API + // call/response pairing isn't broken (see client.ts:1209-1213). + const lastCallArgs = mockTurnRunFn.mock.lastCall; + const requestArr = lastCallArgs![1] as unknown[]; + const functionResponseIdx = requestArr.findIndex( + (p) => typeof p === 'object' && p !== null && 'functionResponse' in p, + ); + const memoryIdx = requestArr.findIndex( + (p) => p === '## Relevant memory\n\nDeferred memory result.', + ); + expect(functionResponseIdx).toBeGreaterThanOrEqual(0); + expect(memoryIdx).toBeGreaterThan(functionResponseIdx); + }); + + it('should abort the previous prefetch when a new UserQuery arrives mid-flight', async () => { + // Pending recall on first UserQuery — never resolves on its own. + const abortSignals: AbortSignal[] = []; + mockMemoryManager.recall.mockImplementation((_root, _query, opts) => { + abortSignals.push(opts.abortSignal as AbortSignal); + return new Promise(() => {}); + }); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'Hello' }; + })(), ); + + // First UserQuery — installs prefetch #1 + const stream1 = client.sendMessageStream( + [{ text: 'first' }], + new AbortController().signal, + 'prompt-id-1', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of stream1) { + // consume + } + expect(abortSignals.length).toBe(1); + expect(abortSignals[0].aborted).toBe(false); + + // Second UserQuery — should abort #1 before installing #2 + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'Hello again' }; + })(), + ); + const stream2 = client.sendMessageStream( + [{ text: 'second' }], + new AbortController().signal, + 'prompt-id-2', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of stream2) { + // consume + } + + expect(abortSignals.length).toBe(2); + expect(abortSignals[0].aborted).toBe(true); + expect(abortSignals[1].aborted).toBe(false); }); it('should proceed without auto-memory when managed auto-memory is disabled', async () => { diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 6e4a50bc864..69c420f322c 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1057,6 +1057,11 @@ export class GeminiClient { messageType === SendMessageType.Cron ) { if (this.config.getManagedAutoMemoryEnabled()) { + // A previous recall may still be pending (slow side-query, new user + // turn arrived before it settled). Abort it before installing the + // new handle so the orphan doesn't keep running indefinitely. + this.pendingMemoryPrefetch?.controller.abort(); + this.pendingMemoryPrefetch = undefined; const controller = new AbortController(); const promise = this.config .getMemoryManager() @@ -1274,24 +1279,6 @@ export class GeminiClient { messageType === SendMessageType.Cron ) { const systemReminders = []; - // Zero-wait poll: consume only if the prefetch has already settled. - // If not settled yet, skip — the ToolResult inject point will retry. - const prefetchHandle = this.pendingMemoryPrefetch; - if ( - prefetchHandle && - prefetchHandle.settledAt !== null && - !prefetchHandle.consumed - ) { - prefetchHandle.consumed = true; - this.pendingMemoryPrefetch = undefined; - const result = await prefetchHandle.promise; // already settled, returns immediately - if (result.prompt) { - systemReminders.push(result.prompt); - for (const doc of result.selectedDocs) { - this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); - } - } - } // add subagent system reminder if there are subagents const hasAgentTool = await this.config @@ -1326,6 +1313,27 @@ export class GeminiClient { } } + // Zero-wait poll: consume only if the prefetch has already settled. + // Done AFTER the async reminder setup above so recall settling during + // those awaits still gets caught here. If still not settled, skip — + // the ToolResult inject point will retry on the next turn. + const prefetchHandle = this.pendingMemoryPrefetch; + if ( + prefetchHandle && + prefetchHandle.settledAt !== null && + !prefetchHandle.consumed + ) { + prefetchHandle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await prefetchHandle.promise; // already settled, returns immediately + if (result.prompt) { + systemReminders.unshift(result.prompt); + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } + } + } + requestToSend = [...systemReminders, ...requestToSend]; } @@ -1340,7 +1348,14 @@ export class GeminiClient { this.pendingMemoryPrefetch = undefined; const result = await prefetchHandle.promise; if (result.prompt) { - requestToSend = [result.prompt, ...requestToSend]; + // Append (not prepend): on a ToolResult turn, requestToSend leads + // with functionResponse parts that must immediately follow the + // model's functionCall (Qwen API constraint, see lines 1209-1213). + // Putting the memory text after the functionResponse parts keeps + // the call/response pairing intact under native Gemini, while the + // OpenAI converter still emits the text as a separate user message + // after the tool messages. + requestToSend = [...requestToSend, result.prompt]; for (const doc of result.selectedDocs) { this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); } From 2c41fa1970f17782e21b3e8d8a91d24231d190ba Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 17:35:40 +0800 Subject: [PATCH 11/15] docs(core): add JSDoc + clarifying comments per review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Annotations only, no behavior change: - MemoryPrefetchHandle: full JSDoc covering lifecycle (create → consume → discard) - UserQuery consume site: explain why we unshift (front of systemReminders) - ToolResult inject site: reference hasPendingToolCall pattern instead of brittle line numbers when citing the Qwen functionCall/Response constraint - relevanceSelector.ts: explain why the side-query has no inline timeout (caller controls lifetime via MemoryPrefetchHandle.controller) Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.ts | 28 +++++++++++++++---- packages/core/src/memory/relevanceSelector.ts | 4 +++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 69c420f322c..f67cd9f3d91 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -149,6 +149,18 @@ function wrapIdeContext(contextText: string): string { return `\n${safeContextText}\n`; } +/** + * Handle for a non-blocking auto-memory recall prefetch. + * + * Lifecycle: + * 1. Created on UserQuery/Cron — the recall promise fires immediately, + * `pendingMemoryPrefetch` is set to this handle. + * 2. Consumed at either of two opportunistic points: a zero-wait + * `settledAt !== null` poll just before the UserQuery main request, + * or — if recall hadn't settled yet — on the first ToolResult turn. + * 3. Aborted-and-discarded by every cleanup path (resetChat, + * MaxSessionTurns, etc.) or replaced when a new UserQuery arrives. + */ type MemoryPrefetchHandle = { promise: Promise; /** Set by promise.finally(). null until the promise settles. */ @@ -1327,6 +1339,11 @@ export class GeminiClient { this.pendingMemoryPrefetch = undefined; const result = await prefetchHandle.promise; // already settled, returns immediately if (result.prompt) { + // Unshift to the front of systemReminders: on a UserQuery turn + // requestToSend leads with user text, so positioning memory at + // the very start of the system-reminder block keeps it close to + // the user prompt. Contrast the ToolResult path below, which + // must append to avoid splitting functionCall / functionResponse. systemReminders.unshift(result.prompt); for (const doc of result.selectedDocs) { this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); @@ -1350,11 +1367,12 @@ export class GeminiClient { if (result.prompt) { // Append (not prepend): on a ToolResult turn, requestToSend leads // with functionResponse parts that must immediately follow the - // model's functionCall (Qwen API constraint, see lines 1209-1213). - // Putting the memory text after the functionResponse parts keeps - // the call/response pairing intact under native Gemini, while the - // OpenAI converter still emits the text as a separate user message - // after the tool messages. + // model's functionCall (Qwen API constraint — same reason the + // IDE-context block above is skipped while a tool call is pending, + // see the `hasPendingToolCall` guard). Putting the memory text + // after the functionResponse parts keeps the call/response pairing + // intact under native Gemini; the OpenAI converter then emits the + // text as a separate user message after the tool messages. requestToSend = [...requestToSend, result.prompt]; for (const doc of result.selectedDocs) { this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); diff --git a/packages/core/src/memory/relevanceSelector.ts b/packages/core/src/memory/relevanceSelector.ts index 6fa8bed6d53..a7effd4fa38 100644 --- a/packages/core/src/memory/relevanceSelector.ts +++ b/packages/core/src/memory/relevanceSelector.ts @@ -91,6 +91,10 @@ export async function selectRelevantAutoMemoryDocumentsByModel( purpose: 'auto-memory-recall', contents, schema: RESPONSE_SCHEMA, + // No inline timeout — caller (`GeminiClient.MemoryPrefetchHandle`) + // owns cancellation and aborts via its controller on cleanup paths. + // A bare unsignalled AbortController fills the type when the caller + // doesn't pass one, since runSideQuery's `abortSignal` is required. abortSignal: callerAbortSignal ?? new AbortController().signal, // Uses runSideQuery's default side-query model policy: fast model first, From f4b6aae125fad5249b5d8a439b7a70e269bafcca Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Fri, 15 May 2026 17:45:04 +0800 Subject: [PATCH 12/15] fix(core): bridge caller abort signal into memory prefetch + doc accuracy fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Behavior fix (addresses copilot review on client.ts:1071): - When the parent sendMessageStream signal aborts (user Ctrl-C / Esc), the prefetch controller now aborts too. Previously the recall side-query would keep running until a later cleanup (next UserQuery / /clear / etc), wasting fast-model tokens on work whose result no one would consume. - Listener uses { once: true } and is also removed in the promise's finally() so a long-lived parent signal doesn't accumulate listeners across many turns under normal completion. - Edge case: if signal is already aborted when fire runs, abort the controller synchronously instead of attaching a listener. Test: - New regression guard: "should abort the pending prefetch when the caller signal aborts" — verifies the abort handler installed on the recall side fires once the parent signal aborts. Doc accuracy (addresses copilot review on the design spec): - ToolResult inject: was documented as "prepend", actual implementation appends to preserve functionCall/functionResponse pairing. Updated both the prose summary and the code sample. - Cleanup section: was documented as 6 abort-locations including the "post-consume clear"; the consume sites don't actually abort (the promise has already settled). Reorganized as 5 abort-and-clear sites + 2 clear-only sites with the distinction made explicit. - Fire path snippet: added the abort-previous-prefetch line and the caller-signal bridge so the spec matches the current implementation. Co-Authored-By: Claude Sonnet 4.6 --- .../2026-05-15-async-memory-recall-design.md | 40 ++++++++++++++++--- packages/core/src/core/client.test.ts | 36 +++++++++++++++++ packages/core/src/core/client.ts | 13 ++++++ 3 files changed, 84 insertions(+), 5 deletions(-) diff --git a/docs/design/2026-05-15-async-memory-recall-design.md b/docs/design/2026-05-15-async-memory-recall-design.md index b078d52c97a..4cbb33ede32 100644 --- a/docs/design/2026-05-15-async-memory-recall-design.md +++ b/docs/design/2026-05-15-async-memory-recall-design.md @@ -22,7 +22,7 @@ Root cause: the main-agent request path `await`s the recall result before sendin Fire recall on UserQuery and never await it. Consume the result at two opportunistic points — whichever fires first: 1. **UserQuery consume point** — synchronous `settledAt !== null` check just before `turn.run()`. Zero-wait: if already settled, use it; if not, skip. -2. **ToolResult inject point** — same check on every ToolResult turn. Injects memory as a `system-reminder` prepended to the tool-result message, giving the model memory context before its next response. +2. **ToolResult inject point** — same check on every ToolResult turn. Injects memory as a `system-reminder` **appended after** the functionResponse parts in `requestToSend`, giving the model memory context before its next response. (Append, not prepend: the Qwen API requires the functionResponse to immediately follow the model's functionCall — see the existing `hasPendingToolCall` IDE-context skip for the same constraint.) This matches the pattern used by Claude Code upstream (`startRelevantMemoryPrefetch` / `settledAt` polling in `query.ts`). @@ -62,7 +62,22 @@ Delete the function entirely. It is replaced by the `settledAt` flag mechanism. Replace the `resolveAutoMemoryWithDeadline` call with: ```typescript +// Abort any in-flight prefetch from a previous UserQuery before installing +// the new handle (prevents orphan side-queries when the user types again +// before recall settles). +this.pendingMemoryPrefetch?.controller.abort(); +this.pendingMemoryPrefetch = undefined; + const controller = new AbortController(); +// Bridge the caller's signal into the prefetch controller so a user abort +// (Ctrl-C / Esc) on the parent turn also terminates the recall side-query. +const onParentAbort = () => controller.abort(); +if (signal.aborted) { + controller.abort(); +} else { + signal.addEventListener('abort', onParentAbort, { once: true }); +} + const promise = this.config .getMemoryManager() .recall(projectRoot, partToString(request), { @@ -85,6 +100,7 @@ const handle: MemoryPrefetchHandle = { }; void promise.finally(() => { handle.settledAt = Date.now(); + signal.removeEventListener('abort', onParentAbort); }); this.pendingMemoryPrefetch = handle; // no await — continue immediately @@ -127,7 +143,10 @@ if (messageType === SendMessageType.ToolResult) { this.pendingMemoryPrefetch = undefined; const result = await prefetchHandle.promise; if (result.prompt) { - requestToSend = [result.prompt, ...requestToSend]; + // Append (not prepend) so functionResponse parts stay first + // and the model's functionCall/functionResponse pairing + // isn't broken on the native Gemini path. + requestToSend = [...requestToSend, result.prompt]; for (const doc of result.selectedDocs) { this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); } @@ -136,16 +155,27 @@ if (messageType === SendMessageType.ToolResult) { } ``` -### 5. `client.ts` — cleanup paths (6 locations) +### 5. `client.ts` — cleanup paths + +The handle is released by two distinct mechanisms: -Replace all `pendingRecallAbortController?.abort()` + `= undefined` with: +**5 abort-and-clear sites** (the prefetch is still pending, abort the controller before dropping the reference). Replace `pendingRecallAbortController?.abort()` + `= undefined` with: ```typescript this.pendingMemoryPrefetch?.controller.abort(); this.pendingMemoryPrefetch = undefined; ``` -Locations: `resetChat()`, MaxSessionTurns early-return, boundedTurns=0 early-return, SessionTokenLimitExceeded early-return, Arena control signal early-return, post-consume clear. +Sites: `resetChat()`, `MaxSessionTurns` early-return, `boundedTurns=0` early-return, `SessionTokenLimitExceeded` early-return, Arena control-signal early-return. The fire path itself also performs this abort-then-replace when a new UserQuery arrives while the previous prefetch is still in flight. + +**2 clear-only sites** (the prefetch has already settled and we're consuming it — no controller to abort, just drop the reference): + +```typescript +prefetchHandle.consumed = true; +this.pendingMemoryPrefetch = undefined; +``` + +Sites: UserQuery consume point, ToolResult inject point. ### 6. `relevanceSelector.ts` — remove `AbortSignal.timeout(1_000)` diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index 0ae77516d86..e99fa9b4f0b 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2459,6 +2459,42 @@ hello expect(memoryIdx).toBeGreaterThan(functionResponseIdx); }); + it('should abort the pending prefetch when the caller signal aborts', async () => { + let abortHandlerInvoked = false; + mockMemoryManager.recall.mockImplementation((_root, _query, opts) => { + opts.abortSignal?.addEventListener('abort', () => { + abortHandlerInvoked = true; + }); + return new Promise(() => {}); + }); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'Hello' }; + })(), + ); + + const callerController = new AbortController(); + const stream = client.sendMessageStream( + [{ text: 'user typed but then aborted' }], + callerController.signal, + 'prompt-id-aborted', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of stream) { + // consume + } + + expect(abortHandlerInvoked).toBe(false); + callerController.abort(); + expect(abortHandlerInvoked).toBe(true); + }); + it('should abort the previous prefetch when a new UserQuery arrives mid-flight', async () => { // Pending recall on first UserQuery — never resolves on its own. const abortSignals: AbortSignal[] = []; diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index f67cd9f3d91..c82008fa357 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1075,6 +1075,18 @@ export class GeminiClient { this.pendingMemoryPrefetch?.controller.abort(); this.pendingMemoryPrefetch = undefined; const controller = new AbortController(); + // Bridge the caller's signal into the prefetch controller so a user + // abort (Ctrl-C / Esc) on the parent turn also terminates the + // recall side-query. `{ once: true }` lets the listener clean itself + // up after firing; we still call removeEventListener on the promise's + // finally to cover the normal-completion case so a long-lived parent + // signal doesn't accumulate listeners across many turns. + const onParentAbort = () => controller.abort(); + if (signal.aborted) { + controller.abort(); + } else { + signal.addEventListener('abort', onParentAbort, { once: true }); + } const promise = this.config .getMemoryManager() .recall(this.config.getProjectRoot(), partToString(request), { @@ -1101,6 +1113,7 @@ export class GeminiClient { }; void promise.finally(() => { handle.settledAt = Date.now(); + signal.removeEventListener('abort', onParentAbort); }); this.pendingMemoryPrefetch = handle; } From 41ca832a87e0ada8b83b165818dd5848f9d0db1d Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 18 May 2026 10:17:59 +0800 Subject: [PATCH 13/15] refactor(core): consolidate memory-prefetch lifecycle + safety nets per round-3 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architectural (root-cause fix for cleanup-path sibling drift): - New private cancelPendingMemoryPrefetch() consolidates the abort+clear idiom (was duplicated across 6 sites). Logs at debug when discarding a settled-but-unconsumed handle so missing-memory scenarios are diagnosable. - New private tryConsumeMemoryPrefetch() consolidates the consume-and-mark-consumed dance (was duplicated UserQuery + ToolResult). - All existing cleanup sites + the two newly-flagged early-return sites (LoopDetected, Error) now use the helper; future early-returns can rely on the finally-block safety net. - sendMessageStream try-finally now uses a `normalCompletion` flag: only the bottom-of-try return path preserves the prefetch (intentional — next ToolResult turn may consume it); every other exit (uncaught exception, abnormal early-return) goes through cancelPendingMemoryPrefetch in finally. Diagnostics: - Restored AbortError debug log in fire-path catch (was silent after removing the deadline mechanism; aborts now come from 4+ sources so a trace is valuable). - Updated stale "deadline" log in recall.ts to reflect current abort sources (caller signal / new UserQuery / cleanup / 30 s safety timeout). Safety net: - Added 30 s ceiling in relevanceSelector via AbortSignal.any(...). Generous enough that normal ~1 s recalls don't trip it; bounds zombie side-queries if the model API hangs and the caller never aborts. Replaces the uncancellable `new AbortController().signal` fallback that would have left callerless invocations running indefinitely. Doc sync: - Design doc updated: UserQuery consume code sample now shows `unshift` (matches implementation) with an inline note on the prepend-vs-append contrast. Tests: - New regression guard: resetChat aborts pending prefetch and clears the handle. - New regression guard: LoopDetected mid-stream aborts pending prefetch and clears the handle (catches the sibling-drift bug this round caught). 227/227 tests pass; tsc clean on changed files. Declined from this round: - `await Promise.resolve()` after fire path: defensive — current code has multiple natural microtask drains before consume point. Added comment documenting the dependency instead. - Renaming `settledAt: number | null` to `settled: boolean`: timestamp has diagnostic value for future instrumentation; current consumers' null-check usage is documented in the JSDoc. Co-Authored-By: Claude Sonnet 4.6 --- .../2026-05-15-async-memory-recall-design.md | 6 +- packages/core/src/core/client.test.ts | 80 +++++++++ packages/core/src/core/client.ts | 165 ++++++++++++------ packages/core/src/memory/recall.ts | 11 +- packages/core/src/memory/relevanceSelector.ts | 16 +- 5 files changed, 209 insertions(+), 69 deletions(-) diff --git a/docs/design/2026-05-15-async-memory-recall-design.md b/docs/design/2026-05-15-async-memory-recall-design.md index 4cbb33ede32..f11b2ac5d23 100644 --- a/docs/design/2026-05-15-async-memory-recall-design.md +++ b/docs/design/2026-05-15-async-memory-recall-design.md @@ -119,7 +119,11 @@ if ( this.pendingMemoryPrefetch = undefined; const result = await prefetchHandle.promise; // already settled, returns immediately if (result.prompt) { - systemReminders.push(result.prompt); + // unshift, not push: keep memory at the front of systemReminders so + // it leads the system-reminder block on UserQuery turns. (ToolResult + // turns instead append to requestToSend to preserve functionCall / + // functionResponse pairing — see below.) + systemReminders.unshift(result.prompt); for (const doc of result.selectedDocs) { this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); } diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index e99fa9b4f0b..ec1afea6e30 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2548,6 +2548,86 @@ hello expect(abortSignals[1].aborted).toBe(false); }); + it('should abort the pending prefetch on resetChat', async () => { + let abortHandlerInvoked = false; + mockMemoryManager.recall.mockImplementation((_root, _query, opts) => { + opts.abortSignal?.addEventListener('abort', () => { + abortHandlerInvoked = true; + }); + return new Promise(() => {}); + }); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'Hello' }; + })(), + ); + + const stream = client.sendMessageStream( + [{ text: 'first' }], + new AbortController().signal, + 'prompt-id-reset-1', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of stream) { + // consume + } + + expect(abortHandlerInvoked).toBe(false); + await client.resetChat(); + expect(abortHandlerInvoked).toBe(true); + expect(client['pendingMemoryPrefetch']).toBeUndefined(); + }); + + it('should abort the pending prefetch when LoopDetected fires mid-stream', async () => { + let abortHandlerInvoked = false; + mockMemoryManager.recall.mockImplementation((_root, _query, opts) => { + opts.abortSignal?.addEventListener('abort', () => { + abortHandlerInvoked = true; + }); + return new Promise(() => {}); + }); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + + // Force LoopDetector to trip on the first event. + const loopDetector = client['loopDetector']; + vi.spyOn(loopDetector, 'addAndCheck').mockReturnValue(true); + vi.spyOn(loopDetector, 'getLastLoopType').mockReturnValue(undefined); + + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'looping' }; + })(), + ); + + const stream = client.sendMessageStream( + [{ text: 'trigger a loop' }], + new AbortController().signal, + 'prompt-id-loop', + { type: SendMessageType.UserQuery }, + ); + const events = []; + for await (const event of stream) { + events.push(event); + } + + expect(events.some((e) => e.type === GeminiEventType.LoopDetected)).toBe( + true, + ); + expect(abortHandlerInvoked).toBe(true); + expect(client['pendingMemoryPrefetch']).toBeUndefined(); + }); + it('should proceed without auto-memory when managed auto-memory is disabled', async () => { // When getManagedAutoMemoryEnabled returns false, no recall is initiated // and sendMessageStream completes without memory content diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index c82008fa357..e0fe0367f04 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -385,6 +385,50 @@ export class GeminiClient { }); } + /** + * Abort and release the pending auto-memory prefetch in one step. + * Safe to call when no prefetch is pending — does nothing. Centralises + * the abort-then-clear idiom so every cleanup path (resetChat, early + * returns, finally) cannot half-fix one without the other. + * + * If the handle has already settled (recall completed but consume point + * hadn't run yet), the settled result is discarded — logged at debug so + * operators can diagnose missing-memory scenarios. + */ + private cancelPendingMemoryPrefetch(): void { + const handle = this.pendingMemoryPrefetch; + if (!handle) return; + if (handle.settledAt !== null && !handle.consumed) { + debugLogger.debug('Discarding settled but unconsumed memory prefetch.'); + } + handle.controller.abort(); + this.pendingMemoryPrefetch = undefined; + } + + /** + * Atomically consume the pending prefetch if it has already settled. + * Returns the recall result (caller decides where to inject it in + * `requestToSend`), or `null` if there's nothing to consume yet. + * + * Centralises the consume-and-mark dance so the UserQuery and ToolResult + * inject sites can't drift on the guard logic. + */ + private async tryConsumeMemoryPrefetch(): Promise { + const handle = this.pendingMemoryPrefetch; + if (!handle || handle.settledAt === null || handle.consumed) { + return null; + } + handle.consumed = true; + this.pendingMemoryPrefetch = undefined; + const result = await handle.promise; // already settled, returns immediately + if (result.prompt) { + for (const doc of result.selectedDocs) { + this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); + } + } + return result; + } + async resetChat(): Promise { this.initializedSessionId = undefined; this.surfacedRelevantAutoMemoryPaths.clear(); @@ -398,8 +442,7 @@ export class GeminiClient { this.config.getBaseLlmClient().clearPerModelGeneratorCache(); // Abort any in-flight auto-memory recall so the stale controller // does not leak into the next session. - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); // Drop any deferred tools revealed this session so /clear really gives // a clean slate. We don't clear inside startChat itself because that path // is also taken by compression (which preserves the session), and @@ -1063,6 +1106,13 @@ export class GeminiClient { }); } + // Tracks whether the generator reached its natural end (the bottom-of-try + // `return turn`). Only on that path do we want to preserve the pending + // memory prefetch so the next ToolResult turn can consume it. Any other + // exit (LoopDetected, Error, signal abort, uncaught exception, abnormal + // early-return) leaves this `false`, and the `finally` block aborts the + // prefetch as a safety net. + let normalCompletion = false; try { if ( messageType === SendMessageType.UserQuery || @@ -1072,8 +1122,7 @@ export class GeminiClient { // A previous recall may still be pending (slow side-query, new user // turn arrived before it settled). Abort it before installing the // new handle so the orphan doesn't keep running indefinitely. - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); const controller = new AbortController(); // Bridge the caller's signal into the prefetch controller so a user // abort (Ctrl-C / Esc) on the parent turn also terminates the @@ -1095,9 +1144,18 @@ export class GeminiClient { abortSignal: controller.signal, }) .catch((error: unknown) => { + // Abort sources are now numerous (caller signal, new UserQuery, + // cleanup paths, safety-net timeout). Keep a debug trace so + // operators can diagnose missing-memory scenarios without + // raising noise on the common abort path. if ( - !(error instanceof DOMException && error.name === 'AbortError') + error instanceof DOMException && + error.name === 'AbortError' ) { + debugLogger.debug( + 'Managed auto-memory recall prefetch aborted.', + ); + } else { debugLogger.warn( 'Managed auto-memory recall prefetch failed.', error, @@ -1186,8 +1244,7 @@ export class GeminiClient { this.config.getMaxSessionTurns() > 0 && this.sessionTurnCount > this.config.getMaxSessionTurns() ) { - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); yield { type: GeminiEventType.MaxSessionTurns }; if (isTopLevelInteraction) endInteractionSpan('error', { @@ -1200,8 +1257,7 @@ export class GeminiClient { // Ensure turns never exceeds MAX_TURNS to prevent infinite loops const boundedTurns = Math.min(turns, MAX_TURNS); if (!boundedTurns) { - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); if (isTopLevelInteraction) endInteractionSpan('error', { errorMessage: 'max turns exhausted' }); return new Turn(this.getChat(), prompt_id); @@ -1216,8 +1272,7 @@ export class GeminiClient { const lastPromptTokenCount = uiTelemetryService.getLastPromptTokenCount(); if (lastPromptTokenCount > sessionTokenLimit) { - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); yield { type: GeminiEventType.SessionTokenLimitExceeded, value: { @@ -1277,8 +1332,7 @@ export class GeminiClient { `Arena control signal received: ${controlSignal.type} - ${controlSignal.reason}`, ); await arenaAgentClient.reportCancelled(); - this.pendingMemoryPrefetch?.controller.abort(); - this.pendingMemoryPrefetch = undefined; + this.cancelPendingMemoryPrefetch(); if (isTopLevelInteraction) endInteractionSpan('cancelled'); return new Turn(this.getChat(), prompt_id); } @@ -1340,57 +1394,36 @@ export class GeminiClient { // Zero-wait poll: consume only if the prefetch has already settled. // Done AFTER the async reminder setup above so recall settling during - // those awaits still gets caught here. If still not settled, skip — - // the ToolResult inject point will retry on the next turn. - const prefetchHandle = this.pendingMemoryPrefetch; - if ( - prefetchHandle && - prefetchHandle.settledAt !== null && - !prefetchHandle.consumed - ) { - prefetchHandle.consumed = true; - this.pendingMemoryPrefetch = undefined; - const result = await prefetchHandle.promise; // already settled, returns immediately - if (result.prompt) { - // Unshift to the front of systemReminders: on a UserQuery turn - // requestToSend leads with user text, so positioning memory at - // the very start of the system-reminder block keeps it close to - // the user prompt. Contrast the ToolResult path below, which - // must append to avoid splitting functionCall / functionResponse. - systemReminders.unshift(result.prompt); - for (const doc of result.selectedDocs) { - this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); - } - } + // those awaits still gets caught here. (settledAt is set in + // promise.finally(); microtask ordering guarantees it's visible + // after any await prior to this point — flatMapTextParts above is + // the natural drain.) If still not settled, skip — the ToolResult + // inject point will retry on the next turn. + const userQueryMemory = await this.tryConsumeMemoryPrefetch(); + if (userQueryMemory?.prompt) { + // Unshift to the front of systemReminders: on a UserQuery turn + // requestToSend leads with user text, so positioning memory at + // the very start of the system-reminder block keeps it close to + // the user prompt. Contrast the ToolResult path below, which + // must append to avoid splitting functionCall / functionResponse. + systemReminders.unshift(userQueryMemory.prompt); } requestToSend = [...systemReminders, ...requestToSend]; } if (messageType === SendMessageType.ToolResult) { - const prefetchHandle = this.pendingMemoryPrefetch; - if ( - prefetchHandle && - prefetchHandle.settledAt !== null && - !prefetchHandle.consumed - ) { - prefetchHandle.consumed = true; - this.pendingMemoryPrefetch = undefined; - const result = await prefetchHandle.promise; - if (result.prompt) { - // Append (not prepend): on a ToolResult turn, requestToSend leads - // with functionResponse parts that must immediately follow the - // model's functionCall (Qwen API constraint — same reason the - // IDE-context block above is skipped while a tool call is pending, - // see the `hasPendingToolCall` guard). Putting the memory text - // after the functionResponse parts keeps the call/response pairing - // intact under native Gemini; the OpenAI converter then emits the - // text as a separate user message after the tool messages. - requestToSend = [...requestToSend, result.prompt]; - for (const doc of result.selectedDocs) { - this.surfacedRelevantAutoMemoryPaths.add(doc.filePath); - } - } + const toolResultMemory = await this.tryConsumeMemoryPrefetch(); + if (toolResultMemory?.prompt) { + // Append (not prepend): on a ToolResult turn, requestToSend leads + // with functionResponse parts that must immediately follow the + // model's functionCall (Qwen API constraint — same reason the + // IDE-context block above is skipped while a tool call is pending, + // see the `hasPendingToolCall` guard). Putting the memory text + // after the functionResponse parts keeps the call/response pairing + // intact under native Gemini; the OpenAI converter then emits the + // text as a separate user message after the tool messages. + requestToSend = [...requestToSend, toolResultMemory.prompt]; } } @@ -1416,6 +1449,9 @@ export class GeminiClient { this.lastApiCompletionTimestamp = Date.now(); if (isTopLevelInteraction) endInteractionSpan('error', { errorMessage: 'loop detected' }); + // finally cleanup catches this, but cancel explicitly to match + // the cleanup pattern at other early-return sites. + this.cancelPendingMemoryPrefetch(); return turn; } } @@ -1466,6 +1502,9 @@ export class GeminiClient { event.value instanceof Error ? '[API error]' : 'unknown error'; endInteractionSpan('error', { errorMessage: errMsg }); } + // finally cleanup catches this, but cancel explicitly to match + // the cleanup pattern at other early-return sites. + this.cancelPendingMemoryPrefetch(); return turn; } } @@ -1661,8 +1700,18 @@ export class GeminiClient { if (isTopLevelInteraction) { endInteractionSpan(signal?.aborted ? 'cancelled' : 'ok'); } + // Reached the bottom of the try — this turn ended cleanly. Preserve + // any still-pending memory prefetch so the next ToolResult turn can + // consume it (the whole point of the fire-and-forget design). + normalCompletion = true; return turn; } finally { + // Belt-and-suspenders: abort the prefetch on any exit other than the + // bottom-of-try `return turn`. Catches uncaught exceptions and guards + // against future early-return sites that forget to call cancel. + if (!normalCompletion) { + this.cancelPendingMemoryPrefetch(); + } if (isTopLevelInteraction) { endInteractionSpan(signal?.aborted ? 'cancelled' : 'error', { errorMessage: 'unexpected exit', diff --git a/packages/core/src/memory/recall.ts b/packages/core/src/memory/recall.ts index 8697496f953..e9a402a3a69 100644 --- a/packages/core/src/memory/recall.ts +++ b/packages/core/src/memory/recall.ts @@ -219,11 +219,12 @@ export async function resolveRelevantAutoMemoryPromptForQuery( strategy, }; } catch (error) { - // Distinguish deadline-triggered cancellation from real model errors - // so oncall debugging is not misled by the fallback log. + // Distinguish caller-driven aborts (user signal / new UserQuery / + // session cleanup / 30 s safety timeout) from real model errors so + // oncall debugging is not misled by the fallback log. if (error instanceof DOMException && error.name === 'AbortError') { debugLogger.debug( - 'Model-driven auto-memory recall cancelled by deadline; heuristic result discarded.', + 'Model-driven auto-memory recall aborted; heuristic result discarded.', ); } else { debugLogger.warn( @@ -234,8 +235,8 @@ export async function resolveRelevantAutoMemoryPromptForQuery( } } - // If the caller's abort signal is already set (e.g. deadline fired), skip the - // heuristic fallback — the result would be discarded anyway. + // If the caller's abort signal is already set, skip the heuristic + // fallback — the result would be discarded anyway. if (options.abortSignal?.aborted) { return { prompt: '', diff --git a/packages/core/src/memory/relevanceSelector.ts b/packages/core/src/memory/relevanceSelector.ts index a7effd4fa38..1ef957671f7 100644 --- a/packages/core/src/memory/relevanceSelector.ts +++ b/packages/core/src/memory/relevanceSelector.ts @@ -91,11 +91,17 @@ export async function selectRelevantAutoMemoryDocumentsByModel( purpose: 'auto-memory-recall', contents, schema: RESPONSE_SCHEMA, - // No inline timeout — caller (`GeminiClient.MemoryPrefetchHandle`) - // owns cancellation and aborts via its controller on cleanup paths. - // A bare unsignalled AbortController fills the type when the caller - // doesn't pass one, since runSideQuery's `abortSignal` is required. - abortSignal: callerAbortSignal ?? new AbortController().signal, + // Caller (`GeminiClient.MemoryPrefetchHandle`) owns lifecycle and aborts + // via its controller on cleanup paths. The 30 s ceiling is a generous + // safety net that only fires if the model API hangs (network partition, + // server stall, runaway retry) AND the caller never aborts. Normal + // recalls take ~1 s; 30 s is far above the long tail so this doesn't + // re-introduce the 1 s timeout regression that motivated this redesign. + // Without this ceiling, a callerless invocation would use an + // unsignalled AbortController and run indefinitely. + abortSignal: callerAbortSignal + ? AbortSignal.any([AbortSignal.timeout(30_000), callerAbortSignal]) + : AbortSignal.timeout(30_000), // Uses runSideQuery's default side-query model policy: fast model first, // then main session model when no fast model is configured. From 904f2c47ace31764440e48b66d32f407e2e2c3e9 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Mon, 18 May 2026 10:23:18 +0800 Subject: [PATCH 14/15] =?UTF-8?q?fix(test):=20correct=20getLastLoopType=20?= =?UTF-8?q?mock=20return=20type=20=E2=80=94=20null,=20not=20undefined?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI tsc --build (stricter than --noEmit) caught: src/core/client.test.ts(2996,65): error TS2345: Argument of type 'undefined' is not assignable to parameter of type 'LoopType | null'. getLastLoopType()'s contract returns LoopType | null; the test mock was returning undefined. Switched to null to match the type. Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index ec1afea6e30..af5444bd1cf 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -2602,7 +2602,7 @@ hello // Force LoopDetector to trip on the first event. const loopDetector = client['loopDetector']; vi.spyOn(loopDetector, 'addAndCheck').mockReturnValue(true); - vi.spyOn(loopDetector, 'getLastLoopType').mockReturnValue(undefined); + vi.spyOn(loopDetector, 'getLastLoopType').mockReturnValue(null); mockTurnRunFn.mockReturnValue( (async function* () { From a0b8bedf53e3b7237dab600a6dacc112a086af21 Mon Sep 17 00:00:00 2001 From: LaZzyMan Date: Tue, 19 May 2026 10:33:28 +0800 Subject: [PATCH 15/15] fix(core): preserve memory prefetch across hook/next-speaker continuations + accurate recall abort log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Round-4 review findings (self-inflicted regression from round-3): 1. Preserve pending prefetch on `return hookTurn` (Stop-hook continuation) and `return continueTurn` (next-speaker continuation). The round-3 `normalCompletion = true` was only set at the bottom-of-try `return turn`, leaving these two recursive-yield paths to trip the finally cleanup. When the inner Hook turn produced tool calls, the subsequent ToolResult turn found `pendingMemoryPrefetch === undefined` and memory was silently dropped. 2. recall.ts catch log distinguishes caller-driven aborts (heuristic genuinely skipped below) from the 30s safety-net timeout in relevanceSelector (the caller's signal is NOT aborted by that path, so the heuristic fallback actually runs). Regression guard added: - "should PRESERVE the pending prefetch when next-speaker continueTurn returns" — was red before this commit, green after. 258/258 tests pass; tsc --build clean. Co-Authored-By: Claude Sonnet 4.6 --- packages/core/src/core/client.test.ts | 68 ++++++++++++++++++++++++++- packages/core/src/core/client.ts | 9 ++++ packages/core/src/memory/recall.ts | 24 +++++++--- 3 files changed, 94 insertions(+), 7 deletions(-) diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index d13da2a7fb5..bde6856c67a 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -39,7 +39,12 @@ import { import type { ModelsConfig } from '../models/modelsConfig.js'; import { UnauthorizedError } from '../utils/errors.js'; import { retryWithBackoff } from '../utils/retry.js'; -import { CompressionStatus, GeminiEventType, Turn } from './turn.js'; +import { + CompressionStatus, + GeminiEventType, + Turn, + type ServerGeminiStreamEvent, +} from './turn.js'; vi.mock('../utils/retry.js', () => ({ retryWithBackoff: vi.fn(async (fn) => await fn()), @@ -3025,6 +3030,67 @@ hello expect(client['pendingMemoryPrefetch']).toBeUndefined(); }); + it('should PRESERVE the pending prefetch when next-speaker continueTurn returns', async () => { + // Self-inflicted-regression guard for the round-4 finding: + // the bottom-of-try `normalCompletion = true` doesn't cover the + // `return continueTurn;` path, so the outer's finally used to cancel + // the still-pending prefetch — meaning a subsequent ToolResult turn + // would have no memory to consume. + let abortHandlerInvoked = false; + mockMemoryManager.recall.mockImplementation((_root, _query, opts) => { + opts.abortSignal?.addEventListener('abort', () => { + abortHandlerInvoked = true; + }); + return new Promise(() => {}); // never settles + }); + + const mockChat: Partial = { + addHistory: vi.fn(), + getHistory: vi.fn().mockReturnValue([]), + }; + client['chat'] = mockChat as GeminiChat; + mockTurnRunFn.mockReturnValue( + (async function* () { + yield { type: 'content', value: 'outer reply' }; + })(), + ); + + // Force the next-speaker check to recurse so we hit `return continueTurn`. + // The recursion call passes through this same mock stream and returns. + const { checkNextSpeaker } = await import( + '../utils/nextSpeakerChecker.js' + ); + const mockedCheckNextSpeaker = vi.mocked(checkNextSpeaker); + mockedCheckNextSpeaker + .mockResolvedValueOnce({ + reasoning: 'forced', + next_speaker: 'model', + }) + .mockResolvedValue(null); // inner recursion: stop + // Each recursive sendMessageStream call asks turn.run() for a new stream. + mockTurnRunFn.mockImplementation( + () => + (async function* () { + yield { type: 'content', value: 'reply' }; + })() as unknown as AsyncGenerator, + ); + + const stream = client.sendMessageStream( + [{ text: 'hello' }], + new AbortController().signal, + 'prompt-id-continueturn', + { type: SendMessageType.UserQuery }, + ); + for await (const _ of stream) { + // consume + } + + // The prefetch must survive the continueTurn return so a follow-up + // ToolResult turn can consume it. + expect(abortHandlerInvoked).toBe(false); + expect(client['pendingMemoryPrefetch']).not.toBeUndefined(); + }); + it('should proceed without auto-memory when managed auto-memory is disabled', async () => { // When getManagedAutoMemoryEnabled returns false, no recall is initiated // and sendMessageStream completes without memory content diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index f5b3fb353ff..e66362900dd 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1794,6 +1794,10 @@ export class GeminiClient { ); if (isTopLevelInteraction) endInteractionSpan(signal.aborted ? 'cancelled' : 'ok'); + // Preserve the pending prefetch: the inner Hook turn we just + // yielded may have produced tool calls, and the caller's next + // ToolResult turn still needs to consume the recall result. + normalCompletion = true; return hookTurn; } @@ -1860,6 +1864,11 @@ export class GeminiClient { ); if (isTopLevelInteraction) endInteractionSpan(signal.aborted ? 'cancelled' : 'ok'); + // Preserve the pending prefetch: same reasoning as the + // `return hookTurn` site above — the recursive Hook turn may + // have produced tool calls whose ToolResult turn still needs + // the recall result. + normalCompletion = true; return continueTurn; } diff --git a/packages/core/src/memory/recall.ts b/packages/core/src/memory/recall.ts index e9a402a3a69..5b476b21ce1 100644 --- a/packages/core/src/memory/recall.ts +++ b/packages/core/src/memory/recall.ts @@ -219,13 +219,25 @@ export async function resolveRelevantAutoMemoryPromptForQuery( strategy, }; } catch (error) { - // Distinguish caller-driven aborts (user signal / new UserQuery / - // session cleanup / 30 s safety timeout) from real model errors so - // oncall debugging is not misled by the fallback log. + // Distinguish three cases so oncall debugging isn't misled: + // - caller-driven abort (user signal / new UserQuery / session + // cleanup): caller signal is aborted → heuristic fallback is + // skipped below at `options.abortSignal?.aborted`, so the + // result really is discarded. + // - 30 s safety-net timeout in relevanceSelector: only the inner + // combined signal aborts; the caller's signal is NOT aborted, + // so the heuristic fallback below DOES run. + // - real model error: warn at the higher level. if (error instanceof DOMException && error.name === 'AbortError') { - debugLogger.debug( - 'Model-driven auto-memory recall aborted; heuristic result discarded.', - ); + if (options.abortSignal?.aborted) { + debugLogger.debug( + 'Model-driven auto-memory recall aborted by caller; heuristic result discarded.', + ); + } else { + debugLogger.debug( + 'Model-driven auto-memory recall timed out (30 s safety net); heuristic fallback will run.', + ); + } } else { debugLogger.warn( 'Model-driven auto-memory recall failed; falling back to heuristic selection.',