fix(core): address codex review findings on async memory recall

LaZzyMan · claude · LaZzyMan · commit 0b4bcc1b1a18 · 2026-05-15T17:10:50.000+08:00
Three findings fixed:

1. Abort previous prefetch before installing a new one (line 1059):
   A new UserQuery/Cron used to overwrite pendingMemoryPrefetch without
   aborting the old controller, leaking an unbounded background recall now
   that the 1s side-query timeout is gone.

2. Move the UserQuery consume poll AFTER the async reminder setup:
   ensureTool + listSubagents are awaited between the old poll location and
   the final assembly, so recalls that settled during those awaits used to
   be missed (and a tool-less turn never got a ToolResult retry). The poll
   now runs immediately before requestToSend assembly, and unshifts memory
   to the front of systemReminders to preserve ordering.

3. Append memory after functionResponse on ToolResult turns:
   The Qwen API requires the functionResponse part to immediately follow
   the model's functionCall (see lines 1209-1213). Prepending memory text
   risked breaking that pairing on the native Gemini path. Appending keeps
   the pair intact on Gemini and produces the same OpenAI output (text
   becomes a separate user message after the tool messages).

Tests:
- Updated ToolResult inject test to assert memory index > functionResponse index
- Added abort-previous-prefetch test (mid-flight UserQuery aborts old handle)

224/224 tests pass; tsc clean on changed files.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts
@@ -2445,13 +2445,71 @@ hello
         // consume
       }
 
-      expect(mockTurnRunFn).toHaveBeenLastCalledWith(
-        'test-model',
-        expect.arrayContaining([
-          '## Relevant memory\n\nDeferred memory result.',
-        ]),
-        expect.any(AbortSignal),
+      // Memory must come AFTER the functionResponse part so the Qwen API
+      // call/response pairing isn't broken (see client.ts:1209-1213).
+      const lastCallArgs = mockTurnRunFn.mock.lastCall;
+      const requestArr = lastCallArgs![1] as unknown[];
+      const functionResponseIdx = requestArr.findIndex(
+        (p) => typeof p === 'object' && p !== null && 'functionResponse' in p,
+      );
+      const memoryIdx = requestArr.findIndex(
+        (p) => p === '## Relevant memory\n\nDeferred memory result.',
+      );
+      expect(functionResponseIdx).toBeGreaterThanOrEqual(0);
+      expect(memoryIdx).toBeGreaterThan(functionResponseIdx);
+    });
+
+    it('should abort the previous prefetch when a new UserQuery arrives mid-flight', async () => {
+      // Pending recall on first UserQuery — never resolves on its own.
+      const abortSignals: AbortSignal[] = [];
+      mockMemoryManager.recall.mockImplementation((_root, _query, opts) => {
+        abortSignals.push(opts.abortSignal as AbortSignal);
+        return new Promise(() => {});
+      });
+
+      const mockChat: Partial<GeminiChat> = {
+        addHistory: vi.fn(),
+        getHistory: vi.fn().mockReturnValue([]),
+      };
+      client['chat'] = mockChat as GeminiChat;
+      mockTurnRunFn.mockReturnValue(
+        (async function* () {
+          yield { type: 'content', value: 'Hello' };
+        })(),
       );
+
+      // First UserQuery — installs prefetch #1
+      const stream1 = client.sendMessageStream(
+        [{ text: 'first' }],
+        new AbortController().signal,
+        'prompt-id-1',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of stream1) {
+        // consume
+      }
+      expect(abortSignals.length).toBe(1);
+      expect(abortSignals[0].aborted).toBe(false);
+
+      // Second UserQuery — should abort #1 before installing #2
+      mockTurnRunFn.mockReturnValue(
+        (async function* () {
+          yield { type: 'content', value: 'Hello again' };
+        })(),
+      );
+      const stream2 = client.sendMessageStream(
+        [{ text: 'second' }],
+        new AbortController().signal,
+        'prompt-id-2',
+        { type: SendMessageType.UserQuery },
+      );
+      for await (const _ of stream2) {
+        // consume
+      }
+
+      expect(abortSignals.length).toBe(2);
+      expect(abortSignals[0].aborted).toBe(true);
+      expect(abortSignals[1].aborted).toBe(false);
     });
 
     it('should proceed without auto-memory when managed auto-memory is disabled', async () => {
diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts
@@ -1057,6 +1057,11 @@ export class GeminiClient {
         messageType === SendMessageType.Cron
       ) {
         if (this.config.getManagedAutoMemoryEnabled()) {
+          // A previous recall may still be pending (slow side-query, new user
+          // turn arrived before it settled). Abort it before installing the
+          // new handle so the orphan doesn't keep running indefinitely.
+          this.pendingMemoryPrefetch?.controller.abort();
+          this.pendingMemoryPrefetch = undefined;
           const controller = new AbortController();
           const promise = this.config
             .getMemoryManager()
@@ -1274,24 +1279,6 @@ export class GeminiClient {
         messageType === SendMessageType.Cron
       ) {
         const systemReminders = [];
-        // Zero-wait poll: consume only if the prefetch has already settled.
-        // If not settled yet, skip — the ToolResult inject point will retry.
-        const prefetchHandle = this.pendingMemoryPrefetch;
-        if (
-          prefetchHandle &&
-          prefetchHandle.settledAt !== null &&
-          !prefetchHandle.consumed
-        ) {
-          prefetchHandle.consumed = true;
-          this.pendingMemoryPrefetch = undefined;
-          const result = await prefetchHandle.promise; // already settled, returns immediately
-          if (result.prompt) {
-            systemReminders.push(result.prompt);
-            for (const doc of result.selectedDocs) {
-              this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
-            }
-          }
-        }
 
         // add subagent system reminder if there are subagents
         const hasAgentTool = await this.config
@@ -1326,6 +1313,27 @@ export class GeminiClient {
           }
         }
 
+        // Zero-wait poll: consume only if the prefetch has already settled.
+        // Done AFTER the async reminder setup above so recall settling during
+        // those awaits still gets caught here. If still not settled, skip —
+        // the ToolResult inject point will retry on the next turn.
+        const prefetchHandle = this.pendingMemoryPrefetch;
+        if (
+          prefetchHandle &&
+          prefetchHandle.settledAt !== null &&
+          !prefetchHandle.consumed
+        ) {
+          prefetchHandle.consumed = true;
+          this.pendingMemoryPrefetch = undefined;
+          const result = await prefetchHandle.promise; // already settled, returns immediately
+          if (result.prompt) {
+            systemReminders.unshift(result.prompt);
+            for (const doc of result.selectedDocs) {
+              this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
+            }
+          }
+        }
+
         requestToSend = [...systemReminders, ...requestToSend];
       }
 
@@ -1340,7 +1348,14 @@ export class GeminiClient {
           this.pendingMemoryPrefetch = undefined;
           const result = await prefetchHandle.promise;
           if (result.prompt) {
-            requestToSend = [result.prompt, ...requestToSend];
+            // Append (not prepend): on a ToolResult turn, requestToSend leads
+            // with functionResponse parts that must immediately follow the
+            // model's functionCall (Qwen API constraint, see lines 1209-1213).
+            // Putting the memory text after the functionResponse parts keeps
+            // the call/response pairing intact under native Gemini, while the
+            // OpenAI converter still emits the text as a separate user message
+            // after the tool messages.
+            requestToSend = [...requestToSend, result.prompt];
             for (const doc of result.selectedDocs) {
               this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
             }