Skip to content

Commit 0b4bcc1

Browse files
LaZzyManclaude
andcommitted
fix(core): address codex review findings on async memory recall
Three findings fixed: 1. Abort previous prefetch before installing a new one (line 1059): A new UserQuery/Cron used to overwrite pendingMemoryPrefetch without aborting the old controller, leaking an unbounded background recall now that the 1s side-query timeout is gone. 2. Move the UserQuery consume poll AFTER the async reminder setup: ensureTool + listSubagents are awaited between the old poll location and the final assembly, so recalls that settled during those awaits used to be missed (and a tool-less turn never got a ToolResult retry). The poll now runs immediately before requestToSend assembly, and unshifts memory to the front of systemReminders to preserve ordering. 3. Append memory after functionResponse on ToolResult turns: The Qwen API requires the functionResponse part to immediately follow the model's functionCall (see lines 1209-1213). Prepending memory text risked breaking that pairing on the native Gemini path. Appending keeps the pair intact on Gemini and produces the same OpenAI output (text becomes a separate user message after the tool messages). Tests: - Updated ToolResult inject test to assert memory index > functionResponse - Added abort-previous-prefetch test (mid-flight UserQuery aborts old handle) 224/224 tests pass; tsc clean on changed files. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 90b8d26 commit 0b4bcc1

2 files changed

Lines changed: 98 additions & 25 deletions

File tree

packages/core/src/core/client.test.ts

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,13 +2445,71 @@ hello
24452445
// consume
24462446
}
24472447

2448-
expect(mockTurnRunFn).toHaveBeenLastCalledWith(
2449-
'test-model',
2450-
expect.arrayContaining([
2451-
'## Relevant memory\n\nDeferred memory result.',
2452-
]),
2453-
expect.any(AbortSignal),
2448+
// Memory must come AFTER the functionResponse part so the Qwen API
2449+
// call/response pairing isn't broken (see client.ts:1209-1213).
2450+
const lastCallArgs = mockTurnRunFn.mock.lastCall;
2451+
const requestArr = lastCallArgs![1] as unknown[];
2452+
const functionResponseIdx = requestArr.findIndex(
2453+
(p) => typeof p === 'object' && p !== null && 'functionResponse' in p,
2454+
);
2455+
const memoryIdx = requestArr.findIndex(
2456+
(p) => p === '## Relevant memory\n\nDeferred memory result.',
2457+
);
2458+
expect(functionResponseIdx).toBeGreaterThanOrEqual(0);
2459+
expect(memoryIdx).toBeGreaterThan(functionResponseIdx);
2460+
});
2461+
2462+
it('should abort the previous prefetch when a new UserQuery arrives mid-flight', async () => {
2463+
// Pending recall on first UserQuery — never resolves on its own.
2464+
const abortSignals: AbortSignal[] = [];
2465+
mockMemoryManager.recall.mockImplementation((_root, _query, opts) => {
2466+
abortSignals.push(opts.abortSignal as AbortSignal);
2467+
return new Promise(() => {});
2468+
});
2469+
2470+
const mockChat: Partial<GeminiChat> = {
2471+
addHistory: vi.fn(),
2472+
getHistory: vi.fn().mockReturnValue([]),
2473+
};
2474+
client['chat'] = mockChat as GeminiChat;
2475+
mockTurnRunFn.mockReturnValue(
2476+
(async function* () {
2477+
yield { type: 'content', value: 'Hello' };
2478+
})(),
24542479
);
2480+
2481+
// First UserQuery — installs prefetch #1
2482+
const stream1 = client.sendMessageStream(
2483+
[{ text: 'first' }],
2484+
new AbortController().signal,
2485+
'prompt-id-1',
2486+
{ type: SendMessageType.UserQuery },
2487+
);
2488+
for await (const _ of stream1) {
2489+
// consume
2490+
}
2491+
expect(abortSignals.length).toBe(1);
2492+
expect(abortSignals[0].aborted).toBe(false);
2493+
2494+
// Second UserQuery — should abort #1 before installing #2
2495+
mockTurnRunFn.mockReturnValue(
2496+
(async function* () {
2497+
yield { type: 'content', value: 'Hello again' };
2498+
})(),
2499+
);
2500+
const stream2 = client.sendMessageStream(
2501+
[{ text: 'second' }],
2502+
new AbortController().signal,
2503+
'prompt-id-2',
2504+
{ type: SendMessageType.UserQuery },
2505+
);
2506+
for await (const _ of stream2) {
2507+
// consume
2508+
}
2509+
2510+
expect(abortSignals.length).toBe(2);
2511+
expect(abortSignals[0].aborted).toBe(true);
2512+
expect(abortSignals[1].aborted).toBe(false);
24552513
});
24562514

24572515
it('should proceed without auto-memory when managed auto-memory is disabled', async () => {

packages/core/src/core/client.ts

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,11 @@ export class GeminiClient {
10571057
messageType === SendMessageType.Cron
10581058
) {
10591059
if (this.config.getManagedAutoMemoryEnabled()) {
1060+
// A previous recall may still be pending (slow side-query, new user
1061+
// turn arrived before it settled). Abort it before installing the
1062+
// new handle so the orphan doesn't keep running indefinitely.
1063+
this.pendingMemoryPrefetch?.controller.abort();
1064+
this.pendingMemoryPrefetch = undefined;
10601065
const controller = new AbortController();
10611066
const promise = this.config
10621067
.getMemoryManager()
@@ -1274,24 +1279,6 @@ export class GeminiClient {
12741279
messageType === SendMessageType.Cron
12751280
) {
12761281
const systemReminders = [];
1277-
// Zero-wait poll: consume only if the prefetch has already settled.
1278-
// If not settled yet, skip — the ToolResult inject point will retry.
1279-
const prefetchHandle = this.pendingMemoryPrefetch;
1280-
if (
1281-
prefetchHandle &&
1282-
prefetchHandle.settledAt !== null &&
1283-
!prefetchHandle.consumed
1284-
) {
1285-
prefetchHandle.consumed = true;
1286-
this.pendingMemoryPrefetch = undefined;
1287-
const result = await prefetchHandle.promise; // already settled, returns immediately
1288-
if (result.prompt) {
1289-
systemReminders.push(result.prompt);
1290-
for (const doc of result.selectedDocs) {
1291-
this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
1292-
}
1293-
}
1294-
}
12951282

12961283
// add subagent system reminder if there are subagents
12971284
const hasAgentTool = await this.config
@@ -1326,6 +1313,27 @@ export class GeminiClient {
13261313
}
13271314
}
13281315

1316+
// Zero-wait poll: consume only if the prefetch has already settled.
1317+
// Done AFTER the async reminder setup above so recall settling during
1318+
// those awaits still gets caught here. If still not settled, skip —
1319+
// the ToolResult inject point will retry on the next turn.
1320+
const prefetchHandle = this.pendingMemoryPrefetch;
1321+
if (
1322+
prefetchHandle &&
1323+
prefetchHandle.settledAt !== null &&
1324+
!prefetchHandle.consumed
1325+
) {
1326+
prefetchHandle.consumed = true;
1327+
this.pendingMemoryPrefetch = undefined;
1328+
const result = await prefetchHandle.promise; // already settled, returns immediately
1329+
if (result.prompt) {
1330+
systemReminders.unshift(result.prompt);
1331+
for (const doc of result.selectedDocs) {
1332+
this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
1333+
}
1334+
}
1335+
}
1336+
13291337
requestToSend = [...systemReminders, ...requestToSend];
13301338
}
13311339

@@ -1340,7 +1348,14 @@ export class GeminiClient {
13401348
this.pendingMemoryPrefetch = undefined;
13411349
const result = await prefetchHandle.promise;
13421350
if (result.prompt) {
1343-
requestToSend = [result.prompt, ...requestToSend];
1351+
// Append (not prepend): on a ToolResult turn, requestToSend leads
1352+
// with functionResponse parts that must immediately follow the
1353+
// model's functionCall (Qwen API constraint, see lines 1209-1213).
1354+
// Putting the memory text after the functionResponse parts keeps
1355+
// the call/response pairing intact under native Gemini, while the
1356+
// OpenAI converter still emits the text as a separate user message
1357+
// after the tool messages.
1358+
requestToSend = [...requestToSend, result.prompt];
13441359
for (const doc of result.selectedDocs) {
13451360
this.surfacedRelevantAutoMemoryPaths.add(doc.filePath);
13461361
}

0 commit comments

Comments
 (0)