Skip to content

Commit c8651a3

Browse files
ASRagabclaude
authored andcommitted
fix(coordinator): cache wait_for_signal_done timeouts via shared complete() path
`waitForSignalDone` had two terminal paths: the registered `wrapped` resolver (which cleared the timer, wrote the replay cache, and resolved) and the timeout callback (which removed the resolver, finished the wait, but \*did not\* write the replay cache before resolving). A client retry that reused the same requestId after a timeout would therefore re-enter the wait path and register a fresh resolver, inflating `activeSignalWaitCounts` and re-blocking the coordinator's notification batching. - Collapse both paths into a single idempotent `complete(result)` that clears the timer, removes itself from `anySignalResolvers`, calls `finishSignalWait`, writes the replay cache when `requestId` is set, and resolves the promise. The `settled` guard makes repeated calls a no-op so external callers can keep shifting the resolver out before invoking it. - Drop the now-redundant `finishSignalWait` calls in the three external paths that consume a resolver (PTY exit, cleanupChildTask, signalDone) — `complete` handles bookkeeping itself, so leaving the manual call in place would double-decrement the wait count. - In `unregisterCoordinator`, snapshot the resolvers before fan-out because `complete` now splices itself from the live array. - Add a coordinator.test.ts case: same requestId after a timeout returns the cached timed-out result quickly instead of starting a new live wait. Addresses finding 1 of issue #157. Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent ac031a7 commit c8651a3

2 files changed

Lines changed: 45 additions & 14 deletions

File tree

electron/mcp/coordinator.test.ts

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5525,6 +5525,24 @@ describe('Coordinator waitForSignalDone — requestId replay after transport fai
55255525
expect(result.taskId).toBe('task-1');
55265526
});
55275527

5528+
it('same requestId returns cached timed-out result after a timeout (no second live wait)', async () => {
5529+
await coordinator.createTask({ name: 'test', prompt: 'do', coordinatorTaskId: 'coord-1' });
5530+
5531+
const requestId = 'replay-timeout-id';
5532+
// First call times out — no unconsumed signal, short timeout.
5533+
const result1 = await coordinator.waitForSignalDone('coord-1', 25, requestId);
5534+
expect(result1.timedOut).toBe(true);
5535+
5536+
// Retry with the same requestId must replay the cached timed-out result
5537+
// instead of registering a fresh waiter — otherwise a stale HTTP retry
5538+
// would inflate activeSignalWaitCounts and re-block the coordinator.
5539+
const beforeRetry = Date.now();
5540+
const result2 = await coordinator.waitForSignalDone('coord-1', 10_000, requestId);
5541+
const elapsed = Date.now() - beforeRetry;
5542+
expect(result2).toEqual(result1);
5543+
expect(elapsed).toBeLessThan(100);
5544+
});
5545+
55285546
it('cached result for coord-A does not replay for coord-B with the same requestId', async () => {
55295547
coordinator.registerCoordinator('coord-2', 'proj-1');
55305548

electron/mcp/coordinator.ts

Lines changed: 27 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,15 @@ export class Coordinator {
229229
this.suppressPendingNotificationForTask(task);
230230
task.reviewNotificationQueued = true;
231231
const remaining = this.countRemaining(coordinatorId);
232+
// The resolver IS `complete` from waitForSignalDone — it handles
233+
// finishSignalWait, replay-cache write, and timer cleanup itself.
232234
firstAnyResolver({
233235
taskId: task.id,
234236
name: task.name,
235237
status: 'exited',
236238
signalDoneAt: new Date().toISOString(),
237239
remaining,
238240
});
239-
this.finishSignalWait(coordinatorId);
240241
}
241242
this.maybeQueueReviewNotification(task, 'exited', exitCode ?? null);
242243
break;
@@ -1926,14 +1927,14 @@ export class Coordinator {
19261927
this.suppressPendingNotificationForTask(task);
19271928
task.reviewNotificationQueued = true;
19281929
const remaining = this.countRemaining(coordinatorId);
1930+
// Resolver `complete` from waitForSignalDone handles finishSignalWait.
19291931
firstAnyResolver({
19301932
taskId: task.id,
19311933
name: task.name,
19321934
status: 'exited',
19331935
signalDoneAt: new Date().toISOString(),
19341936
remaining,
19351937
});
1936-
this.finishSignalWait(coordinatorId);
19371938
}
19381939
this.clearAgentBuffers(task.agentId);
19391940
// Delete per-task MCP config tmp file
@@ -2227,7 +2228,11 @@ export class Coordinator {
22272228
signalDoneAt: new Date().toISOString(),
22282229
remaining: 0,
22292230
};
2230-
for (const resolve of anyResolvers) resolve(syntheticResult);
2231+
// Snapshot first — each resolver is `complete` from waitForSignalDone,
2232+
// which splices itself out of the array, and mutating during iteration
2233+
// would skip waiters.
2234+
const snapshot = [...anyResolvers];
2235+
for (const resolve of snapshot) resolve(syntheticResult);
22312236
}
22322237
this.anySignalResolvers.delete(coordinatorTaskId);
22332238
this.activeSignalWaitCounts.delete(coordinatorTaskId);
@@ -2376,14 +2381,14 @@ export class Coordinator {
23762381
// Suppress before finishSignalWait so it doesn't re-stage
23772382
this.suppressPendingNotificationForTask(task);
23782383
const remaining = this.countRemaining(coordinatorId);
2384+
// Resolver `complete` from waitForSignalDone handles finishSignalWait.
23792385
firstAnyResolver({
23802386
taskId,
23812387
name: task.name,
23822388
status: task.status,
23832389
signalDoneAt: (task.signalDoneAt ?? new Date()).toISOString(),
23842390
remaining,
23852391
});
2386-
this.finishSignalWait(coordinatorId);
23872392
// Tell renderer — coordinator already gets result via MCP return value, no UI notification needed
23882393
this.notifyRenderer(IPC.MCP_TaskStateSync, {
23892394
taskId,
@@ -2529,36 +2534,44 @@ export class Coordinator {
25292534

25302535
return new Promise((resolve) => {
25312536
const timerRef = { value: undefined as ReturnType<typeof setTimeout> | undefined };
2532-
2533-
const wrapped = (result: WaitForSignalDoneResult) => {
2537+
let settled = false;
2538+
2539+
// Single termination path: timer cleanup, resolver removal, active-wait
2540+
// bookkeeping, replay-cache write, and promise resolution all happen here
2541+
// regardless of whether the result came from a signal, an exit, a
2542+
// coordinator close, or the timeout. Idempotent — repeated calls are a
2543+
// no-op so external callers can shift the resolver out before invoking.
2544+
const complete = (result: WaitForSignalDoneResult) => {
2545+
if (settled) return;
2546+
settled = true;
25342547
if (timerRef.value !== undefined) clearTimeout(timerRef.value);
2535-
if (requestId) this.recentlyDelivered.set(coordinatorTaskId, requestId, result);
2536-
resolve(result);
2537-
};
2538-
2539-
timerRef.value = setTimeout(() => {
25402548
const resolvers = this.anySignalResolvers.get(coordinatorTaskId);
25412549
if (resolvers) {
2542-
const idx = resolvers.indexOf(wrapped);
2550+
const idx = resolvers.indexOf(complete);
25432551
if (idx >= 0) resolvers.splice(idx, 1);
25442552
}
25452553
this.finishSignalWait(coordinatorTaskId);
2554+
if (requestId) this.recentlyDelivered.set(coordinatorTaskId, requestId, result);
2555+
resolve(result);
2556+
};
2557+
2558+
timerRef.value = setTimeout(() => {
25462559
logWarn('coordinator.signal_wait', `wait_for_signal_done timed out after ${timeoutMs}ms`, {
25472560
coordinatorTaskId,
25482561
reason: 'timeout',
25492562
timeoutMs,
25502563
activeWaitCount: this.activeSignalWaitCounts.get(coordinatorTaskId) ?? 0,
25512564
});
25522565
const remaining = this.countRemaining(coordinatorTaskId);
2553-
resolve({ remaining, timedOut: true });
2566+
complete({ remaining, timedOut: true });
25542567
}, timeoutMs);
25552568

25562569
let resolvers = this.anySignalResolvers.get(coordinatorTaskId);
25572570
if (!resolvers) {
25582571
resolvers = [];
25592572
this.anySignalResolvers.set(coordinatorTaskId, resolvers);
25602573
}
2561-
resolvers.push(wrapped);
2574+
resolvers.push(complete);
25622575
});
25632576
}
25642577

0 commit comments

Comments
 (0)