Skip to content

Commit da432fa

Browse files
authored
fix: cancel in-flight generation when starting a new session (#289)
Signed-off-by: Logan Nguyen <lg.131.dev@gmail.com>
1 parent afeaab5 commit da432fa

2 files changed

Lines changed: 123 additions & 20 deletions

File tree

src/hooks/__tests__/useModel.test.tsx

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,51 @@ describe('useModel', () => {
901901
expect.objectContaining({ reason: 'user_reset' }),
902902
);
903903
});
904+
905+
it('cancels the in-flight backend generation when starting a new session', async () => {
906+
// Stall ask_model so the generation stays active while we reset.
907+
let resolveInvoke!: () => void;
908+
invoke.mockImplementationOnce(
909+
async (_cmd: string, args?: Record<string, unknown>) => {
910+
if (args && 'onEvent' in args) {
911+
return new Promise<void>((res) => {
912+
resolveInvoke = res;
913+
});
914+
}
915+
},
916+
);
917+
918+
const { result } = renderHook(() => useModel(''));
919+
920+
act(() => {
921+
void result.current.ask('hello');
922+
});
923+
expect(result.current.isGenerating).toBe(true);
924+
925+
await act(async () => {
926+
result.current.reset();
927+
await Promise.resolve();
928+
});
929+
930+
// A new session must stop the backend stream, not just the frontend
931+
// view - otherwise the old generation holds the engine's single slot
932+
// and the next turn queues behind it.
933+
expect(invoke).toHaveBeenCalledWith('cancel_generation');
934+
935+
act(() => {
936+
resolveInvoke?.();
937+
});
938+
});
939+
940+
it('does not call cancel_generation when reset runs with no active generation', () => {
941+
const { result } = renderHook(() => useModel(''));
942+
943+
act(() => {
944+
result.current.reset();
945+
});
946+
947+
expect(invoke).not.toHaveBeenCalledWith('cancel_generation');
948+
});
904949
});
905950

906951
// ─── onTurnComplete callback ─────────────────────────────────────────────────
@@ -1125,6 +1170,40 @@ describe('useModel', () => {
11251170
expect.objectContaining({ reason: 'history_load' }),
11261171
);
11271172
});
1173+
1174+
it('cancels the in-flight backend generation when loading another conversation', async () => {
1175+
// Stall ask_model so the generation stays active while we load.
1176+
let resolveInvoke!: () => void;
1177+
invoke.mockImplementationOnce(
1178+
async (_cmd: string, args?: Record<string, unknown>) => {
1179+
if (args && 'onEvent' in args) {
1180+
return new Promise<void>((res) => {
1181+
resolveInvoke = res;
1182+
});
1183+
}
1184+
},
1185+
);
1186+
1187+
const { result } = renderHook(() => useModel(''));
1188+
1189+
act(() => {
1190+
void result.current.ask('original');
1191+
});
1192+
expect(result.current.isGenerating).toBe(true);
1193+
1194+
await act(async () => {
1195+
result.current.loadMessages([
1196+
{ id: 'l1', role: 'user', content: 'loaded' },
1197+
]);
1198+
await Promise.resolve();
1199+
});
1200+
1201+
expect(invoke).toHaveBeenCalledWith('cancel_generation');
1202+
1203+
act(() => {
1204+
resolveInvoke?.();
1205+
});
1206+
});
11281207
});
11291208

11301209
// ─── ThinkingToken handling ──────────────────────────────────────────────────

src/hooks/useModel.ts

Lines changed: 44 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,30 @@ export function useModel(
255255
return true;
256256
}, []);
257257

258+
/**
259+
* Signals the backend to stop the active generation and tracks the
260+
* in-flight cancel in `pendingCancelRef` so the next `ask()` /
261+
* `askSearch()` awaits the round-trip before starting a new turn. That
262+
* wait is what stops a fresh request from racing the outgoing one onto
263+
* the engine's single decode slot. Idempotent while a cancel is already
264+
* pending, so overlapping callers (double cancel, cancel-then-reset) only
265+
* fire `cancel_generation` once. Returns the pending-cancel promise.
266+
*/
267+
const requestBackendCancel = useCallback((): Promise<void> => {
268+
if (!pendingCancelRef.current) {
269+
pendingCancelRef.current = (async () => {
270+
try {
271+
await invoke('cancel_generation');
272+
} catch {
273+
// Local hard-abort already reset the UI; backend best-effort only.
274+
} finally {
275+
pendingCancelRef.current = null;
276+
}
277+
})();
278+
}
279+
return pendingCancelRef.current;
280+
}, []);
281+
258282
/**
259283
* Submits a message to the Ollama backend and starts the streaming response.
260284
*
@@ -717,22 +741,8 @@ export function useModel(
717741
}
718742

719743
abortActiveGeneration();
720-
721-
if (!pendingCancelRef.current) {
722-
const cancelPromise = (async () => {
723-
try {
724-
await invoke('cancel_generation');
725-
} catch {
726-
// Local hard-abort already reset the UI; backend best-effort only.
727-
} finally {
728-
pendingCancelRef.current = null;
729-
}
730-
})();
731-
pendingCancelRef.current = cancelPromise;
732-
}
733-
734-
await pendingCancelRef.current;
735-
}, [abortActiveGeneration, isGenerating]);
744+
await requestBackendCancel();
745+
}, [abortActiveGeneration, isGenerating, requestBackendCancel]);
736746

737747
/** Resets all conversation state for a fresh session.
738748
*
@@ -746,7 +756,15 @@ export function useModel(
746756
* user-visible reset.
747757
*/
748758
const reset = useCallback(() => {
749-
abortActiveGeneration();
759+
const hadActiveGeneration = abortActiveGeneration();
760+
// Starting a fresh session must also stop any in-flight backend stream,
761+
// not just the frontend view abort above. Otherwise the outgoing
762+
// generation keeps the engine's single decode slot and the next turn
763+
// queues behind it. Routed through the same `pendingCancelRef` plumbing
764+
// `cancel()` uses so the next `ask()` awaits the cancel round-trip.
765+
if (hadActiveGeneration) {
766+
void requestBackendCancel();
767+
}
750768
setMessages([]);
751769
const outgoingId = traceConversationIdRef.current;
752770
if (outgoingId !== null && !isFirstTurnRef.current) {
@@ -758,7 +776,7 @@ export function useModel(
758776
traceConversationIdRef.current = null;
759777
isFirstTurnRef.current = true;
760778
void invoke('reset_conversation');
761-
}, [abortActiveGeneration]);
779+
}, [abortActiveGeneration, requestBackendCancel]);
762780

763781
/** Replaces the current message list with a previously loaded set of messages.
764782
*
@@ -770,7 +788,13 @@ export function useModel(
770788
*/
771789
const loadMessages = useCallback(
772790
(msgs: Message[]) => {
773-
abortActiveGeneration();
791+
const hadActiveGeneration = abortActiveGeneration();
792+
// Loading another conversation is a session boundary too: stop the
793+
// in-flight backend stream so it does not hold the engine slot and
794+
// stall the loaded conversation's next turn. Same plumbing as reset().
795+
if (hadActiveGeneration) {
796+
void requestBackendCancel();
797+
}
774798
const outgoingId = traceConversationIdRef.current;
775799
if (outgoingId !== null && !isFirstTurnRef.current) {
776800
void invoke('record_conversation_end', {
@@ -782,7 +806,7 @@ export function useModel(
782806
isFirstTurnRef.current = true;
783807
setMessages(msgs);
784808
},
785-
[abortActiveGeneration],
809+
[abortActiveGeneration, requestBackendCancel],
786810
);
787811

788812
/**

0 commit comments

Comments
 (0)