Vizards
diff --git a/src/client.ts b/src/client.ts
Lines changed: 9 additions & 4 deletions
diff --git a/src/provider/cache.ts b/src/provider/cache.ts
Lines changed: 14 additions & 4 deletions
diff --git a/src/provider/convert.ts b/src/provider/convert.ts
Lines changed: 24 additions & 9 deletions
@@ -27,10 +27,12 @@ export class DeepSeekClient {
 		cancellationToken?: CancellationToken,
 	): Promise<void> {
 		const controller = new AbortController();
-
 		const cancelListener = cancellationToken?.onCancellationRequested(() => {
 			controller.abort();
 		});
+		if (cancellationToken?.isCancellationRequested) {
+			controller.abort();
+		}
 
 		try {
 			// Request usage stats in streaming responses so we can calibrate token counting.
@@ -75,7 +77,7 @@ export class DeepSeekClient {
 			while (true) {
 				if (cancellationToken?.isCancellationRequested) {
 					controller.abort();
-					break;
+					return;
 				}
 
 				const { done, value } = await reader.read();
@@ -172,8 +174,7 @@ export class DeepSeekClient {
 
 			callbacks.onDone();
 		} catch (error) {
-			if (error instanceof Error && error.name === 'AbortError') {
-				callbacks.onDone();
+			if (isAbortError(error) && cancellationToken?.isCancellationRequested) {
 				return;
 			}
 			callbacks.onError(error instanceof Error ? error : new Error(String(error)));
@@ -182,3 +183,7 @@ export class DeepSeekClient {
 		}
 	}
 }
+
+function isAbortError(error: unknown): boolean { // True only for an Error instance whose name is AbortError.
+	return error instanceof Error && error.name === 'AbortError';
+}
@@ -5,17 +5,27 @@ import { MAX_CACHE_SIZE } from '../consts';
  * can inject reasoning_content back into prior assistant messages.
  *
  * Key strategy (per DeepSeek docs):
- *  - Non-tool-call turns: reasoning_content does NOT need to be passed back.
- *  - Tool-call turns: reasoning_content MUST be in ALL subsequent requests.
+ *  - Plain non-tool turns: reasoning_content does NOT need to be passed back.
+ *  - Tool-call turns and their post-tool final turns: reasoning_content MUST be
+ *    in ALL subsequent requests.
  *
- * We cache by tool_call IDs so we can look up which reasoning goes with which
- * tool-call-bearing assistant message when reconstructing the message history.
+ * We cache under stable history keys (`tool:<callId>` for tool-call-bearing
+ * assistant messages, `post-tool:<JSON array of callIds>` for final post-tool
+ * assistant messages) so the matching reasoning can be found when rebuilding history.
  */
 export interface ReasoningEntry {
 	text: string;
 	timestamp: number;
 }
 
+export function createToolReasoningKey(toolCallId: string): string { // Builds the cache key tool:<toolCallId> for a single tool call.
+	return `tool:${toolCallId}`;
+}
+
+export function createPostToolReasoningKey(toolCallIds: readonly string[]): string { // Builds the cache key post-tool:<JSON array of ids>; id order is part of the key.
+	return `post-tool:${JSON.stringify(toolCallIds)}`;
+}
+
 export function pruneReasoningCache(cache: Map<string, ReasoningEntry>, clearAll: boolean): void {
 	if (clearAll) {
 		cache.clear();
 
@@ -1,18 +1,19 @@
 import vscode from 'vscode';
 import type { DeepSeekMessage, DeepSeekTool, DeepSeekToolCall } from '../types';
-import type { ReasoningEntry } from './cache';
+import { createPostToolReasoningKey, createToolReasoningKey, type ReasoningEntry } from './cache';
 
 /**
  * Convert VS Code chat messages to DeepSeek format.
- * Injects cached reasoning_content for assistant messages that had tool calls
- * in prior turns.
+ * Injects cached reasoning_content for assistant tool-call messages and final
+ * assistant messages after tool results.
  */
 export function convertMessages(
 	messages: readonly vscode.LanguageModelChatRequestMessage[],
 	isThinkingModel: boolean,
 	reasoningCache: Map<string, ReasoningEntry>,
 ): DeepSeekMessage[] {
 	const result: DeepSeekMessage[] = [];
+	let recentToolResultIds: string[] = [];
 
 	for (const message of messages) {
 		const role = mapRole(message.role);
@@ -53,12 +54,19 @@ export function convertMessages(
 			let reasoningContent: string | undefined;
 			if (isThinkingModel && toolCalls.length > 0) {
 				for (const tc of toolCalls) {
-					const cached = reasoningCache.get(tc.id);
+					// Prefer the new `tool:<callId>` key; fall back to the bare `callId` for entries
+					// written before the stable-key change (read-only compat, no new bare-key writes).
+					const cached =
+						reasoningCache.get(createToolReasoningKey(tc.id)) ?? reasoningCache.get(tc.id);
 					if (cached) {
 						reasoningContent = cached.text;
 						break;
 					}
 				}
+			} else if (isThinkingModel && recentToolResultIds.length > 0) {
+				reasoningContent = reasoningCache.get(
+					createPostToolReasoningKey(recentToolResultIds),
+				)?.text;
 			}
 
 			if (content || toolCalls.length > 0) {
@@ -76,12 +84,18 @@ export function convertMessages(
 				}
 
 				result.push(msg);
+				recentToolResultIds = [];
+			}
+		} else {
+			if (content) {
+				recentToolResultIds = [];
+				result.push({
+					role: role as 'user' | 'assistant',
+					content: content,
+				});
+			} else if (toolResults.length === 0) {
+				recentToolResultIds = [];
 			}
-		} else if (content) {
-			result.push({
-				role: role as 'user' | 'assistant',
-				content: content,
-			});
 		}
 
 		// Tool result messages follow their associated assistant message
@@ -91,6 +105,7 @@ export function convertMessages(
 				content: tr.content,
 				tool_call_id: tr.callId,
 			});
+			recentToolResultIds.push(tr.callId);
 		}
 	}