Vizards
diff --git a/package.json b/package.json
Lines changed: 1 addition & 1 deletion
diff --git a/src/consts.ts b/src/consts.ts
Lines changed: 0 additions & 28 deletions
diff --git a/src/provider/diagnostics.ts b/src/provider/diagnostics.ts
Lines changed: 54 additions & 38 deletions
diff --git a/src/provider/dump.ts b/src/provider/dump.ts
Lines changed: 24 additions & 4 deletions
diff --git a/src/provider/request.ts b/src/provider/request.ts
Lines changed: 13 additions & 4 deletions
@@ -148,7 +148,7 @@
         "deepseek-copilot.visionPrompt": {
           "type": "string",
           "editPresentation": "multilineText",
-          "default": "Describe the visual contents of this image in detail, including any text, objects, people, or context that would be relevant for understanding it. Focus on factual visual elements.",
+          "default": "Describe all image attachments in this message.\n\nIf there is one image, describe it directly.\nIf there are multiple images:\n1. Describe each image separately, preserving their order.\n2. Then provide a combined description explaining the overall context and relationships across the images.\n\nReturn one concise factual description suitable for inserting into a text-only chat prompt. Include visible text, objects, UI elements, people, and relevant context. Do not invent details.",
           "description": "%deepseek-copilot.config.visionPrompt.description%"
         },
         "deepseek-copilot.debugMode": {
 
@@ -26,34 +26,6 @@ export const WELCOME_SHOWN_KEY = 'deepseek-copilot.welcomeShown';
 /** Walkthrough contribution ID. */
 export const WALKTHROUGH_ID = 'Vizards.deepseek-v4-for-copilot#deepseekGettingStarted';
 
-// ---- Vision proxy ----
-
-/** Default model ID used for the vision proxy when auto-detection is enabled. */
-export const DEFAULT_VISION_MODEL_ID = 'oswe-vscode-prime';
-
-/**
- * Prompt sent to the vision proxy model when describing image attachments
- * before forwarding them to text-only DeepSeek models.
- */
-export const IMAGE_DESCRIPTION_PROMPT =
-	'Describe the visual contents of this image in detail, including any text, objects, people, or context that would be relevant for understanding it. Focus on factual visual elements.';
-
-/**
- * Stable fallback marker inserted into the chat prompt when the vision proxy
- * fails to describe an image. Keep this in English and out of i18n so prompt
- * shape and cache behaviour do not vary by VS Code display language.
- */
-export const IMAGE_DESCRIPTION_UNAVAILABLE = '[Image Description unavailable]';
-
-/**
- * Wrapper applied to vision model descriptions before they are inserted into
- * the chat prompt. The full format is: `[Image Description: <description>]`.
- * Keep these in English and out of i18n so cache keys and token estimates
- * stay stable regardless of VS Code display language.
- */
-export const IMAGE_DESCRIPTION_PREFIX = '[Image Description: ';
-export const IMAGE_DESCRIPTION_SUFFIX = ']';
-
 // ---- Reasoning cache ----
 
 /** Directory name under globalStorageUri for persisted DeepSeek reasoning_content. */
 
@@ -1,15 +1,12 @@
 import { createHash } from 'crypto';
 import vscode from 'vscode';
 import { getDebugLoggingEnabled } from '../config';
-import {
-	IMAGE_DESCRIPTION_UNAVAILABLE,
-	LANGUAGE_MODEL_CHAT_SYSTEM_ROLE,
-	REASONING_CACHE_TTL_MS,
-} from '../consts';
+import { LANGUAGE_MODEL_CHAT_SYSTEM_ROLE, REASONING_CACHE_TTL_MS } from '../consts';
 import { logger } from '../logger';
 import type { DeepSeekMessage, DeepSeekRequest, DeepSeekTool, DeepSeekUsage } from '../types';
 import type { ConversationSegment } from './segment';
-import type { VisionDescriptionCacheStats } from './vision/index';
+import { IMAGE_DESCRIPTION_UNAVAILABLE } from './vision/consts';
+import type { VisionResolutionStats as VisionPipelineStats } from './vision/index';
 
 const LARGE_MESSAGE_CHARS = 10_000;
 const HASH_WINDOW_CHARS = 2_048;
@@ -124,7 +121,7 @@ export interface BeginCacheDiagnosticsOptions {
 	inputMessages: readonly vscode.LanguageModelChatRequestMessage[];
 	resolvedMessages: readonly vscode.LanguageModelChatRequestMessage[];
 	visionModelId?: string;
-	visionCacheStats?: VisionDescriptionCacheStats;
+	visionStats?: VisionPipelineStats;
 }
 
 export interface CacheDiagnosticsDoneInfo {
@@ -149,6 +146,8 @@ export interface SegmentMarkerReportInfo {
 	segment: ConversationSegment;
 	status: SegmentMarkerReportStatus;
 	trigger?: SegmentMarkerReportTrigger;
+	markerBytes?: number;
+	visionTextChars?: number;
 	reason?: 'cancelled' | 'stream-error';
 	error?: unknown;
 }
@@ -183,7 +182,7 @@ export function createCacheDiagnosticsRecorder(): CacheDiagnosticsRecorder {
 	return new DefaultCacheDiagnosticsRecorder();
 }
 
-interface VisionResolutionStats {
+interface VisionMessageStats {
 	inputImageParts: number;
 	inputImageMessages: number;
 	describedImageMessages: number;
@@ -264,7 +263,7 @@ class DefaultCacheDiagnosticsRecorder implements CacheDiagnosticsRecorder {
 		for (const detailLine of formatCacheTraceDetailLines(cacheTrace)) {
 			logger.info(`[cache-trace #${requestId}] ${detailLine}`);
 		}
-		const visionTrace = formatVisionTrace(visionResolution, options.visionCacheStats);
+		const visionTrace = formatVisionTrace(visionResolution, options.visionStats);
 		if (visionTrace) {
 			logger.info(`[cache-trace #${requestId}] ${visionTrace}`);
 		}
@@ -404,13 +403,18 @@ function formatSegmentTrace(segment: ConversationSegment): string {
 
 function formatSegmentMarkerReport(info: SegmentMarkerReportInfo): string {
 	const trigger = info.trigger ? ` trigger=${info.trigger}` : '';
+	const markerBytes = info.markerBytes === undefined ? '' : ` markerBytes=${info.markerBytes}`;
+	const visionTextChars =
+		info.visionTextChars === undefined ? '' : ` visionTextChars=${info.visionTextChars}`;
 	const reason = info.reason ? ` reason=${info.reason}` : '';
 	const error = info.error ? ` error=${formatError(info.error)}` : '';
 	return (
 		`segmentMarker status=${info.status}` +
 		` segment=${info.segment.segmentId}` +
 		` segmentReason=${info.segment.reason}` +
 		trigger +
+		markerBytes +
+		visionTextChars +
 		reason +
 		error
 	);
@@ -518,8 +522,8 @@ function summarizeVisionResolution(
 	inputMessages: readonly vscode.LanguageModelChatRequestMessage[],
 	resolvedMessages: readonly vscode.LanguageModelChatRequestMessage[],
 	visionModelId: string | undefined,
-): VisionResolutionStats {
-	const stats: VisionResolutionStats = {
+): VisionMessageStats {
+	const stats: VisionMessageStats = {
 		inputImageParts: 0,
 		inputImageMessages: 0,
 		describedImageMessages: 0,
@@ -588,17 +592,21 @@ function getMessageText(message: vscode.LanguageModelChatRequestMessage): string
 }
 
 function formatVisionTrace(
-	stats: VisionResolutionStats,
-	cacheStats: VisionDescriptionCacheStats | undefined,
+	stats: VisionMessageStats,
+	pipelineStats: VisionPipelineStats | undefined,
 ): string | undefined {
-	if (stats.inputImageParts === 0 && stats.historyDescriptionMessages === 0) {
+	if (
+		stats.inputImageParts === 0 &&
+		stats.historyDescriptionMessages === 0 &&
+		!hasVisionPipelineActivity(pipelineStats)
+	) {
 		return undefined;
 	}
 
 	const note =
 		stats.inputImageParts === 0 && stats.historyDescriptionMessages > 0 ? ' note=history-only' : '';
 	const visionModel = formatVisionModel(stats);
-	const cacheTrace = formatVisionCacheStats(stats, cacheStats);
+	const pipelineTrace = formatVisionPipelineStats(pipelineStats);
 	return (
 		`vision rawImageParts=${stats.inputImageParts}` +
 		` rawImageMessages=${stats.inputImageMessages}` +
@@ -607,42 +615,50 @@ function formatVisionTrace(
 		` droppedImageParts=${stats.droppedImageParts}` +
 		` visionModel=${visionModel}` +
 		` historyDescriptionMessages=${stats.historyDescriptionMessages}` +
-		cacheTrace +
+		pipelineTrace +
 		note
 	);
 }
 
-function formatVisionCacheStats(
-	resolutionStats: VisionResolutionStats,
-	cacheStats: VisionDescriptionCacheStats | undefined,
-): string {
-	if (!cacheStats) {
-		return '';
+function hasVisionPipelineActivity(stats: VisionPipelineStats | undefined): boolean {
+	if (!stats) {
+		return false;
 	}
+	return (
+		stats.inputImageParts > 0 ||
+		stats.currentImageMessages > 0 ||
+		stats.generatedImageMessages > 0 ||
+		stats.replayedImageMessages > 0 ||
+		stats.omittedImageMessages > 0 ||
+		stats.unavailableImageMessages > 0 ||
+		stats.failedImageMessages > 0 ||
+		stats.invalidMarkerVisionMetadata > 0
+	);
+}
 
-	const hasCacheActivity =
-		cacheStats.hits > 0 ||
-		cacheStats.misses > 0 ||
-		cacheStats.deduplicatedDescriptions > 0 ||
-		cacheStats.generatedDescriptions > 0 ||
-		cacheStats.failedDescriptions > 0 ||
-		cacheStats.droppedImageParts > 0;
-	if (!hasCacheActivity && resolutionStats.inputImageParts === 0) {
+function formatVisionPipelineStats(stats: VisionPipelineStats | undefined): string {
+	if (!stats) {
+		return '';
+	}
+	if (!hasVisionPipelineActivity(stats)) {
 		return '';
 	}
 
 	return (
-		` cache(enabled=${cacheStats.enabled}` +
-		`,hits=${cacheStats.hits}` +
-		`,misses=${cacheStats.misses}` +
-		`,deduped=${cacheStats.deduplicatedDescriptions}` +
-		`,entries=${cacheStats.entries}` +
-		`,generated=${cacheStats.generatedDescriptions}` +
-		`,failed=${cacheStats.failedDescriptions})`
+		` markerReplay(inputImages=${stats.inputImageParts}` +
+		`,current=${stats.currentImageMessages}` +
+		`,generated=${stats.generatedImageMessages}` +
+		`,replayed=${stats.replayedImageMessages}` +
+		`,omitted=${stats.omittedImageMessages}` +
+		`,unavailable=${stats.unavailableImageMessages}` +
+		`,failed=${stats.failedImageMessages}` +
+		`,droppedParts=${stats.droppedImageParts}` +
+		`,markerChars=${stats.markerVisionTextChars}` +
+		`,invalidMarkerVision=${stats.invalidMarkerVisionMetadata})`
 	);
 }
 
-function formatVisionModel(stats: VisionResolutionStats): string {
+function formatVisionModel(stats: VisionMessageStats): string {
 	if (stats.visionModelId) {
 		return stats.visionModelId;
 	}
 
@@ -9,7 +9,7 @@ import { safeStringify, toWellFormedString } from '../json';
 import { logger } from '../logger';
 import type { DeepSeekMessage, DeepSeekRequest } from '../types';
 import { parseSegmentMarkerData, SEGMENT_MARKER_MIME, type ConversationSegment } from './segment';
-import type { VisionDescriptionCacheStats } from './vision/index';
+import type { VisionResolutionStats } from './vision/index';
 
 let dumpCounter = 0;
 let providerInputDumpCounter = 0;
@@ -84,7 +84,7 @@ export interface DumpDeepSeekRequestOptions {
 	resolvedMessages: readonly vscode.LanguageModelChatRequestMessage[];
 	requestOptions: vscode.ProvideLanguageModelChatResponseOptions;
 	visionModelId?: string;
-	visionCacheStats?: VisionDescriptionCacheStats;
+	visionStats?: VisionResolutionStats;
 }
 
 export interface DumpProviderInputOptions {
@@ -308,7 +308,7 @@ function createPipelineSnapshot(
 			stage === 'resolved'
 				? {
 						modelId: options.visionModelId ?? null,
-						stats: options.visionCacheStats ?? null,
+						stats: options.visionStats ?? null,
 					}
 				: undefined,
 		deepSeekPromptSummary: summarizeDeepSeekSystemPrompt(request.messages),
@@ -399,6 +399,8 @@ type SerializedContentPart =
 			segmentMarker?: {
 				valid: boolean;
 				segmentId?: string;
+				visionTextChars?: number;
+				visionTextIgnoredReason?: string;
 				error?: string;
 			};
 	  }
@@ -479,7 +481,9 @@ function serializeContentPart(part: unknown, index: number): SerializedContentPa
 
 	if (part instanceof vscode.LanguageModelDataPart) {
 		const segmentMarker =
-			part.mimeType === SEGMENT_MARKER_MIME ? parseSegmentMarkerData(part.data) : undefined;
+			part.mimeType === SEGMENT_MARKER_MIME
+				? summarizeSegmentMarker(parseSegmentMarkerData(part.data))
+				: undefined;
 		return {
 			index,
 			type: 'data',
@@ -503,6 +507,22 @@ function serializeContentPart(part: unknown, index: number): SerializedContentPa
 	};
 }
 
+function summarizeSegmentMarker(marker: ReturnType<typeof parseSegmentMarkerData>): {
+	valid: boolean;
+	segmentId?: string;
+	visionTextChars?: number;
+	visionTextIgnoredReason?: string;
+	error?: string;
+} {
+	return {
+		valid: marker.valid,
+		segmentId: marker.segmentId,
+		visionTextChars: marker.visionText?.length,
+		visionTextIgnoredReason: marker.visionTextIgnoredReason,
+		error: marker.error,
+	};
+}
+
 function serializeTools(
 	tools: readonly vscode.LanguageModelChatTool[] | undefined,
 ): object[] | undefined {
 
@@ -10,7 +10,7 @@ import { convertMessages, convertTools, countMessageChars } from './convert';
 import type { CacheDiagnosticsRecorder, CacheDiagnosticsRun } from './diagnostics';
 import { dumpDeepSeekRequest } from './dump';
 import { getConfiguredThinkingEffort, type ModelConfigurationOptions } from './models';
-import type { ConversationSegment } from './segment';
+import type { ConversationSegment, SegmentMarkerMetadata } from './segment';
 import { resolveImageMessages } from './vision/index';
 
 export interface PreparedChatRequest {
@@ -21,6 +21,8 @@ export interface PreparedChatRequest {
 	trailingToolResultIds: string[];
 	cacheDiagnostics: CacheDiagnosticsRun;
 	segment: ConversationSegment;
+	segmentMarkerMetadata: SegmentMarkerMetadata;
+	visionMarkerTextChars?: number;
 }
 
 export interface PrepareChatRequestOptions {
@@ -61,7 +63,12 @@ export async function prepareChatRequest({
 	const thinkingEffort = getConfiguredThinkingEffort(options as ModelConfigurationOptions);
 	const maxTokens = getMaxTokens();
 
-	const visionResolution = await resolveImageMessages(messages, token, getVisionModel);
+	const visionResolution = await resolveImageMessages(
+		messages,
+		token,
+		getVisionModel,
+		segment.segmentId,
+	);
 	const resolvedMessages = visionResolution.messages;
 	const deepseekMessages = convertMessages(resolvedMessages, isThinkingModel, reasoningLookup);
 	const tools = modelDef?.capabilities.toolCalling ? convertTools(options.tools) : undefined;
@@ -94,7 +101,7 @@ export async function prepareChatRequest({
 		resolvedMessages,
 		requestOptions: options,
 		visionModelId: visionResolution.visionModelId,
-		visionCacheStats: visionResolution.stats,
+		visionStats: visionResolution.stats,
 	});
 
 	const diagnosticsRun = cacheDiagnostics.beginRequest({
@@ -108,7 +115,7 @@ export async function prepareChatRequest({
 		inputMessages: messages,
 		resolvedMessages,
 		visionModelId: visionResolution.visionModelId,
-		visionCacheStats: visionResolution.stats,
+		visionStats: visionResolution.stats,
 	});
 
 	return {
@@ -119,6 +126,8 @@ export async function prepareChatRequest({
 		trailingToolResultIds: collectTrailingToolResultIds(deepseekMessages),
 		cacheDiagnostics: diagnosticsRun,
 		segment,
+		segmentMarkerMetadata: visionResolution.segmentMarkerMetadata,
+		visionMarkerTextChars: visionResolution.stats.markerVisionTextChars || undefined,
 	};
 }