Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,41 @@ export function isImageDataPart(part: unknown): part is LanguageModelDataPart {
return false;
}

/**
 * Sniff the image format from the leading signature ("magic") bytes of the
 * payload, because the declared mimeType (derived from the file extension)
 * may not match the actual content.
 *
 * @param data Raw bytes of the image.
 * @returns The matching ChatImageMimeType, or `undefined` when fewer than four
 * bytes are supplied or no known signature matches.
 */
export function detectImageMimeType(data: Uint8Array): ChatImageMimeType | undefined {
	// Inputs shorter than four bytes are never classified, even for the shorter signatures.
	if (data.length < 4) {
		return undefined;
	}

	// True when `signature` occurs in `data` starting at `offset`; positions past the end never match.
	const matchesAt = (offset: number, ...signature: number[]): boolean =>
		signature.every((expected, i) => data[offset + i] === expected);

	// JPEG: FF D8 FF
	if (matchesAt(0, 0xFF, 0xD8, 0xFF)) {
		return ChatImageMimeType.JPEG;
	}

	// PNG: 89 'P' 'N' 'G'
	if (matchesAt(0, 0x89, 0x50, 0x4E, 0x47)) {
		return ChatImageMimeType.PNG;
	}

	// GIF: 'G' 'I' 'F' '8'
	if (matchesAt(0, 0x47, 0x49, 0x46, 0x38)) {
		return ChatImageMimeType.GIF;
	}

	// WebP: 'RIFF' at offset 0 and 'WEBP' at offset 8; bytes 4-7 are not inspected.
	const hasRiffPrefix = matchesAt(0, 0x52, 0x49, 0x46, 0x46);
	if (data.length >= 12 && hasRiffPrefix && matchesAt(8, 0x57, 0x45, 0x42, 0x50)) {
		return ChatImageMimeType.WEBP;
	}

	// BMP: 'B' 'M'
	if (matchesAt(0, 0x42, 0x4D)) {
		return ChatImageMimeType.BMP;
	}

	return undefined;
}

function isChatImageMimeType(mimeType: string): mimeType is ChatImageMimeType {
switch (mimeType) {
case ChatImageMimeType.JPEG:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

import { describe, expect, test } from 'vitest';
import { ChatImageMimeType, detectImageMimeType } from '../languageModelChatMessageHelpers';

// Unit tests for detectImageMimeType: one positive case per supported signature,
// plus negative cases for unknown, truncated, and look-alike headers.
describe('detectImageMimeType', () => {
	test('detects JPEG from magic bytes', () => {
		const data = new Uint8Array([0xFF, 0xD8, 0xFF, 0xE0, 0x00]);
		expect(detectImageMimeType(data)).toBe(ChatImageMimeType.JPEG);
	});

	test('detects PNG from magic bytes', () => {
		const data = new Uint8Array([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]);
		expect(detectImageMimeType(data)).toBe(ChatImageMimeType.PNG);
	});

	test('detects GIF from magic bytes', () => {
		const data = new Uint8Array([0x47, 0x49, 0x46, 0x38, 0x39, 0x61]);
		expect(detectImageMimeType(data)).toBe(ChatImageMimeType.GIF);
	});

	test('detects WebP from magic bytes', () => {
		// RIFF....WEBP
		const data = new Uint8Array([0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x45, 0x42, 0x50]);
		expect(detectImageMimeType(data)).toBe(ChatImageMimeType.WEBP);
	});

	test('returns undefined for a RIFF container that is not WebP', () => {
		// RIFF....WAVE: same RIFF prefix as WebP, but a different tag at offset 8
		const data = new Uint8Array([0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56, 0x45]);
		expect(detectImageMimeType(data)).toBeUndefined();
	});

	test('returns undefined for a RIFF header too short to contain the WEBP tag', () => {
		// Only 8 bytes: the tag at offsets 8-11 is missing entirely
		const data = new Uint8Array([0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00]);
		expect(detectImageMimeType(data)).toBeUndefined();
	});

	test('detects BMP from magic bytes', () => {
		const data = new Uint8Array([0x42, 0x4D, 0x00, 0x00]);
		expect(detectImageMimeType(data)).toBe(ChatImageMimeType.BMP);
	});

	test('returns undefined for unknown format', () => {
		const data = new Uint8Array([0x00, 0x01, 0x02, 0x03]);
		expect(detectImageMimeType(data)).toBeUndefined();
	});

	test('returns undefined for data shorter than 4 bytes', () => {
		const data = new Uint8Array([0xFF, 0xD8]);
		expect(detectImageMimeType(data)).toBeUndefined();
	});

	test('returns undefined for empty data', () => {
		const data = new Uint8Array(0);
		expect(detectImageMimeType(data)).toBeUndefined();
	});

	test('correctly identifies JPEG when file extension might suggest PNG', () => {
		// This is the actual bug scenario: file named .png but content is JPEG
		const jpegData = new Uint8Array([0xFF, 0xD8, 0xFF, 0xE1, 0x00, 0x10]);
		expect(detectImageMimeType(jpegData)).toBe(ChatImageMimeType.JPEG);
	});
});
25 changes: 20 additions & 5 deletions src/extension/prompts/node/panel/toolCalling.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import { URI, UriComponents } from '../../../../util/vs/base/common/uri';
import { IInstantiationService, ServicesAccessor } from '../../../../util/vs/platform/instantiation/common/instantiation';
import { ServiceCollection } from '../../../../util/vs/platform/instantiation/common/serviceCollection';
import { LanguageModelDataPart, LanguageModelDataPart2, LanguageModelPartAudience, LanguageModelPromptTsxPart, LanguageModelTextPart, LanguageModelTextPart2, LanguageModelToolMCPSource, LanguageModelToolResult } from '../../../../vscodeTypes';
import { isImageDataPart } from '../../../conversation/common/languageModelChatMessageHelpers';
import { detectImageMimeType, isImageDataPart } from '../../../conversation/common/languageModelChatMessageHelpers';
import { IResultMetadata } from '../../../prompt/common/conversation';
import { IBuildPromptContext, IToolCall, IToolCallRound } from '../../../prompt/common/intents';
import { toJsonSchema } from '../../../tools/common/toJsonSchema';
Expand Down Expand Up @@ -456,7 +456,7 @@ enum ToolInvocationOutcome {
export async function imageDataPartToTSX(part: LanguageModelDataPart, githubToken?: string, urlOrRequestMetadata?: string | RequestMetadata, logService?: ILogService, imageService?: IImageService) {
if (isImageDataPart(part)) {
let imageData: Uint8Array = part.data;
let mimeType = part.mimeType;
let mimeType = detectImageMimeType(part.data) ?? part.mimeType;

if (imageService) {
try {
Expand All @@ -473,7 +473,7 @@ export async function imageDataPartToTSX(part: LanguageModelDataPart, githubToke
const isChatRequest = typeof urlOrRequestMetadata !== 'string' && (urlOrRequestMetadata?.type === RequestType.ChatCompletions || urlOrRequestMetadata?.type === RequestType.ChatMessages);
if (githubToken && isChatRequest && imageService) {
try {
const uri = await imageService.uploadChatImageAttachment(imageData, 'tool-result-image', mimeType ?? 'image/png', githubToken);
const uri = await imageService.uploadChatImageAttachment(imageData, 'tool-result-image', mimeType ?? 'image/png', githubToken);
if (uri) {
imageSource = uri.toString();
}
Expand Down Expand Up @@ -638,6 +638,12 @@ class PrimitiveToolResult<T extends IPrimitiveToolResultProps> extends PromptEle
*/
private imageSizeBudgetLeft = (5 * 1024 * 1024) / 2; // 5MB

/**
* Track total image count to stay within model limits (e.g. Gemini's max_prompt_images).
* Reserve some budget for user-attached images by using half the model's limit.
*/
private imageCountBudgetLeft: number;

constructor(
props: T,
@IPromptEndpoint protected readonly endpoint: IPromptEndpoint,
Expand All @@ -649,6 +655,9 @@ class PrimitiveToolResult<T extends IPrimitiveToolResultProps> extends PromptEle
) {
super(props);
this.linkedResources = this.props.content.filter((c): c is LanguageModelDataPart => c instanceof LanguageModelDataPart && c.mimeType === McpLinkedResourceToolResult.mimeType);
this.imageCountBudgetLeft = endpoint?.maxPromptImages !== undefined
? Math.max(1, Math.floor(endpoint.maxPromptImages / 2))
: Infinity;
}

async render(): Promise<PromptPiece | undefined> {
Expand Down Expand Up @@ -696,13 +705,19 @@ class PrimitiveToolResult<T extends IPrimitiveToolResultProps> extends PromptEle
return '[Image content is not available because vision is not supported by the current model or is disabled by your organization.]';
}

// Check image count budget first
if (this.imageCountBudgetLeft <= 0) {
return '';
}
this.imageCountBudgetLeft--;

const githubToken = (await this.authService.getGitHubSession('any', { silent: true }))?.accessToken;
const uploadsEnabled = this.configurationService && this.experimentationService
? this.configurationService.getExperimentBasedConfig(ConfigKey.EnableChatImageUpload, this.experimentationService)
: false;

// Anthropic (from CAPI) currently does not support image uploads from tool calls.
const uploadToken = uploadsEnabled && modelCanUseMcpResultImageURL(this.endpoint) ? githubToken : undefined;
const uploadToken = uploadsEnabled && this.endpoint && modelCanUseMcpResultImageURL(this.endpoint) ? githubToken : undefined;

if (!uploadToken) {
if (this.imageSizeBudgetLeft < 0) {
Expand All @@ -715,7 +730,7 @@ class PrimitiveToolResult<T extends IPrimitiveToolResultProps> extends PromptEle
}
}

return Promise.resolve(imageDataPartToTSX(part, uploadToken, this.endpoint.urlOrRequestMetadata, this.logService, this.imageService));
return Promise.resolve(imageDataPartToTSX(part, uploadToken, this.endpoint?.urlOrRequestMetadata, this.logService, this.imageService));
}

protected onTSX(part: JSONTree.PromptElementJSON) {
Expand Down