Skip to content

Commit 9ca4508

Browse files
authored
cherry pick 0.37 fixes (#4265)
* cherry pick 0.37 fixes * fix merge conflict in test imports
1 parent da24bf4 commit 9ca4508

5 files changed

Lines changed: 266 additions & 16 deletions

File tree

src/extension/tools/common/toolUtils.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,39 @@ export function formatUriForFileWidget(uriOrLocation: URI | Location, metadata?:
2424

2525
return `[](${uri.toString()}${rangePart})`;
2626
}
27+
28+
/**
29+
* Encodes the URL hostname using punycode for security purposes.
30+
* This helps prevent homograph attacks by converting internationalized domain names to ASCII.
31+
* @param url The URL to encode
32+
* @returns An object containing the encoded URL and whether it was different from the original
33+
*/
34+
export function encodeUrlHostname(url: string): { encoded: string; isDifferent: boolean } {
35+
if (!URL.canParse(url)) {
36+
return { encoded: url, isDifferent: false };
37+
}
38+
39+
// The URL constructor automatically encodes Unicode hostnames to punycode
40+
const urlObj = new URL(url);
41+
let encodedUrl = urlObj.href;
42+
43+
// URL constructor adds trailing slash or slash before query/hash when original doesn't have path
44+
// e.g., "https://example.com?foo" becomes "https://example.com/?foo"
45+
const hasNoPath = !url.includes('/', url.indexOf('://') + 3);
46+
if (hasNoPath) {
47+
// Remove slash before query or hash that was added by URL constructor
48+
encodedUrl = encodedUrl.replace(/\/(\?|#)/, '$1');
49+
// Remove trailing slash if added at end
50+
if (encodedUrl.endsWith('/') && !url.endsWith('/')) {
51+
encodedUrl = encodedUrl.slice(0, -1);
52+
}
53+
}
54+
55+
// Check if the URL was changed (hostname was encoded)
56+
const isDifferent = url !== encodedUrl;
57+
58+
return {
59+
encoded: encodedUrl,
60+
isDifferent
61+
};
62+
}

src/extension/tools/node/applyPatch/parser.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,26 @@ export function text_to_patch(
791791
return [parser.patch, parser.fuzz];
792792
}
793793

794+
export function identify_files_affected(text: string): Array<string> {
795+
const lines = text.trim().split('\n');
796+
const result = new Set<string>();
797+
for (const line of lines) {
798+
if (line.startsWith(UPDATE_FILE_PREFIX)) {
799+
result.add(line.slice(UPDATE_FILE_PREFIX.length));
800+
} else if (line.startsWith(DELETE_FILE_PREFIX)) {
801+
result.add(line.slice(DELETE_FILE_PREFIX.length));
802+
} else if (line.startsWith(MOVE_FILE_TO_PREFIX)) {
803+
result.add(line.slice(MOVE_FILE_TO_PREFIX.length));
804+
} else if (line.startsWith(DELETE_FILE_PREFIX)) {
805+
result.add(line.slice(DELETE_FILE_PREFIX.length));
806+
} else if (line.startsWith(ADD_FILE_PREFIX)) {
807+
result.add(line.slice(ADD_FILE_PREFIX.length));
808+
}
809+
}
810+
811+
return [...result];
812+
}
813+
794814
export function identify_files_needed(text: string): Array<string> {
795815
const lines = text.trim().split('\n');
796816
const result = new Set<string>();

src/extension/tools/node/applyPatchTool.tsx

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ import { ICopilotTool, ToolRegistry } from '../common/toolsRegistry';
4646
import { IToolsService } from '../common/toolsService';
4747
import { formatUriForFileWidget } from '../common/toolUtils';
4848
import { PATCH_PREFIX, PATCH_SUFFIX } from './applyPatch/parseApplyPatch';
49-
import { ActionType, Commit, DiffError, FileChange, identify_files_added, identify_files_needed, InvalidContextError, InvalidPatchFormatError, processPatch } from './applyPatch/parser';
49+
import { ActionType, Commit, DiffError, FileChange, identify_files_added, identify_files_affected, identify_files_needed, InvalidContextError, InvalidPatchFormatError, processPatch } from './applyPatch/parser';
5050
import { EditFileResult, IEditedFile } from './editFileToolResult';
5151
import { canExistingFileBeEdited, createEditConfirmation, formatDiffAsUnified, getDisallowedEditUriError, logEditToolResult, openDocumentAndSnapshot } from './editFileToolUtils';
5252
import { sendEditNotebookTelemetry } from './editNotebookTool';
@@ -652,7 +652,7 @@ export class ApplyPatchTool implements ICopilotTool<IApplyPatchToolParams> {
652652
}
653653

654654
async prepareInvocation(options: vscode.LanguageModelToolInvocationPrepareOptions<IApplyPatchToolParams>, token: vscode.CancellationToken): Promise<vscode.PreparedToolInvocation> {
655-
const uris = [...identify_files_needed(options.input.input), ...identify_files_added(options.input.input)].map(f => URI.file(f));
655+
const uris = [...identify_files_affected(options.input.input)].map(f => URI.file(f));
656656

657657
return this.instantiationService.invokeFunction(
658658
createEditConfirmation,
@@ -696,16 +696,27 @@ export class ApplyPatchTool implements ICopilotTool<IApplyPatchToolParams> {
696696
const diffResults = await Promise.all(
697697
Object.entries(commit.changes).map(async ([file, changes]) => {
698698
const uri = resolveToolInputPath(file, promptPathRepresentationService);
699-
if (!urisNeedingConfirmationSet.has(uri)) {
699+
const moveTo = changes.movePath ? resolveToolInputPath(changes.movePath, promptPathRepresentationService) : undefined;
700+
if (!urisNeedingConfirmationSet.has(uri) && !(moveTo && urisNeedingConfirmationSet.has(moveTo))) {
700701
return;
701702
}
702703

703-
return await instantiationService.invokeFunction(
704+
if (changes.type === ActionType.DELETE) {
705+
return l10n.t`Delete ${formatUriForFileWidget(uri)}`;
706+
}
707+
708+
let diff = await instantiationService.invokeFunction(
704709
formatDiffAsUnified,
705710
uri,
706711
changes.oldContent || '',
707712
changes.newContent || ''
708713
);
714+
715+
if (moveTo) {
716+
diff = l10n.t`Move from ${formatUriForFileWidget(uri)} to ${formatUriForFileWidget(moveTo)}\n\n` + diff;
717+
}
718+
719+
return diff;
709720
})
710721
);
711722

src/extension/tools/node/test/toolUtils.spec.ts

Lines changed: 182 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55

6-
import { afterAll, beforeAll, beforeEach, describe, expect, suite, test } from 'vitest';
6+
import { afterAll, beforeAll, beforeEach, describe, expect, suite, test, it } from 'vitest';
77
import { ConfigKey, IConfigurationService } from '../../../../platform/configuration/common/configurationService';
88
import { InMemoryConfigurationService } from '../../../../platform/configuration/test/common/inMemoryConfigurationService';
99
import { IFileSystemService } from '../../../../platform/filesystem/common/fileSystemService';
@@ -18,6 +18,7 @@ import { SyncDescriptor } from '../../../../util/vs/platform/instantiation/commo
1818
import { IInstantiationService } from '../../../../util/vs/platform/instantiation/common/instantiation';
1919
import { createExtensionUnitTestingServices } from '../../../test/node/services';
2020
import { assertFileOkForTool, isDirExternalAndNeedsConfirmation, isFileExternalAndNeedsConfirmation } from '../toolUtils';
21+
import { encodeUrlHostname } from '../../common/toolUtils';
2122

2223
class TestIgnoreService extends NullIgnoreService {
2324
private readonly _ignoredUris = new Set<string>();
@@ -266,3 +267,183 @@ suite('toolUtils - external file existence', () => {
266267
expect(await invokeIsFileExternalAndNeedsConfirmation(URI.file('/workspace/nonexistent.ts'))).toBe(false);
267268
});
268269
});
270+
271+
describe('encodeUrlHostname', () => {
272+
describe('ASCII URLs', () => {
273+
it('handles standard ASCII domain', () => {
274+
const result = encodeUrlHostname('https://example.com');
275+
expect(result.encoded).toBe('https://example.com');
276+
expect(result.isDifferent).toBe(false);
277+
});
278+
279+
it('handles ASCII domain with path', () => {
280+
const result = encodeUrlHostname('https://example.com/path/to/page');
281+
expect(result.encoded).toBe('https://example.com/path/to/page');
282+
expect(result.isDifferent).toBe(false);
283+
});
284+
285+
it('handles ASCII domain with query string', () => {
286+
const result = encodeUrlHostname('https://example.com/page?foo=bar&baz=qux');
287+
expect(result.encoded).toBe('https://example.com/page?foo=bar&baz=qux');
288+
expect(result.isDifferent).toBe(false);
289+
});
290+
291+
it('handles ASCII domain with fragment', () => {
292+
const result = encodeUrlHostname('https://example.com/page#section');
293+
expect(result.encoded).toBe('https://example.com/page#section');
294+
expect(result.isDifferent).toBe(false);
295+
});
296+
297+
it('handles http scheme', () => {
298+
const result = encodeUrlHostname('http://example.com');
299+
expect(result.encoded).toBe('http://example.com');
300+
expect(result.isDifferent).toBe(false);
301+
});
302+
});
303+
304+
describe('internationalized domain names (IDN)', () => {
305+
it('encodes Cyrillic domain', () => {
306+
const result = encodeUrlHostname('https://пример.рф');
307+
expect(result.encoded).toBe('https://xn--e1afmkfd.xn--p1ai');
308+
expect(result.isDifferent).toBe(true);
309+
});
310+
311+
it('encodes Chinese domain', () => {
312+
const result = encodeUrlHostname('https://例え.jp');
313+
expect(result.encoded).toBe('https://xn--r8jz45g.jp');
314+
expect(result.isDifferent).toBe(true);
315+
});
316+
317+
it('encodes German domain with umlaut', () => {
318+
const result = encodeUrlHostname('https://müller.de');
319+
expect(result.encoded).toBe('https://xn--mller-kva.de');
320+
expect(result.isDifferent).toBe(true);
321+
});
322+
323+
it('encodes Arabic domain', () => {
324+
const result = encodeUrlHostname('https://مثال.السعودية');
325+
expect(result.encoded).toBe('https://xn--mgbh0fb.xn--mgberp4a5d4ar');
326+
expect(result.isDifferent).toBe(true);
327+
});
328+
329+
it('preserves path when encoding IDN', () => {
330+
const result = encodeUrlHostname('https://пример.рф/path/to/page');
331+
expect(result.encoded).toBe('https://xn--e1afmkfd.xn--p1ai/path/to/page');
332+
expect(result.isDifferent).toBe(true);
333+
});
334+
335+
it('preserves query string when encoding IDN', () => {
336+
const result = encodeUrlHostname('https://пример.рф?foo=bar');
337+
expect(result.encoded).toBe('https://xn--e1afmkfd.xn--p1ai?foo=bar');
338+
expect(result.isDifferent).toBe(true);
339+
});
340+
});
341+
342+
describe('URLs with port', () => {
343+
it('handles ASCII domain with port', () => {
344+
const result = encodeUrlHostname('https://example.com:8080');
345+
expect(result.encoded).toBe('https://example.com:8080');
346+
expect(result.isDifferent).toBe(false);
347+
});
348+
349+
it('encodes IDN with port', () => {
350+
const result = encodeUrlHostname('https://пример.рф:8080');
351+
expect(result.encoded).toBe('https://xn--e1afmkfd.xn--p1ai:8080');
352+
expect(result.isDifferent).toBe(true);
353+
});
354+
355+
it('handles port with path', () => {
356+
const result = encodeUrlHostname('https://пример.рф:8080/path');
357+
expect(result.encoded).toBe('https://xn--e1afmkfd.xn--p1ai:8080/path');
358+
expect(result.isDifferent).toBe(true);
359+
});
360+
});
361+
362+
describe('URLs with userinfo', () => {
363+
it('handles userinfo with ASCII domain', () => {
364+
const result = encodeUrlHostname('https://user:pass@example.com');
365+
expect(result.encoded).toBe('https://user:pass@example.com');
366+
expect(result.isDifferent).toBe(false);
367+
});
368+
369+
it('encodes IDN with userinfo', () => {
370+
const result = encodeUrlHostname('https://user:pass@пример.рф');
371+
expect(result.encoded).toBe('https://user:pass@xn--e1afmkfd.xn--p1ai');
372+
expect(result.isDifferent).toBe(true);
373+
});
374+
375+
it('handles userinfo with port', () => {
376+
const result = encodeUrlHostname('https://user:pass@пример.рф:8080');
377+
expect(result.encoded).toBe('https://user:pass@xn--e1afmkfd.xn--p1ai:8080');
378+
expect(result.isDifferent).toBe(true);
379+
});
380+
381+
it('handles username without password', () => {
382+
const result = encodeUrlHostname('https://user@пример.рф');
383+
expect(result.encoded).toBe('https://user@xn--e1afmkfd.xn--p1ai');
384+
expect(result.isDifferent).toBe(true);
385+
});
386+
});
387+
388+
describe('subdomain handling', () => {
389+
it('encodes subdomain with non-ASCII characters', () => {
390+
const result = encodeUrlHostname('https://поддомен.пример.рф');
391+
expect(result.encoded).toBe('https://xn--d1aad1agbce.xn--e1afmkfd.xn--p1ai');
392+
expect(result.isDifferent).toBe(true);
393+
});
394+
395+
it('handles mixed ASCII and non-ASCII subdomains', () => {
396+
const result = encodeUrlHostname('https://www.пример.рф');
397+
expect(result.encoded).toBe('https://www.xn--e1afmkfd.xn--p1ai');
398+
expect(result.isDifferent).toBe(true);
399+
});
400+
});
401+
402+
describe('edge cases', () => {
403+
it('handles localhost', () => {
404+
const result = encodeUrlHostname('http://localhost:3000');
405+
expect(result.encoded).toBe('http://localhost:3000');
406+
expect(result.isDifferent).toBe(false);
407+
});
408+
409+
it('handles IP address', () => {
410+
const result = encodeUrlHostname('http://192.168.1.1:8080');
411+
expect(result.encoded).toBe('http://192.168.1.1:8080');
412+
expect(result.isDifferent).toBe(false);
413+
});
414+
415+
it('handles IPv6 address', () => {
416+
const result = encodeUrlHostname('http://[::1]:8080');
417+
expect(result.encoded).toBe('http://[::1]:8080');
418+
expect(result.isDifferent).toBe(false);
419+
});
420+
421+
it('handles empty authority gracefully', () => {
422+
const result = encodeUrlHostname('file:///path/to/file');
423+
expect(result.encoded).toBe('file:///path/to/file');
424+
expect(result.isDifferent).toBe(false);
425+
});
426+
427+
it('handles invalid URL gracefully', () => {
428+
const result = encodeUrlHostname('not a url');
429+
expect(result.encoded).toBe('not a url');
430+
expect(result.isDifferent).toBe(false);
431+
});
432+
});
433+
434+
describe('homograph attack prevention', () => {
435+
it('encodes Cyrillic "a" that looks like Latin "a"', () => {
436+
// Cyrillic а (U+0430) vs Latin a (U+0061)
437+
const result = encodeUrlHostname('https://exаmple.com'); // Contains Cyrillic а
438+
expect(result.isDifferent).toBe(true);
439+
expect(result.encoded).toContain('xn--');
440+
});
441+
442+
it('encodes Greek omicron that looks like Latin "o"', () => {
443+
// Greek ο (U+03BF) vs Latin o (U+006F)
444+
const result = encodeUrlHostname('https://gοοgle.com'); // Contains Greek ο
445+
expect(result.isDifferent).toBe(true);
446+
expect(result.encoded).toContain('xn--');
447+
});
448+
});
449+
});

src/extension/tools/vscode-node/fetchWebPageTool.tsx

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Licensed under the MIT License. See License.txt in the project root for license information.
44
*--------------------------------------------------------------------------------------------*/
55
import { BasePromptElementProps, Chunk, PromptElement, PromptSizing, TextChunk, useKeepWith } from '@vscode/prompt-tsx';
6-
import { CancellationToken, LanguageModelPromptTsxPart, LanguageModelTextPart, LanguageModelDataPart, LanguageModelToolInvocationOptions, LanguageModelToolInvocationPrepareOptions, LanguageModelToolResult, lm, PreparedToolInvocation, ProviderResult } from 'vscode';
6+
import { CancellationToken, LanguageModelDataPart, LanguageModelPromptTsxPart, LanguageModelTextPart, LanguageModelToolInvocationOptions, LanguageModelToolInvocationPrepareOptions, LanguageModelToolResult, lm, PreparedToolInvocation, ProviderResult } from 'vscode';
77
import { FileChunkAndScore } from '../../../platform/chunking/common/chunk';
88
import { ILogService } from '../../../platform/log/common/logService';
99
import { UrlChunkEmbeddingsIndex } from '../../../platform/urlChunkSearch/node/urlChunkEmbeddingsIndex';
@@ -15,6 +15,7 @@ import { renderPromptElementJSON } from '../../prompts/node/base/promptRenderer'
1515
import { imageDataPartToTSX } from '../../prompts/node/panel/toolCalling';
1616
import { ToolName } from '../common/toolNames';
1717
import { ICopilotTool, ToolRegistry } from '../common/toolsRegistry';
18+
import { encodeUrlHostname } from '../common/toolUtils';
1819

1920
interface IFetchWebPageParams {
2021
urls: string[];
@@ -67,10 +68,11 @@ class FetchWebPageTool implements ICopilotTool<IFetchWebPageParams> {
6768
if (!tool) {
6869
throw new Error('Tool not found');
6970
}
70-
const { urls } = options.input;
71-
const { content } = await lm.invokeTool(internalToolName, options, token);
72-
if (urls.length !== content.length) {
73-
this._logService.error(`Expected ${urls.length} responses but got ${content.length}`);
71+
// Encode URLs for security before processing
72+
const encodedUrls = options.input.urls.map(url => encodeUrlHostname(url).encoded);
73+
const { content } = await lm.invokeTool(internalToolName, { ...options, input: { ...options.input, urls: encodedUrls } }, token);
74+
if (encodedUrls.length !== content.length) {
75+
this._logService.error(`Expected ${encodedUrls.length} responses but got ${content.length}`);
7476
return new LanguageModelToolResult([
7577
new LanguageModelTextPart('Error: I did not receive the expected number of responses from the tool.')
7678
]);
@@ -80,9 +82,9 @@ class FetchWebPageTool implements ICopilotTool<IFetchWebPageParams> {
8082
const validTextContent: Array<{ readonly uri: URI; readonly content: string }> = [];
8183
const imageResults: WebPageImageResult[] = [];
8284

83-
for (let i = 0; i < urls.length; i++) {
85+
for (let i = 0; i < encodedUrls.length; i++) {
8486
try {
85-
const uri = URI.parse(urls[i]);
87+
const uri = URI.parse(encodedUrls[i]);
8688
const contentPart = content[i];
8789

8890
if (options.model?.capabilities.supportsImageToText && isImageDataPart(contentPart)) {
@@ -97,13 +99,13 @@ class FetchWebPageTool implements ICopilotTool<IFetchWebPageParams> {
9799
if (typeof textValue === 'string') {
98100
validTextContent.push({ uri, content: textValue });
99101
} else {
100-
this._logService.warn(`Unsupported content type at index ${i}: ${urls[i]}`);
101-
invalidUrls.push(urls[i]);
102+
this._logService.warn(`Unsupported content type at index ${i}: ${encodedUrls[i]}`);
103+
invalidUrls.push(encodedUrls[i]);
102104
}
103105
}
104106
} catch (error) {
105-
this._logService.error(`Invalid URL at index ${i}: ${urls[i]}`, error);
106-
invalidUrls.push(urls[i]);
107+
this._logService.error(`Invalid URL at index ${i}: ${encodedUrls[i]}`, error);
108+
invalidUrls.push(encodedUrls[i]);
107109
}
108110
}
109111

0 commit comments

Comments
 (0)