Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions core/edit/searchAndReplace/executeFindAndReplace.vitest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,5 +326,20 @@ describe("executeFindAndReplace", () => {

expect(result).toBe("new content");
});

it("should not corrupt the file when an earlier character changes length on lowercase", () => {
// Regression test for the case-insensitive fallback.
// "İ" (U+0130) lowercases to "i" + combining dot (2 UTF-16 units). The
// case-insensitive match must report indices into the original content,
// otherwise the replacement lands at the wrong offset and corrupts the file.
// The "İ" sits on the first line, i.e. before the text being replaced, so any
// offset computed on a lowercased copy of the file would be shifted by one.
const content = "// İ marker comment\nconst value = 1;";
// The search text is upper-cased on purpose so that it can only be found
// case-insensitively.
// NOTE(review): the trailing `false` is presumably the "replace all" flag —
// confirm against the signature of executeFindAndReplace.
const result = executeFindAndReplace(
content,
"CONST VALUE = 1;",
"const value = 2;",
false,
);

// Only the second line may change; the comment line, including "İ", must
// survive untouched.
expect(result).toBe("// İ marker comment\nconst value = 2;");
});
});
});
44 changes: 37 additions & 7 deletions core/edit/searchAndReplace/findSearchMatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,50 @@ function trimmedMatch(

/**
 * Case-insensitive matching strategy.
 *
 * The match must be located in terms of the ORIGINAL file content. Searching a
 * lowercased copy of the whole file is unsafe because
 * `String.prototype.toLowerCase` is not guaranteed to be length-preserving
 * (e.g. "İ", U+0130, lowercases to "i" + U+0307, two UTF-16 units). A single
 * such character before or within the match would shift every subsequent
 * index, producing a misaligned slice and a corrupted edit.
 *
 * Instead the original content is scanned and, for each candidate start
 * position, only the slice that lines up with the lowercased search string is
 * lowercased and compared. The matched region in the original may be shorter
 * than the lowercased search, so the end index is derived from that slice
 * rather than from `searchContent.length`.
 *
 * @param fileContent - Original (not lowercased) text to search in.
 * @param searchContent - Text to look for; compared case-insensitively.
 * @returns Start (inclusive) and end (exclusive) UTF-16 offsets of the first
 *   match within `fileContent`, or `null` when the search is empty or nothing
 *   matches.
 */
function caseInsensitiveMatch(
  fileContent: string,
  searchContent: string,
): BasicMatchResult | null {
  const lowerSearchContent = searchContent.toLowerCase();
  const targetLength = lowerSearchContent.length;
  if (targetLength === 0) {
    // An empty search would "match" everywhere; treat it as no match.
    return null;
  }

  for (let startIndex = 0; startIndex < fileContent.length; startIndex++) {
    // Fast path: probe a window of the expected size with one slice and one
    // lowercase call. `slice` clamps at the end of the file, so the probe may
    // be shorter than `targetLength`; the end index is therefore taken from the
    // probe itself. A probe whose lowercased form equals the lowercased search
    // is a match by definition.
    const probe = fileContent.slice(startIndex, startIndex + targetLength);
    const lowerProbe = probe.toLowerCase();
    if (lowerProbe === lowerSearchContent) {
      return { startIndex, endIndex: startIndex + probe.length };
    }
    if (lowerProbe.length === probe.length) {
      // Lowercasing did not change the probe's length, so offsets inside it
      // are taken to line up one-to-one and no other end index can match at
      // this start. (Assumes lowercasing never shortens a string.)
      continue;
    }

    // Slow path, only reached when lowercasing changed the probe's length:
    // grow the candidate one UTF-16 unit at a time until its lowercased form
    // is at least as long as the search, then compare once. This keeps the
    // indices anchored to the original string.
    for (
      let endIndex = startIndex + 1;
      endIndex <= fileContent.length;
      endIndex++
    ) {
      const lowerCandidate = fileContent
        .slice(startIndex, endIndex)
        .toLowerCase();
      if (lowerCandidate.length < targetLength) {
        continue;
      }
      if (lowerCandidate === lowerSearchContent) {
        return { startIndex, endIndex };
      }
      // Long enough but different: no match at this start, advance it.
      break;
    }
  }

  return null;
}

Expand Down
30 changes: 30 additions & 0 deletions core/edit/searchAndReplace/findSearchMatch.vitest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,36 @@ describe("findSearchMatch", () => {
strategyName: "caseInsensitiveMatch",
});
});

it("should return original-cased indices when an earlier character changes length on lowercase", () => {
// "İ" (U+0130) lowercases to "i" + combining dot (2 UTF-16 units), so an
// index found in the lowercased file would be shifted relative to the original.
// In the original string "İ" is one UTF-16 unit at index 0 and the space is at
// index 1, so "test" occupies [2, 6).
const fileContent = "İ test";
// Upper-case search text, so the expectation below names the case-insensitive
// strategy as the one that produced the match.
const result = findSearchMatch(fileContent, "TEST");

expect(result).toEqual({
startIndex: 2,
endIndex: 6,
strategyName: "caseInsensitiveMatch",
});
// Slicing the ORIGINAL content with the reported indices must yield exactly
// the matched text.
expect(fileContent.slice(result!.startIndex, result!.endIndex)).toBe(
"test",
);
});

it("should return original-cased indices when the match itself changes length on lowercase", () => {
// The matched region in the file is a single code unit ("İ") that lowercases
// to two units, so endIndex must come from the original slice, not the search length.
// In the original string "İ" is at index 2, so the expected range is [2, 3).
const fileContent = "x İ y";
// The search literal is "i" followed by U+0307 (combining dot above), i.e. two
// UTF-16 units, even though it renders like a single character.
const result = findSearchMatch(fileContent, "i̇");

expect(result).toEqual({
startIndex: 2,
endIndex: 3,
strategyName: "caseInsensitiveMatch",
});
// The reported range must cover exactly the one original character.
expect(fileContent.slice(result!.startIndex, result!.endIndex)).toBe("İ");
});
});

describe("Whitespace ignored strategy fallback", () => {
Expand Down
Loading