From 91d67a4ff61ffe1dcdff4792b5e6aa89cb91ed8d Mon Sep 17 00:00:00 2001 From: nyxst4ck <289980115+nyxst4ck@users.noreply.github.com> Date: Wed, 17 Jun 2026 21:26:35 -0300 Subject: [PATCH] fix(edit): correct case-insensitive search/replace offsets for non-length-preserving lowercasing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit caseInsensitiveMatch located the match by calling indexOf on the fully lowercased file content, then returned that index (plus searchContent.length) as a position into the ORIGINAL content. String.prototype.toLowerCase is not length-preserving for every character (e.g. "İ" U+0130 lowercases to "i" + a combining dot, two UTF-16 units), so any such character before or within the match shifts every subsequent index. The returned slice was misaligned, and an edit_existing_file / multi-edit replacement landed at the wrong offset and corrupted the file (e.g. "const value = 1;" became "cconst value = 2;"). Fix: scan the original content and, for each candidate start, lowercase only the aligned slice, comparing against the lowercased search. Indices stay anchored to the original string and the end index is derived from the matched slice rather than the search length. Adds regression tests in findSearchMatch.vitest.ts (both an earlier character and the matched region itself changing length) and an end-to-end test in executeFindAndReplace.vitest.ts. 
/**
 * Case-insensitive matching strategy.
 *
 * Finds the first region of `fileContent` whose lowercased form equals the
 * lowercased `searchContent`, and reports it as indices into the ORIGINAL
 * (un-lowercased) content.
 *
 * `String.prototype.toLowerCase` is not guaranteed to be length-preserving
 * (e.g. "İ" U+0130 lowercases to "i" + a combining dot, two UTF-16 units), so
 * an index found in the lowercased file cannot be used directly as an index
 * into the original. Instead of re-lowercasing a growing slice for every start
 * position (quadratic in the search length per start), the file is lowercased
 * once, occurrences are located with `indexOf`, and their offsets are
 * translated back through a lowercased-offset -> original-offset table.
 *
 * An occurrence that begins or ends inside the lowercased expansion of a
 * single original character (e.g. searching "i" against "İ") has no
 * corresponding original index and is skipped.
 *
 * @param fileContent - The original file text to search in.
 * @param searchContent - The text to look for, compared case-insensitively.
 * @returns The `startIndex` (inclusive) and `endIndex` (exclusive) of the
 *   match in `fileContent`, or `null` when the search is empty or no aligned
 *   match exists.
 */
function caseInsensitiveMatch(
  fileContent: string,
  searchContent: string,
): BasicMatchResult | null {
  const lowerSearchContent = searchContent.toLowerCase();
  if (lowerSearchContent.length === 0) {
    return null;
  }

  const lowerFileContent = fileContent.toLowerCase();
  let lowerStart = lowerFileContent.indexOf(lowerSearchContent);
  if (lowerStart === -1) {
    // Common case for a fallback strategy: nothing to map, so skip building
    // the offset table entirely.
    return null;
  }

  const originalOffsets = mapLowercasedOffsetsToOriginal(
    fileContent,
    lowerFileContent.length,
  );
  if (originalOffsets === null) {
    // Alignment could not be established; refusing to match is safer than
    // returning indices that might corrupt the edit.
    return null;
  }

  while (lowerStart !== -1) {
    const lowerEnd = lowerStart + lowerSearchContent.length;
    const startIndex = originalOffsets[lowerStart] ?? -1;
    const endIndex = originalOffsets[lowerEnd] ?? -1;
    if (startIndex >= 0 && endIndex >= 0) {
      return { startIndex, endIndex };
    }
    // This occurrence starts or ends in the middle of an expanded character;
    // try the next occurrence in the lowercased text.
    lowerStart = lowerFileContent.indexOf(lowerSearchContent, lowerStart + 1);
  }

  return null;
}

/**
 * Builds a table translating offsets in the lowercased text back to offsets
 * in the original text.
 *
 * Entry `i` holds the original offset of the code point whose lowercased form
 * starts at lowercased offset `i`, or `-1` when `i` falls inside the expansion
 * of a single original code point. The final entry (`lowercasedLength`) maps
 * to `original.length` so that end-exclusive indices can be translated too.
 *
 * @param original - The original, un-lowercased text.
 * @param lowercasedLength - Length of `original.toLowerCase()`.
 * @returns The offset table, or `null` if the per-code-point lowercased widths
 *   do not add up to `lowercasedLength` (not expected in practice; guarded so
 *   that a misaligned table is never used).
 */
function mapLowercasedOffsetsToOriginal(
  original: string,
  lowercasedLength: number,
): Int32Array | null {
  const offsets = new Int32Array(lowercasedLength + 1).fill(-1);
  let originalOffset = 0;
  let lowerOffset = 0;

  // String iteration yields whole code points (surrogate pairs stay together).
  for (const codePoint of original) {
    offsets[lowerOffset] = originalOffset;
    originalOffset += codePoint.length;
    lowerOffset += codePoint.toLowerCase().length;
  }

  if (lowerOffset !== lowercasedLength) {
    return null;
  }
  offsets[lowerOffset] = originalOffset;
  return offsets;
}