Skip to content

Commit 4cbef96

Browse files
fix: Edit 工具增加 Tab/空格规范化匹配,修复中文和缩进文件编辑失败
Read 工具输出将 Tab 渲染为空格,用户复制后 Edit 工具无法匹配。 在 findActualString 中增加 Tab→空格规范化回退匹配,并精确映射回原始文件位置。 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent c633891 commit 4cbef96

2 files changed

Lines changed: 179 additions & 1 deletion

File tree

packages/builtin-tools/src/tools/FileEditTool/__tests__/utils.test.ts

Lines changed: 78 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,84 @@ describe("findActualString", () => {
106106
const result = findActualString("hello", "");
107107
expect(result).toBe("");
108108
});
109+
110+
// ── Tab/space normalization (Bug #2 reproduction) ──
111+
112+
test("finds match when search uses spaces but file uses tabs", () => {
113+
// File content uses Tab indentation
114+
const fileContent = "\tif (x) {\n\t\treturn 1;\n\t}";
115+
// User copies from Read output which renders tabs as spaces
116+
const searchWithSpaces = " if (x) {\n return 1;\n }";
117+
const result = findActualString(fileContent, searchWithSpaces);
118+
expect(result).not.toBeNull();
119+
expect(result).toBe(fileContent);
120+
});
121+
122+
test("finds match when search mixes tabs and spaces inconsistently", () => {
123+
const fileContent = "\tconst x = 1; // comment";
124+
const searchMixed = " const x = 1; // comment";
125+
const result = findActualString(fileContent, searchMixed);
126+
expect(result).not.toBeNull();
127+
});
128+
129+
test("finds match for single-line tab-to-space mismatch", () => {
130+
const fileContent = "\t\torder_price = NormalizeDouble(ask, digits);";
131+
const searchSpaces = " order_price = NormalizeDouble(ask, digits);";
132+
const result = findActualString(fileContent, searchSpaces);
133+
expect(result).not.toBeNull();
134+
});
135+
136+
// ── CJK / UTF-8 characters (Bug #1 reproduction) ──
137+
138+
test("finds match with CJK characters in content", () => {
139+
const fileContent = "input int x = 620; // 止盈点数(点) — 32个pip=320点";
140+
const result = findActualString(fileContent, fileContent);
141+
expect(result).toBe(fileContent);
142+
});
143+
144+
test("finds match with CJK characters when tab/space differs", () => {
145+
const fileContent = "\t// 向上突破 → Sell Limit (逆方向做空)";
146+
const searchSpaces = " // 向上突破 → Sell Limit (逆方向做空)";
147+
const result = findActualString(fileContent, searchSpaces);
148+
expect(result).not.toBeNull();
149+
expect(result).toBe(fileContent);
150+
});
151+
152+
// ── Multiline with tabs + CJK (combined Bug #1 + #2) ──
153+
154+
test("finds multiline match with tabs and CJK characters", () => {
155+
const fileContent = "\tif(effective_dir == BREAKOUT_UP)\n\t\t{\n\t\t\t// 向上突破\n\t\t}";
156+
const searchSpaces = " if(effective_dir == BREAKOUT_UP)\n {\n // 向上突破\n }";
157+
const result = findActualString(fileContent, searchSpaces);
158+
expect(result).not.toBeNull();
159+
expect(result).toBe(fileContent);
160+
});
161+
162+
// ── Returned string must be a valid substring of fileContent ──
163+
164+
test("returned string from tab match is a real substring of fileContent", () => {
165+
const fileContent = "prefix\n\t\tindented code\nsuffix";
166+
const searchSpaces = "prefix\n indented code\nsuffix";
167+
const result = findActualString(fileContent, searchSpaces);
168+
expect(result).not.toBeNull();
169+
expect(fileContent.includes(result!)).toBe(true);
170+
});
171+
172+
test("returned string from partial tab match is a real substring", () => {
173+
const fileContent = "line1\n\tif (x) {\n\t\tdoStuff();\n\t}\nline5";
174+
const searchSpaces = " if (x) {\n doStuff();\n }";
175+
const result = findActualString(fileContent, searchSpaces);
176+
expect(result).not.toBeNull();
177+
expect(fileContent.includes(result!)).toBe(true);
178+
});
179+
180+
test("tab match with mixed indentation levels", () => {
181+
const fileContent = "class Foo {\n\t\tmethod1() {\n\t\t\treturn 42;\n\t\t}\n}";
182+
const searchSpaces = "class Foo {\n method1() {\n return 42;\n }\n}";
183+
const result = findActualString(fileContent, searchSpaces);
184+
expect(result).not.toBeNull();
185+
expect(fileContent.includes(result!)).toBe(true);
186+
});
109187
});
110188

111189
// ─── preserveQuoteStyle ─────────────────────────────────────────────────

packages/builtin-tools/src/tools/FileEditTool/utils.ts

Lines changed: 101 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,26 @@ export function stripTrailingWhitespace(str: string): string {
6363
return result
6464
}
6565

66+
/**
67+
* Normalizes whitespace for fuzzy matching by converting tabs to spaces
68+
* and collapsing leading whitespace on each line to a canonical form.
69+
* This handles the case where Read tool output renders tabs as spaces,
70+
* so users copy spaces from the output but the file actually has tabs.
71+
*/
72+
function normalizeWhitespace(str: string): string {
73+
return str.replace(/\t/g, ' ')
74+
}
75+
6676
/**
6777
* Finds the actual string in the file content that matches the search string,
68-
* accounting for quote normalization
78+
* accounting for quote normalization and tab/space differences.
79+
*
80+
* Matching cascade:
81+
* 1. Exact match
82+
* 2. Quote normalization (curly → straight quotes)
83+
* 3. Tab/space normalization (every tab is expanded to spaces, not only leading ones)
84+
* 4. Quote + tab/space normalization combined
85+
*
6986
* @param fileContent The file content to search in
7087
* @param searchString The string to search for
7188
* @returns The actual string found in the file, or null if not found
@@ -89,9 +106,92 @@ export function findActualString(
89106
return fileContent.substring(searchIndex, searchIndex + searchString.length)
90107
}
91108

109+
// Try with tab/space normalization — handles the case where Read output
110+
// renders tabs as spaces and the user copies the rendered version
111+
const wsNormalizedFile = normalizeWhitespace(fileContent)
112+
const wsNormalizedSearch = normalizeWhitespace(searchString)
113+
114+
const wsSearchIndex = wsNormalizedFile.indexOf(wsNormalizedSearch)
115+
if (wsSearchIndex !== -1) {
116+
// Map the match position back to the original file content.
117+
// We need to find the corresponding range in the original string.
118+
return mapNormalizedMatchBackToFile(fileContent, wsNormalizedFile, wsSearchIndex, wsNormalizedSearch.length)
119+
}
120+
121+
// Try combined: quote normalization + tab/space normalization
122+
const combinedFile = normalizeWhitespace(normalizedFile)
123+
const combinedSearch = normalizeWhitespace(normalizedSearch)
124+
125+
const combinedIndex = combinedFile.indexOf(combinedSearch)
126+
if (combinedIndex !== -1) {
127+
return mapNormalizedMatchBackToFile(fileContent, combinedFile, combinedIndex, combinedSearch.length)
128+
}
129+
92130
return null
93131
}
94132

133+
/**
134+
* Given a match found in a normalized version of fileContent, map the match
135+
* position back to the original fileContent and extract the corresponding
136+
* substring.
137+
*
138+
* Strategy: walk through both strings character by character, building a
139+
* mapping from normalized offset to original offset. When a tab is expanded
140+
* to 4 spaces in the normalized version, the normalized offset advances by 4
141+
* while the original offset advances by 1.
142+
*/
143+
function mapNormalizedMatchBackToFile(
144+
fileContent: string,
145+
normalizedFile: string,
146+
normalizedStart: number,
147+
normalizedLength: number,
148+
): string {
149+
// Build a sparse mapping from normalized position → original position.
150+
// We only need to map the range [normalizedStart, normalizedStart + normalizedLength].
151+
let normPos = 0
152+
let origPos = 0
153+
let origStart = -1
154+
let origEnd = -1
155+
156+
while (origPos < fileContent.length && normPos <= normalizedStart + normalizedLength) {
157+
if (normPos === normalizedStart) {
158+
origStart = origPos
159+
}
160+
if (normPos === normalizedStart + normalizedLength) {
161+
origEnd = origPos
162+
break
163+
}
164+
165+
const origChar = fileContent[origPos]!
166+
if (origChar === '\t') {
167+
// Tab expands to 4 spaces in normalized version
168+
const nextNormPos = normPos + 4
169+
// If normalizedStart falls within this expanded tab, snap to origPos
170+
if (normPos < normalizedStart && nextNormPos > normalizedStart && origStart === -1) {
171+
origStart = origPos
172+
}
173+
if (normPos < normalizedStart + normalizedLength && nextNormPos > normalizedStart + normalizedLength && origEnd === -1) {
174+
origEnd = origPos + 1
175+
}
176+
normPos = nextNormPos
177+
origPos++
178+
} else {
179+
normPos++
180+
origPos++
181+
}
182+
}
183+
184+
// Fallback: if we couldn't map precisely, use character-count heuristic
185+
if (origStart === -1) origStart = 0
186+
if (origEnd === -1) {
187+
// Approximate: use the ratio of original to normalized length
188+
const ratio = fileContent.length / normalizedFile.length
189+
origEnd = Math.round(origStart + normalizedLength * ratio)
190+
}
191+
192+
return fileContent.substring(origStart, origEnd)
193+
}
194+
95195
/**
96196
* When old_string matched via quote normalization (curly quotes in file,
97197
* straight quotes from model), apply the same curly quote style to new_string

0 commit comments

Comments
 (0)