-
Notifications
You must be signed in to change notification settings - Fork 15.5k
feature: langfuse thinking 及 文本edit的问题修复( #371); 省略 diff 以减少内存峰值 #376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
36bf4f2
3fb48ec
bcbb8a6
c633891
4cbef96
0fcdcd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,9 +63,26 @@ export function stripTrailingWhitespace(str: string): string { | |
| return result | ||
| } | ||
|
|
||
/**
 * Normalizes whitespace for fuzzy matching by expanding every tab character
 * in the string (leading or interior) to four spaces.
 *
 * This handles the case where Read tool output renders tabs as spaces,
 * so users copy spaces from the output but the file actually has tabs.
 *
 * The expansion width must stay in sync with mapNormalizedMatchBackToFile,
 * which advances the normalized offset by 4 for each tab it encounters.
 *
 * @param str The text to normalize
 * @returns A copy of `str` with each tab replaced by four spaces
 */
function normalizeWhitespace(str: string): string {
  // Four spaces per tab — a fixed expansion, not tab-stop alignment.
  return str.replace(/\t/g, '    ')
}
|
|
||
| /** | ||
| * Finds the actual string in the file content that matches the search string, | ||
| * accounting for quote normalization | ||
| * accounting for quote normalization and tab/space differences. | ||
| * | ||
| * Matching cascade: | ||
| * 1. Exact match | ||
| * 2. Quote normalization (curly → straight quotes) | ||
| * 3. Tab/space normalization (every tab is expanded to four spaces, anywhere in the string) | ||
| * 4. Quote + tab/space normalization combined | ||
| * | ||
| * @param fileContent The file content to search in | ||
| * @param searchString The string to search for | ||
| * @returns The actual string found in the file, or null if not found | ||
|
|
@@ -89,9 +106,92 @@ export function findActualString( | |
| return fileContent.substring(searchIndex, searchIndex + searchString.length) | ||
| } | ||
|
|
||
| // Try with tab/space normalization — handles the case where Read output | ||
| // renders tabs as spaces and the user copies the rendered version | ||
| const wsNormalizedFile = normalizeWhitespace(fileContent) | ||
| const wsNormalizedSearch = normalizeWhitespace(searchString) | ||
|
|
||
| const wsSearchIndex = wsNormalizedFile.indexOf(wsNormalizedSearch) | ||
| if (wsSearchIndex !== -1) { | ||
| // Map the match position back to the original file content. | ||
| // We need to find the corresponding range in the original string. | ||
| return mapNormalizedMatchBackToFile(fileContent, wsNormalizedFile, wsSearchIndex, wsNormalizedSearch.length) | ||
| } | ||
|
|
||
| // Try combined: quote normalization + tab/space normalization | ||
| const combinedFile = normalizeWhitespace(normalizedFile) | ||
| const combinedSearch = normalizeWhitespace(normalizedSearch) | ||
|
|
||
| const combinedIndex = combinedFile.indexOf(combinedSearch) | ||
| if (combinedIndex !== -1) { | ||
| return mapNormalizedMatchBackToFile(fileContent, combinedFile, combinedIndex, combinedSearch.length) | ||
| } | ||
|
|
||
| return null | ||
| } | ||
|
|
||
/**
 * Given a match found in a tab-expanded ("normalized") version of
 * fileContent, map the match range back to the original fileContent and
 * return the corresponding original substring.
 *
 * Strategy: walk fileContent one character at a time while tracking the
 * offset the same character would have in the normalized string. A tab
 * advances the normalized offset by 4 and the original offset by 1; every
 * other character advances both by 1.
 *
 * If a match boundary falls inside an expanded tab, the whole tab is
 * included in the result (start snaps to the tab, end snaps just past it).
 *
 * @param fileContent The original (un-normalized) file content
 * @param normalizedFile The normalized file content; kept for signature
 *   compatibility, the mapping itself is derived from fileContent alone
 * @param normalizedStart Start offset of the match in the normalized string
 * @param normalizedLength Length of the match in the normalized string
 * @returns The substring of fileContent covering the matched range
 */
function mapNormalizedMatchBackToFile(
  fileContent: string,
  normalizedFile: string,
  normalizedStart: number,
  normalizedLength: number,
): string {
  const TAB_WIDTH = 4
  const normalizedEnd = normalizedStart + normalizedLength

  let normPos = 0
  let origPos = 0
  let origStart = -1
  let origEnd = -1

  while (origPos < fileContent.length && normPos <= normalizedEnd) {
    if (normPos === normalizedStart) {
      origStart = origPos
    }
    if (normPos === normalizedEnd) {
      origEnd = origPos
      break
    }

    const width = fileContent[origPos] === '\t' ? TAB_WIDTH : 1
    const nextNormPos = normPos + width

    // A boundary strictly inside an expanded tab cannot be hit exactly, so
    // snap outward to include the whole tab. (Never true when width is 1.)
    if (origStart === -1 && normPos < normalizedStart && nextNormPos > normalizedStart) {
      origStart = origPos
    }
    if (origEnd === -1 && normPos < normalizedEnd && nextNormPos > normalizedEnd) {
      origEnd = origPos + 1
    }

    normPos = nextNormPos
    origPos++
  }

  // The loop stops as soon as fileContent is exhausted, which is before it
  // can record a boundary that coincides with end-of-file. Resolve those
  // boundaries here instead of guessing with a length ratio.
  if (origStart === -1 && normPos === normalizedStart) {
    origStart = origPos
  }
  if (origEnd === -1 && normPos >= normalizedEnd) {
    origEnd = origPos
  }

  // Last resort for out-of-range requests: clamp to the file bounds.
  if (origStart === -1) origStart = 0
  if (origEnd === -1) origEnd = fileContent.length

  return fileContent.substring(origStart, origEnd)
}
|
Comment on lines
+143
to
+193
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
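The loop terminates as soon as `origPos` reaches `fileContent.length`, so when the match extends to the end of the file `origEnd` is never assigned inside the loop and the ratio heuristic is used instead. Counter-example: for `fileContent = "\t\t\t\t    abc"` and a normalized match at offset 16 of length 7, the heuristic yields `origEnd = 7` instead of 11.
Because the callers in `findActualString` return this substring directly, the wrong range is propagated to the edit.
🔧 Proposed fix — explicitly snap origEnd to end-of-file when the loop exhausted fileContent
while (origPos < fileContent.length && normPos <= normalizedStart + normalizedLength) {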
// ... existing body ...
}
- // Fallback: if we couldn't map precisely, use character-count heuristic
- if (origStart === -1) origStart = 0
- if (origEnd === -1) {
- // Approximate: use the ratio of original to normalized length
- const ratio = fileContent.length / normalizedFile.length
- origEnd = Math.round(origStart + normalizedLength * ratio)
- }
+ // If the loop exhausted fileContent while the match still ran to (or past) its end,
+ // the match extends to EOF and origEnd should be the end of fileContent.
+ if (origEnd === -1 && normPos >= normalizedStart + normalizedLength) {
+ origEnd = fileContent.length
+ }
+ // Last-resort fallback (should not normally be reached given the cascade above).
+ if (origStart === -1) origStart = 0
+ if (origEnd === -1) origEnd = fileContent.length🤖 Prompt for AI Agents |
||
|
|
||
| /** | ||
| * When old_string matched via quote normalization (curly quotes in file, | ||
| * straight quotes from model), apply the same curly quote style to new_string | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
JSDoc doesn't match the implementation.
The doc says it "collapses leading whitespace on each line to a canonical form", but
`str.replace(/\t/g, '    ')` replaces every tab in the string regardless of position (interior tabs included). Either tighten the implementation to leading-only whitespace or update the comment to describe what actually happens (global tab→4-space expansion). Note the global behavior is also what the rest of the cascade relies on, so the comment is the simpler fix.
📝 Suggested doc tweak
📝 Committable suggestion
🤖 Prompt for AI Agents