@@ -133,7 +133,6 @@ impl FsWrite {
133133 } ,
134134 FsWrite :: StrReplace { old_str, new_str, .. } => {
135135 let file = os. fs . read_to_string ( & path) . await ?;
136- let matches = file. match_indices ( old_str) . collect :: < Vec < _ > > ( ) ;
137136 queue ! (
138137 output,
139138 style:: Print ( "Updating: " ) ,
@@ -142,14 +141,8 @@ impl FsWrite {
142141 StyledText :: reset( ) ,
143142 style:: Print ( "\n " ) ,
144143 ) ?;
145- match matches. len ( ) {
146- 0 => return Err ( eyre ! ( "no occurrences of \" {old_str}\" were found" ) ) ,
147- 1 => {
148- let file = file. replacen ( old_str, new_str, 1 ) ;
149- os. fs . write ( & path, file) . await ?;
150- } ,
151- x => return Err ( eyre ! ( "{x} occurrences of old_str were found when only 1 is expected" ) ) ,
152- }
144+ let updated = str_replace_fuzzy ( & file, old_str, new_str) ?;
145+ os. fs . write ( & path, updated) . await ?;
153146 } ,
154147 FsWrite :: Insert {
155148 insert_line, new_str, ..
@@ -393,7 +386,16 @@ impl FsWrite {
393386 bail ! ( "Path must not be empty" )
394387 } ;
395388 } ,
396- FsWrite :: StrReplace { path, .. } | FsWrite :: Insert { path, .. } => {
389+ FsWrite :: StrReplace { path, old_str, .. } => {
390+ let path = sanitize_path_tool_arg ( os, path) ;
391+ if !path. exists ( ) {
392+ bail ! ( "The provided path must exist in order to replace or insert contents into it" )
393+ }
394+ if old_str. trim ( ) . is_empty ( ) {
395+ bail ! ( "old_str must not be empty — use fs_read to read the file first, then provide the exact text to replace" )
396+ }
397+ } ,
398+ FsWrite :: Insert { path, .. } => {
397399 let path = sanitize_path_tool_arg ( os, path) ;
398400 if !path. exists ( ) {
399401 bail ! ( "The provided path must exist in order to replace or insert contents into it" )
@@ -858,6 +860,189 @@ fn syntect_to_crossterm_color(syntect: syntect::highlighting::Color) -> style::C
858860 }
859861}
860862
863+ /// Attempts to replace `old_str` with `new_str` in `content` using a fallback chain:
864+ ///
865+ /// 1. **Exact match** — fastest, most precise.
866+ /// 2. **Line-trimmed match** — matches lines after stripping leading/trailing whitespace,
867+ /// then replaces the original (indented) text. Handles indentation drift.
868+ /// 3. **Block-anchor match** — matches by first+last line as anchors, uses Levenshtein
869+ /// similarity on middle lines to find the best candidate. Handles minor edits in context.
870+ ///
871+ /// Returns an error if no strategy finds exactly one unambiguous match.
872+ fn str_replace_fuzzy ( content : & str , old_str : & str , new_str : & str ) -> eyre:: Result < String > {
873+ // Strategy 1: exact match
874+ let exact_count = content. match_indices ( old_str) . count ( ) ;
875+ match exact_count {
876+ 1 => return Ok ( content. replacen ( old_str, new_str, 1 ) ) ,
877+ x if x > 1 => {
878+ return Err ( eyre:: eyre!(
879+ "{x} occurrences of old_str were found when only 1 is expected — \
880+ add more surrounding context to old_str to make it unique"
881+ ) )
882+ } ,
883+ _ => { } ,
884+ }
885+
886+ // Strategies 2 & 3: fuzzy — both return a byte range to splice at
887+ let range = line_trimmed_match ( content, old_str)
888+ . or_else ( || block_anchor_match ( content, old_str) ) ;
889+
890+ if let Some ( ( start, end) ) = range {
891+ return Ok ( format ! ( "{}{}{}" , & content[ ..start] , new_str, & content[ end..] ) ) ;
892+ }
893+
894+ Err ( eyre:: eyre!(
895+ "no occurrences of the provided old_str were found (tried exact, \
896+ line-trimmed, and block-anchor matching) — use fs_read to read the \
897+ current file content and retry str_replace with the exact text. \
898+ Do NOT fall back to shell commands like sed."
899+ ) )
900+ }
901+
/// Drops blank lines from both ends of `lines`, leaving interior lines untouched.
///
/// A line counts as blank when it is empty or consists solely of whitespace.
/// If every line is blank the result is an empty vector.
fn strip_empty_boundary_lines(mut lines: Vec<&str>) -> Vec<&str> {
    // Cut the blank tail first so the forward scan below never has to walk it.
    let keep_until = lines
        .iter()
        .rposition(|line| !line.trim().is_empty())
        .map_or(0, |idx| idx + 1);
    lines.truncate(keep_until);

    // Remove the blank head in a single shift rather than one `remove(0)` per
    // line, which would move the whole remainder every time.
    let keep_from = lines
        .iter()
        .position(|line| !line.trim().is_empty())
        .unwrap_or(lines.len());
    lines.drain(..keep_from);

    lines
}
912+
/// Computes where each line starts, in bytes, for text that was split on `'\n'`.
///
/// The result has `lines.len() + 1` entries. `offsets[i]` is the byte offset of
/// line `i` in the text the lines were split from. Every line is charged one extra
/// byte for its `'\n'`, including the last one, so the final entry is the text
/// length plus one; callers that use it as an end bound must clamp it.
fn build_line_offsets(lines: &[&str]) -> Vec<usize> {
    let starts_after_each_line = lines.iter().scan(0usize, |cursor, line| {
        *cursor += line.len() + 1; // +1 for the '\n' that `split` removed
        Some(*cursor)
    });
    std::iter::once(0usize).chain(starts_after_each_line).collect()
}
924+
925+ /// Matches `find` against `content` by comparing trimmed lines.
926+ /// Returns the byte range `(start, end)` in `content` if exactly one match is found.
927+ fn line_trimmed_match ( content : & str , find : & str ) -> Option < ( usize , usize ) > {
928+ let content_lines: Vec < & str > = content. split ( '\n' ) . collect ( ) ;
929+ let search_lines = strip_empty_boundary_lines ( find. split ( '\n' ) . collect ( ) ) ;
930+
931+ if search_lines. is_empty ( ) {
932+ return None ;
933+ }
934+
935+ let offsets = build_line_offsets ( & content_lines) ;
936+
937+ let mut matches: Vec < ( usize , usize ) > = Vec :: new ( ) ;
938+ ' outer: for i in 0 ..=content_lines. len ( ) . saturating_sub ( search_lines. len ( ) ) {
939+ for ( j, search_line) in search_lines. iter ( ) . enumerate ( ) {
940+ if content_lines[ i + j] . trim ( ) != search_line. trim ( ) {
941+ continue ' outer;
942+ }
943+ }
944+ let start = offsets[ i] ;
945+ let end = offsets[ i + search_lines. len ( ) ] . saturating_sub ( 1 ) . min ( content. len ( ) ) ;
946+ matches. push ( ( start, end) ) ;
947+ }
948+
949+ if matches. len ( ) == 1 { Some ( matches[ 0 ] ) } else { None }
950+ }
951+
/// Computes the Levenshtein (edit) distance between `a` and `b`, counted in `char`s.
///
/// Insertions, deletions and substitutions each cost 1. The dynamic-programming
/// table is kept as a single row sized by the shorter input; the result does not
/// depend on argument order.
fn levenshtein(a: &str, b: &str) -> usize {
    let lhs: Vec<char> = a.chars().collect();
    let rhs: Vec<char> = b.chars().collect();
    // The shorter string indexes the row so the row allocation stays small.
    let (long, short) = if lhs.len() >= rhs.len() { (lhs, rhs) } else { (rhs, lhs) };

    // row[j] holds the distance between the processed prefix of `long` and short[..j].
    let mut row: Vec<usize> = (0..=short.len()).collect();
    for (i, &long_ch) in long.iter().enumerate() {
        // `diagonal` is the previous row's value one column to the left.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &short_ch) in short.iter().enumerate() {
            let above = row[j + 1];
            row[j + 1] = if long_ch == short_ch {
                diagonal
            } else {
                1 + above.min(row[j]).min(diagonal)
            };
            diagonal = above;
        }
    }
    row[short.len()]
}
975+
976+ const SIMILARITY_THRESHOLD : f64 = 0.6 ;
977+
978+ /// Matches `find` against `content` using first+last line as anchors and Levenshtein
979+ /// similarity on middle lines. Returns the byte range `(start, end)` in `content` if
980+ /// similarity exceeds the threshold and the match is unambiguous.
981+ fn block_anchor_match ( content : & str , find : & str ) -> Option < ( usize , usize ) > {
982+ let content_lines: Vec < & str > = content. split ( '\n' ) . collect ( ) ;
983+ let search_lines = strip_empty_boundary_lines ( find. split ( '\n' ) . collect ( ) ) ;
984+
985+ // Need at least 2 distinct lines for anchor matching
986+ if search_lines. len ( ) < 2 {
987+ return None ;
988+ }
989+
990+ let first = search_lines[ 0 ] . trim ( ) ;
991+ let last = search_lines[ search_lines. len ( ) - 1 ] . trim ( ) ;
992+
993+ // Symmetric anchors (e.g. `}` / `}`) produce too many false positives
994+ if first == last {
995+ return None ;
996+ }
997+
998+ // Build offsets once — reused for both scoring and final byte range
999+ let offsets = build_line_offsets ( & content_lines) ;
1000+
1001+ // Collect candidate windows where first and last anchor lines match
1002+ let mut candidates: Vec < ( usize , usize , f64 ) > = Vec :: new ( ) ;
1003+ for i in 0 ..content_lines. len ( ) {
1004+ if content_lines[ i] . trim ( ) != first { continue ; }
1005+ for j in ( i + 1 ) ..content_lines. len ( ) {
1006+ if content_lines[ j] . trim ( ) == last {
1007+ let score = similarity_score ( & content_lines, i, j, & search_lines) ;
1008+ candidates. push ( ( i, j, score) ) ;
1009+ break ;
1010+ }
1011+ }
1012+ }
1013+
1014+ // Pick the single best candidate above the threshold
1015+ let best = candidates
1016+ . into_iter ( )
1017+ . filter ( |& ( _, _, s) | s >= SIMILARITY_THRESHOLD )
1018+ . max_by ( |a, b| a. 2 . partial_cmp ( & b. 2 ) . unwrap_or ( std:: cmp:: Ordering :: Equal ) ) ?;
1019+
1020+ let start = offsets[ best. 0 ] ;
1021+ let end = offsets[ best. 1 + 1 ] . saturating_sub ( 1 ) . min ( content. len ( ) ) ;
1022+ Some ( ( start, end) )
1023+ }
1024+
1025+ /// Average Levenshtein similarity of middle lines between `search_lines` and the
1026+ /// corresponding window `content_lines[start..=end]`.
1027+ fn similarity_score ( content_lines : & [ & str ] , start : usize , end : usize , search_lines : & [ & str ] ) -> f64 {
1028+ let middle_count = search_lines. len ( ) . saturating_sub ( 2 ) ;
1029+ if middle_count == 0 { return 1.0 ; }
1030+
1031+ let mut total = 0.0 ;
1032+ let mut counted = 0 ;
1033+ for k in 1 ..search_lines. len ( ) . saturating_sub ( 1 ) {
1034+ let ci = start + k;
1035+ if ci >= end { break ; }
1036+ let a = content_lines[ ci] . trim ( ) ;
1037+ let b = search_lines[ k] . trim ( ) ;
1038+ let max_len = a. chars ( ) . count ( ) . max ( b. chars ( ) . count ( ) ) ;
1039+ if max_len == 0 { total += 1.0 ; counted += 1 ; continue ; }
1040+ total += 1.0 - levenshtein ( a, b) as f64 / max_len as f64 ;
1041+ counted += 1 ;
1042+ }
1043+ if counted == 0 { 1.0 } else { total / counted as f64 }
1044+ }
1045+
8611046#[ cfg( test) ]
8621047mod tests {
8631048 use std:: collections:: HashMap ;
@@ -870,6 +1055,110 @@ mod tests {
8701055 setup_test_directory,
8711056 } ;
8721057
1058+ // ── str_replace_fuzzy tests ──────────────────────────────────────────────
1059+
1060+ #[ test]
1061+ fn fuzzy_exact_match ( ) {
1062+ let content = "fn foo() {\n let x = 1;\n }\n " ;
1063+ let result = str_replace_fuzzy ( content, "let x = 1;" , "let x = 42;" ) . unwrap ( ) ;
1064+ assert_eq ! ( result, "fn foo() {\n let x = 42;\n }\n " ) ;
1065+ }
1066+
1067+ #[ test]
1068+ fn fuzzy_exact_match_fails_on_ambiguous ( ) {
1069+ let content = "let x = 1;\n let x = 1;\n " ;
1070+ assert ! ( str_replace_fuzzy( content, "let x = 1;" , "let x = 2;" ) . is_err( ) ) ;
1071+ }
1072+
1073+ #[ test]
1074+ fn fuzzy_line_trimmed_handles_indentation_drift ( ) {
1075+ // old_str has different indentation than the file
1076+ let content = "fn foo() {\n let x = 1;\n let y = 2;\n }\n " ;
1077+ let old_str = "let x = 1;\n let y = 2;" ; // no indentation
1078+ let result = str_replace_fuzzy ( content, old_str, "let x = 10;\n let y = 20;" ) . unwrap ( ) ;
1079+ assert ! ( result. contains( "let x = 10;" ) ) ;
1080+ assert ! ( result. contains( "let y = 20;" ) ) ;
1081+ }
1082+
1083+ #[ test]
1084+ fn fuzzy_block_anchor_handles_minor_middle_edits ( ) {
1085+ // Middle line has a minor typo vs what's in the file
1086+ let content = "fn calculate() {\n let result = a + b;\n return result;\n }\n " ;
1087+ // old_str has slightly different middle line
1088+ let old_str = "fn calculate() {\n let result = a + b; // sum\n return result;\n }" ;
1089+ let result = str_replace_fuzzy ( content, old_str, "fn calculate() {\n return a + b;\n }" ) ;
1090+ // Should find a match via block anchor (first+last line match)
1091+ assert ! ( result. is_ok( ) , "block anchor should match: {:?}" , result) ;
1092+ }
1093+
1094+ #[ test]
1095+ fn fuzzy_rejects_empty_old_str ( ) {
1096+ // empty old_str should be caught at validation, not reach fuzzy matching
1097+ let result = str_replace_fuzzy ( "fn foo() {}" , "" , "fn bar() {}" ) ;
1098+ assert ! ( result. is_err( ) ) ;
1099+ // str_replace_fuzzy itself: exact match on "" would match everywhere,
1100+ // so it should return an ambiguous error
1101+ let msg = result. unwrap_err ( ) . to_string ( ) ;
1102+ assert ! ( msg. contains( "occurrences" ) , "should report ambiguous match: {msg}" ) ;
1103+ }
1104+
1105+ #[ test]
1106+ fn fuzzy_returns_error_when_no_strategy_matches ( ) {
1107+ let content = "fn foo() {}\n " ;
1108+ let result = str_replace_fuzzy ( content, "fn bar() {}" , "fn baz() {}" ) ;
1109+ assert ! ( result. is_err( ) ) ;
1110+ let msg = result. unwrap_err ( ) . to_string ( ) ;
1111+ assert ! ( msg. contains( "fs_read" ) , "error should mention fs_read: {msg}" ) ;
1112+ assert ! ( msg. contains( "sed" ) , "error should warn against sed: {msg}" ) ;
1113+ }
1114+
1115+ #[ test]
1116+ fn fuzzy_replaces_correct_occurrence_when_matched_text_appears_elsewhere ( ) {
1117+ // The fuzzy-matched substring also appears earlier in the file.
1118+ // We must replace the matched position, not the first occurrence.
1119+ let content = " let x = 1;\n fn foo() {\n let x = 1;\n let y = 2;\n }\n " ;
1120+ // old_str with no indentation — line-trimmed will match the block inside fn foo
1121+ let old_str = "let x = 1;\n let y = 2;" ;
1122+ let result = str_replace_fuzzy ( content, old_str, "let x = 10;\n let y = 20;" ) . unwrap ( ) ;
1123+ // The standalone "let x = 1;" at the top must be untouched
1124+ assert ! ( result. starts_with( " let x = 1;\n " ) , "first occurrence must be untouched" ) ;
1125+ assert ! ( result. contains( "let x = 10;" ) , "matched block must be replaced" ) ;
1126+ }
1127+
1128+ #[ test]
1129+ fn block_anchor_skips_symmetric_first_last_lines ( ) {
1130+ // first == last — should not produce false positive via block anchor
1131+ let content = "}\n }\n " ;
1132+ let find = "}\n }" ;
1133+ // block_anchor_match should return None because first == last
1134+ assert ! ( block_anchor_match( content, find) . is_none( ) ) ;
1135+ }
1136+
1137+ #[ test]
1138+ fn levenshtein_space_optimised_matches_naive ( ) {
1139+ // Verify the O(n) space implementation gives correct results
1140+ assert_eq ! ( levenshtein( "" , "abc" ) , 3 ) ;
1141+ assert_eq ! ( levenshtein( "abc" , "" ) , 3 ) ;
1142+ assert_eq ! ( levenshtein( "saturday" , "sunday" ) , 3 ) ;
1143+ }
1144+
1145+ #[ test]
1146+ fn line_trimmed_match_finds_indented_block ( ) {
1147+ let content = "class Foo {\n void bar() {\n int x = 1;\n }\n }\n " ;
1148+ let find = "void bar() {\n int x = 1;\n }" ;
1149+ let matched = line_trimmed_match ( content, find) ;
1150+ assert ! ( matched. is_some( ) , "should find indented block" ) ;
1151+ let ( start, end) = matched. unwrap ( ) ;
1152+ assert ! ( content[ start..end] . contains( " void bar()" ) , "should preserve original indentation" ) ;
1153+ }
1154+
1155+ #[ test]
1156+ fn line_trimmed_match_returns_none_on_ambiguous ( ) {
1157+ let content = " foo()\n foo()\n " ;
1158+ let find = "foo()" ;
1159+ assert ! ( line_trimmed_match( content, find) . is_none( ) ) ;
1160+ }
1161+
8731162 #[ test]
8741163 fn test_fs_write_deserialize ( ) {
8751164 let path = "/my-file" ;
0 commit comments