@@ -58,16 +58,32 @@ pub fn path_ends_with_suffix(path: &str, suffix: &str) -> bool {
5858 }
5959
6060 let start = path. len ( ) - suffix. len ( ) ;
61- let Some ( path_suffix) = path. get ( start..) else {
61+
62+ // Must land on a char boundary — multi-byte UTF-8 can make
63+ // `path.len() - suffix.len()` land inside a char.
64+ if !path. is_char_boundary ( start) {
6265 return false ;
63- } ;
66+ }
6467
65- if !path_suffix . eq_ignore_ascii_case ( suffix) {
68+ if !path [ start.. ] . eq_ignore_ascii_case ( suffix) {
6669 return false ;
6770 }
6871
69- // Exact match, or the character before is /
70- start == 0 || path_bytes[ start - 1 ] == b'/'
72+ // Exact match, or the character before is '/'.
73+ // `start` is a char boundary but `start - 1` may be inside a multi-byte
74+ // char, so scan backward to find the preceding ASCII byte.
75+ if start == 0 {
76+ return true ;
77+ }
78+ let mut i = start;
79+ while i > 0 {
80+ i -= 1 ;
81+ // ASCII bytes (0..128) are single-byte UTF-8 code units
82+ if path_bytes[ i] < 128 {
83+ return path_bytes[ i] == b'/' ;
84+ }
85+ }
86+ false
7187}
7288
7389#[ inline]
@@ -78,6 +94,11 @@ pub fn file_has_extension(file_name: &str, ext: &str) -> bool {
7894 return false ;
7995 }
8096 let start = name_bytes. len ( ) - ext_bytes. len ( ) - 1 ;
97+ // `.` is ASCII (single byte), so `start` must be a char boundary.
98+ // If it lands inside a multi-byte char the extension can't match.
99+ if start > 0 && !file_name. is_char_boundary ( start) {
100+ return false ;
101+ }
81102 name_bytes. get ( start) == Some ( & b'.' ) && name_bytes[ start + 1 ..] . eq_ignore_ascii_case ( ext_bytes)
82103}
83104
@@ -91,6 +112,7 @@ pub fn path_contains_segment(path: &str, segment: &str) -> bool {
91112 // Check segment/ at start of path
92113 if path_bytes. len ( ) > segment_len
93114 && path_bytes. get ( segment_len) == Some ( & b'/' )
115+ && path. is_char_boundary ( segment_len)
94116 && path_bytes[ ..segment_len] . eq_ignore_ascii_case ( segment_bytes)
95117 {
96118 return true ;
@@ -107,6 +129,8 @@ pub fn path_contains_segment(path: &str, segment: &str) -> bool {
107129 let end = start + segment_len;
108130 if end < path_bytes. len ( )
109131 && path_bytes[ end] == b'/'
132+ && path. is_char_boundary ( start)
133+ && path. is_char_boundary ( end)
110134 && path_bytes[ start..end] . eq_ignore_ascii_case ( segment_bytes)
111135 {
112136 return true ;
@@ -422,12 +446,14 @@ mod tests {
422446 }
423447
424448 impl Constrainable for TestItem {
425- fn relative_path ( & self ) -> & str {
426- self . relative_path
449+ fn write_file_name ( & self , _arena : ArenaPtr , out : & mut String ) {
450+ out. clear ( ) ;
451+ out. push_str ( self . file_name ) ;
427452 }
428453
429- fn file_name ( & self ) -> & str {
430- self . file_name
454+ fn write_relative_path ( & self , _arena : ArenaPtr , out : & mut String ) {
455+ out. clear ( ) ;
456+ out. push_str ( self . relative_path ) ;
431457 }
432458
433459 fn git_status ( & self ) -> Option < git2:: Status > {
@@ -550,58 +576,36 @@ mod tests {
550576 }
551577
552578 #[ test]
553- <<<<<<< HEAD
554579 fn test_path_ends_with_suffix_does_not_panic_on_unicode_suffix ( ) {
555580 assert ! ( !path_ends_with_suffix( "유니코드_파일_테스트.csv" , "트.c" ) ) ;
556581 assert ! ( path_ends_with_suffix(
557582 "data/유니코드_파일_테스트.csv" ,
558583 "유니코드_파일_테스트.csv"
559- =======
584+ ) ) ;
585+ }
586+
587+ #[ test]
560588 fn test_path_ends_with_suffix_unicode_apostrophe_mismatch ( ) {
561589 assert ! ( !path_ends_with_suffix(
562590 "dir/\u{2019} bar/file.txt" ,
563591 "'bar/file.txt"
564- >>>>>>> origin/main
565592 ) ) ;
566593 }
567594
568595 #[ test]
569- <<<<<<< HEAD
570- fn test_path_contains_segment_does_not_panic_on_unicode_segment( ) {
571- assert!( !path_contains_segment( "문서/notes.txt" , "문x" ) ) ;
572- assert!( path_contains_segment( "프로젝트/문서/notes.txt" , "문서" ) ) ;
573- }
574-
575- #[ test]
576- fn test_apply_constraints_file_path_with_unicode_suffix( ) {
577- let item = TestItem {
578- relative_path: "data/유니코드_파일_테스트.csv" ,
579- file_name: "유니코드_파일_테스트.csv" ,
580- } ;
581-
582- let exact = [ Constraint :: FilePath ( "유니코드_파일_테스트.csv" ) ] ;
583- let mismatch = [ Constraint :: FilePath ( "트.c" ) ] ;
584-
585- let exact_items = [ item] ;
586- let exact_matches = apply_constraints( & exact_items, & exact) . expect( "constraints applied" ) ;
587- assert_eq!( exact_matches. len( ) , 1 ) ;
588-
589- let mismatch_item = TestItem {
590- relative_path: "data/유니코드_파일_테스트.csv" ,
591- file_name: "유니코드_파일_테스트.csv" ,
592- } ;
593- let mismatch_items = [ mismatch_item] ;
594- let mismatch_matches =
595- apply_constraints( & mismatch_items, & mismatch) . expect( "constraints applied" ) ;
596- assert!( mismatch_matches. is_empty( ) ) ;
597- =======
598596 fn test_path_ends_with_suffix_unicode_space_mismatch ( ) {
599597 assert ! ( !path_ends_with_suffix(
600598 "dir/\u{202f} am/file.txt" ,
601599 " am/file.txt"
602600 ) ) ;
603601 }
604602
603+ #[ test]
604+ fn test_path_contains_segment_does_not_panic_on_unicode_segment ( ) {
605+ assert ! ( !path_contains_segment( "문서/notes.txt" , "문x" ) ) ;
606+ assert ! ( path_contains_segment( "프로젝트/문서/notes.txt" , "문서" ) ) ;
607+ }
608+
605609 #[ test]
606610 fn test_path_contains_segment_unicode_no_panic ( ) {
607611 assert ! ( !path_contains_segment(
@@ -613,6 +617,33 @@ mod tests {
613617 #[ test]
614618 fn test_file_has_extension_unicode_no_panic ( ) {
615619 assert ! ( !file_has_extension( "cat\u{00e9} .rs" , "s" ) ) ;
616- >>>>>>> origin/main
620+ }
621+
622+ #[ test]
623+ fn test_file_has_extension_unicode_filename ( ) {
624+ assert ! ( file_has_extension( "운영-가이드.md" , "md" ) ) ;
625+ assert ! ( file_has_extension( "테스트.csv" , "csv" ) ) ;
626+ assert ! ( !file_has_extension( "테스트.csv" , "md" ) ) ;
627+ }
628+
629+ #[ test]
630+ fn test_unicode_path_no_panic_real_korean_cases ( ) {
631+ // Real Korean paths that caused panics
632+ let path1 = "Downloads/(커리큘럼) hermes agent_정승현님 - 1차 커리큘럼 (강사님 작성).csv" ;
633+ let path2 = "hermes-agent-lecture-materials/세부_커리큘럼_최종.csv" ;
634+ let path3 = "projects/fastcampus-hermes-agent-curriculum/chapters/part-02-Hermes-설치-및-기본-사용/section-02-doctor로-설치-상태-검증/research/03-fix가-자동-수정하는-것과-못하는-것.md" ;
635+
636+ // These must not panic regardless of segment/suffix used
637+ assert ! ( !path_contains_segment( path1, "작성" ) ) ;
638+ assert ! ( !path_ends_with_suffix( path1, "작성.csv" ) ) ;
639+ assert ! ( !path_contains_segment( path2, "최종" ) ) ;
640+ assert ! ( !path_ends_with_suffix( path2, "최종.csv" ) ) ;
641+ assert ! ( !path_contains_segment( path3, "수정" ) ) ;
642+ assert ! ( !path_ends_with_suffix( path3, "것.md" ) ) ;
643+
644+ // Positive cases should still work
645+ assert ! ( path_contains_segment( path2, "hermes-agent-lecture-materials" ) ) ;
646+ assert ! ( path_ends_with_suffix( path1, "(커리큘럼) hermes agent_정승현님 - 1차 커리큘럼 (강사님 작성).csv" ) ) ;
647+ assert ! ( path_ends_with_suffix( path2, "세부_커리큘럼_최종.csv" ) ) ;
617648 }
618649}
0 commit comments