@@ -47,20 +47,48 @@ fn has_non_whitespace(s: &str) -> bool {
4747 s. chars ( ) . any ( |c| !is_ws_char ( c) )
4848}
4949
50- /// Replace consecutive whitespace with a single space.
50+ /// Replace runs of 2+ consecutive whitespace characters with a single space.
51+ /// Single whitespace characters are preserved as-is.
5152fn collapse_whitespace ( s : & str ) -> String {
5253 let mut result = String :: with_capacity ( s. len ( ) ) ;
53- let mut prev_ws = false ;
54+ let mut ws_run_start: Option < usize > = None ;
55+ let mut ws_run_len = 0 ;
5456
55- for c in s. chars ( ) {
57+ for ( i , c ) in s. chars ( ) . enumerate ( ) {
5658 if is_ws_char ( c) {
57- if !prev_ws {
58- result. push ( ' ' ) ;
59- prev_ws = true ;
59+ if ws_run_start. is_none ( ) {
60+ ws_run_start = Some ( i) ;
6061 }
62+ ws_run_len += 1 ;
6163 } else {
64+ // End of whitespace run (if any)
65+ if let Some ( _start) = ws_run_start {
66+ if ws_run_len >= 2 {
67+ // Replace 2+ whitespace chars with single space
68+ result. push ( ' ' ) ;
69+ } else {
70+ // Keep single whitespace char as-is
71+ // We need to get the original char
72+ if let Some ( ws_char) = s. chars ( ) . nth ( i - 1 ) {
73+ result. push ( ws_char) ;
74+ }
75+ }
76+ ws_run_start = None ;
77+ ws_run_len = 0 ;
78+ }
6279 result. push ( c) ;
63- prev_ws = false ;
80+ }
81+ }
82+
83+ // Handle trailing whitespace
84+ if let Some ( start) = ws_run_start {
85+ if ws_run_len >= 2 {
86+ result. push ( ' ' ) ;
87+ } else {
88+ // Keep single trailing whitespace char as-is
89+ if let Some ( ws_char) = s. chars ( ) . nth ( start) {
90+ result. push ( ws_char) ;
91+ }
6492 }
6593 }
6694
@@ -722,9 +750,17 @@ mod tests {
722750
#[test]
fn test_collapse_whitespace() {
    // Runs of two or more whitespace characters collapse to a single space,
    // including leading and trailing runs.
    assert_eq!(collapse_whitespace("a  b"), "a b");
    assert_eq!(collapse_whitespace("a\n\n\tb"), "a b");
    assert_eq!(collapse_whitespace("  a  "), " a ");

    // A lone whitespace character is kept exactly as written, whatever kind
    // of whitespace it is and wherever it sits in the input.
    assert_eq!(collapse_whitespace("\n"), "\n");
    assert_eq!(collapse_whitespace("a\nb"), "a\nb");
    assert_eq!(collapse_whitespace(" "), " ");
    assert_eq!(collapse_whitespace("\t"), "\t");
    assert_eq!(collapse_whitespace("a b"), "a b");
}
729765
730766 #[ test]
0 commit comments