@@ -490,7 +490,7 @@ where
490490
491491 if !dupe {
492492 let ( start_states, re_str) =
493- self . parse_start_states ( i, line[ ..rspace] . trim_end_matches ( matches_whitespace ) ) ?;
493+ self . parse_start_states ( i, trim_end_unescaped ( & line[ ..rspace] ) ) ?;
494494 let rules_len = self . rules . len ( ) ;
495495 let tok_id = LexerTypesT :: StorageT :: try_from ( rules_len)
496496 . unwrap_or_else ( |_| panic ! ( "StorageT::try_from \
@@ -685,6 +685,43 @@ where
685685 }
686686}
687687
688+ fn trim_end_unescaped ( s : & str ) -> & str {
689+ use unicode_width:: UnicodeWidthChar ;
690+ let mut cbuf = [ 0 ; 4 ] ;
691+ let mut initial_space_bytes = 0 ;
692+ let mut last_char_width = 0 ;
693+ // First loop over spaces
694+ for ch in s. chars ( ) . rev ( ) . into_iter ( ) {
695+ if RE_SPACE_SEP . is_match ( ch. encode_utf8 ( & mut cbuf) ) {
696+ last_char_width = ch. width ( ) . unwrap_or ( 0 ) ;
697+ initial_space_bytes += last_char_width;
698+ } else {
699+ break ;
700+ }
701+ }
702+ if initial_space_bytes == 0 {
703+ return s;
704+ }
705+ let mut preceeding_backslashes = 0 ;
706+ // Next loop over escaped slashes or spaces,
707+ // an even number of backslashes are all escaping slashes,
708+ // and an odd number of backslashes must have a trailing escaped space.
709+ for c in s[ ..s. len ( ) - initial_space_bytes] . chars ( ) . rev ( ) {
710+ if c == '\\' {
711+ preceeding_backslashes += 1
712+ } else {
713+ break ;
714+ }
715+ }
716+ // The backslash count was odd, the last must escape a space.
717+ // Drop one of the intial spaces from the trim.
718+ if preceeding_backslashes % 2 == 1 {
719+ initial_space_bytes -= last_char_width;
720+ }
721+
722+ & s[ ..s. len ( ) - initial_space_bytes]
723+ }
724+
688725#[ cfg( test) ]
689726mod test {
690727 use super :: * ;
@@ -1826,4 +1863,37 @@ b "A"
18261863 18 ,
18271864 ) ;
18281865 }
1866+
// Table-driven check of `trim_end_unescaped`: each row is `(input, expected)`.
// Rows ending in an odd number of backslashes followed by whitespace expect the escaped
// whitespace to survive; rows with an even number expect the whitespace to be removed.
// NOTE(review): several rows are identical as written here; presumably they were meant to
// differ in the amount of trailing whitespace -- confirm against the intended inputs.
#[test]
fn unescaped_trim() {
    let cases: &[(&str, &str)] = &[
        // Backslashes only.
        (r#"\ "#, r#"\ "#),
        (r#"\ "#, r#"\ "#),
        (r#"\\ "#, r#"\\"#),
        (r#"\\ "#, r#"\\"#),
        (r#"\\\ "#, r#"\\\ "#),
        (r#"\\\ "#, r#"\\\ "#),
        (r#"\\\\ "#, r#"\\\\"#),
        (r#"\\\\ "#, r#"\\\\"#),
        // A leading non-backslash character.
        (r#"x"#, r#"x"#),
        (r#"x\ "#, r#"x\ "#),
        (r#"x\ "#, r#"x\ "#),
        (r#"x\\ "#, r#"x\\"#),
        (r#"x\\ "#, r#"x\\"#),
        (r#"x\\\ "#, r#"x\\\ "#),
        (r#"x\\\ "#, r#"x\\\ "#),
        (r#"x\\\\ "#, r#"x\\\\"#),
        (r#"x\\\\ "#, r#"x\\\\"#),
        // An escaped space in the middle must not affect trimming at the end.
        (r#"x\ y "#, r#"x\ y"#),
        (r#"x\ y\ "#, r#"x\ y\ "#),
        (r#"x\ y\\ "#, r#"x\ y\\"#),
        (r#"x\ y "#, r#"x\ y"#),
        (r#"x\ y\ "#, r#"x\ y\ "#),
        (r#"x\ y\\ "#, r#"x\ y\\"#),
    ];
    cases.iter().for_each(|&(input, want)| {
        assert_eq!(want, trim_end_unescaped(input));
    });
}
18291899}
0 commit comments