Skip to content

Commit efc3408

Browse files
committed
Stop trimming escaped spaces off the end regex
1 parent 12067d6 commit efc3408

2 files changed

Lines changed: 67 additions & 1 deletion

File tree

lrlex/src/lib/parser.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,7 @@ where
490490

491491
if !dupe {
492492
let (start_states, re_str) =
493-
self.parse_start_states(i, line[..rspace].trim_end_matches(matches_whitespace))?;
493+
self.parse_start_states(i, trim_end_unescaped(&line[..rspace]))?;
494494
let rules_len = self.rules.len();
495495
let tok_id = LexerTypesT::StorageT::try_from(rules_len)
496496
.unwrap_or_else(|_| panic!("StorageT::try_from \
@@ -685,6 +685,20 @@ where
685685
}
686686
}
687687

688+
fn trim_end_unescaped(s: &str) -> &str {
689+
let trimmed = s.trim_end_matches(matches_whitespace);
690+
if trimmed.len() == s.len() {
691+
return s;
692+
}
693+
// If the number of backslashes is odd then the first space in the trimmed portion is escaped so re-add it.
694+
if trimmed.chars().rev().take_while(|&c| c == '\\').count() % 2 == 1 {
695+
// Panic safety: the trimmed portion is at least one char long.
696+
&s[..trimmed.len() + s[trimmed.len()..].chars().next().unwrap().len_utf8()]
697+
} else {
698+
trimmed
699+
}
700+
}
701+
688702
#[cfg(test)]
689703
mod test {
690704
use super::*;
@@ -1826,4 +1840,37 @@ b "A"
18261840
18,
18271841
);
18281842
}
1843+
1844+
#[test]
fn unescaped_trim() {
    // Each entry is `(input, expected)`: the string handed to `trim_end_unescaped` and the
    // slice it should return. Inputs ending in an odd number of backslashes followed by
    // whitespace keep one escaped whitespace character; all others lose their trailing
    // whitespace entirely.
    const CASES: &[(&str, &str)] = &[
        (r#"\ "#, r#"\ "#),
        (r#"\ "#, r#"\ "#),
        (r#"\\ "#, r#"\\"#),
        (r#"\\ "#, r#"\\"#),
        (r#"\\\ "#, r#"\\\ "#),
        (r#"\\\ "#, r#"\\\ "#),
        (r#"\\\\ "#, r#"\\\\"#),
        (r#"\\\\ "#, r#"\\\\"#),
        (r#"x"#, r#"x"#),
        (r#"x\ "#, r#"x\ "#),
        (r#"x\ "#, r#"x\ "#),
        (r#"x\\ "#, r#"x\\"#),
        (r#"x\\ "#, r#"x\\"#),
        (r#"x\\\ "#, r#"x\\\ "#),
        (r#"x\\\ "#, r#"x\\\ "#),
        (r#"x\\\\ "#, r#"x\\\\"#),
        (r#"x\\\\ "#, r#"x\\\\"#),
        (r#"x\ y "#, r#"x\ y"#),
        (r#"x\ y\ "#, r#"x\ y\ "#),
        (r#"x\ y\\ "#, r#"x\ y\\"#),
        (r#"x\ y "#, r#"x\ y"#),
        (r#"x\ y\ "#, r#"x\ y\ "#),
        (r#"x\ y\\ "#, r#"x\ y\\"#),
    ];
    for &(input, want) in CASES {
        assert_eq!(want, trim_end_unescaped(input))
    }
}
18291876
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
name: Test with regex containing trailing ws
2+
grammar: |
3+
%grmtools {
4+
yacckind: Original(YaccOriginalActionKind::NoAction),
5+
recoverer: RecoveryKind::None,
6+
test_files: ["*.input_trailing_ws"],
7+
}
8+
%start Expr
9+
%%
10+
Expr: "trailing";
11+
12+
lexer: |
13+
%%
14+
[a-zA-Z]\ "trailing"
15+
[\n\t] ;
16+
17+
extra_files:
18+
input1.input_trailing_ws: |
19+
a

0 commit comments

Comments
 (0)