Skip to content

Commit b47f06f

Browse files
authored
Unicode whitespace stripping in string literal line continuation (#6860)
* Unicode whitespace stripping in string literal line continuation
* Added explanation of why char::is_whitespace is not used
1 parent e59ad98 commit b47f06f

3 files changed

Lines changed: 20 additions & 1 deletion

File tree

src/string.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,16 @@ fn is_new_line(grapheme: &str) -> bool {
360360
}
361361

362362
fn is_whitespace(grapheme: &str) -> bool {
363-
grapheme.chars().all(char::is_whitespace)
363+
// We explicitly match these characters instead of using char::is_whitespace
364+
// because char::is_whitespace uses Unicode White_Space which is broader
365+
// than the Rust language's definition of whitespace. For example it would
366+
// also match \u{A0} (non-breaking space). \x0B (vertical tab) and \x0C
367+
// (form feed) are included here because the Rust language defines them
368+
// as whitespace, but is_ascii_whitespace excludes \x0B (it only accepts \x0C).
369+
370+
grapheme
371+
.chars()
372+
.all(|c| matches!(c, ' ' | '\t' | '\n' | '\r' | '\x0B' | '\x0C'))
364373
}
365374

366375
fn is_punctuation(grapheme: &str) -> bool {
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Test Unicode whitespace characters in string literal line continuation
2+
fn main() {
3+
let str = "hello \
4+
 world";
5+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Test Unicode whitespace characters in string literal line continuation
2+
fn main() {
3+
let str = "hello \
4+
 world";
5+
}

0 commit comments

Comments
 (0)