Skip to content

Commit d12f9ea

Browse files
authored
Merge pull request lightningdevkit#4593 from tnull/2026-05-printable-string-bidi
Strip Unicode `Cf` characters in `PrintableString`
2 parents a939148 + 1a01b5a commit d12f9ea

1 file changed

Lines changed: 58 additions & 1 deletion

File tree

lightning-types/src/string.rs

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,51 @@ impl<'a> fmt::Display for PrintableString<'a> {
3131
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
3232
use core::fmt::Write;
3333
for c in self.0.chars() {
34-
let c = if c.is_control() { core::char::REPLACEMENT_CHARACTER } else { c };
34+
let c = if c.is_control() || is_format_char(c) {
35+
core::char::REPLACEMENT_CHARACTER
36+
} else {
37+
c
38+
};
3539
f.write_char(c)?;
3640
}
3741

3842
Ok(())
3943
}
4044
}
4145

46+
// Returns `true` if `c` falls in Unicode general category `Cf` (Format).
//
// `char::is_control` only recognises category `Cc`, so it misses the `Cf` characters: among them
// the bidirectional override / isolate controls (e.g. U+202E RLO) and the zero-width characters
// used by the "Trojan Source" attack family (CVE-2021-42574), in which an attacker-supplied
// string is displayed to a human as something other than its byte content. Callers sanitising
// untrusted input should therefore treat these the same way as `Cc` characters.
fn is_format_char(c: char) -> bool {
	// Inclusive `(first, last)` code point ranges making up category `Cf`, in ascending order.
	// Single code points are listed as a range whose two ends are equal.
	const CF_RANGES: &[(u32, u32)] = &[
		(0x00AD, 0x00AD),
		(0x0600, 0x0605),
		(0x061C, 0x061C),
		(0x06DD, 0x06DD),
		(0x070F, 0x070F),
		(0x0890, 0x0891),
		(0x08E2, 0x08E2),
		(0x180E, 0x180E),
		(0x200B, 0x200F),
		// Contains U+202E RLO.
		(0x202A, 0x202E),
		(0x2060, 0x2064),
		(0x2066, 0x206F),
		(0xFEFF, 0xFEFF),
		(0xFFF9, 0xFFFB),
		(0x110BD, 0x110BD),
		(0x110CD, 0x110CD),
		// Ends at U+1343F: U+13440 is category `Mn`, not `Cf`.
		(0x13430, 0x1343F),
		(0x1BCA0, 0x1BCA3),
		(0x1D173, 0x1D17A),
		(0xE0001, 0xE0001),
		(0xE0020, 0xE007F),
	];

	let code_point = u32::from(c);
	CF_RANGES.iter().any(|&(first, last)| first <= code_point && code_point <= last)
}
78+
4279
#[cfg(test)]
4380
mod tests {
4481
use super::PrintableString;
@@ -50,4 +87,24 @@ mod tests {
5087
"I \u{1F496} LDK!\u{FFFD}\u{26A1}",
5188
);
5289
}
90+
91+
#[test]
fn sanitizes_unicode_bidi_override_characters() {
	// Bidi controls such as U+202E RIGHT-TO-LEFT OVERRIDE are in Unicode general category `Cf`
	// (Format) rather than `Cc` (Control). They make "Trojan Source" / bidi-spoofing attacks
	// possible: an attacker-supplied string (e.g. a node alias gossiped from a peer) is shown
	// to a human as something other than its byte content. `PrintableString` is the sanitiser
	// applied to exactly such untrusted strings, so the override must not survive formatting.
	let spoofed_alias = PrintableString("safe\u{202E}cipsxe.exe");
	let output = format!("{}", spoofed_alias);
	assert!(
		!output.contains('\u{202E}'),
		"PrintableString left a U+202E RLO override in its output: {:?}",
		output
	);

	// Upper boundary of the Egyptian Hieroglyph Format Controls range: U+1343F is `Cf` and gets
	// replaced, whereas U+13440, although in the same block, has general category `Mn` and
	// must pass through untouched.
	let boundary_output = format!("{}", PrintableString("x\u{1343F}y\u{13440}z"));
	assert_eq!(boundary_output, "x\u{FFFD}y\u{13440}z");
}
53110
}

0 commit comments

Comments
 (0)