@@ -1227,9 +1227,11 @@ fn is_nullish_coalesce(expr: &OutputExpression<'_>) -> bool {
12271227/// Escape a string for JavaScript output.
12281228///
12291229/// Uses double quotes to match Angular's output style.
1230- /// Only escapes control characters (`"`, `\`, `\n`, `\r`, and `$` when requested).
1231- /// Non-ASCII printable characters (e.g. `×`, `é`, `α`) are emitted as literal UTF-8,
1232- /// matching Angular's `escapeIdentifier` behavior.
1230+ /// Escapes `"`, `\`, `\n`, `\r`, `$` (when requested), ASCII control characters
1231+ /// (0x00-0x1F and 0x7F), and all non-ASCII characters (code point > 0x7F) as `\uNNNN` sequences.
1232+ /// Characters above the BMP (U+10000+) are encoded as UTF-16 surrogate pairs
1233+ /// (`\uXXXX\uXXXX`). This matches TypeScript's emitter behavior, which escapes
1234+ /// non-ASCII characters in string literals.
12331235fn escape_string ( input : & str , escape_dollar : bool ) -> String {
12341236 let mut result = String :: with_capacity ( input. len ( ) + 2 ) ;
12351237 result. push ( '"' ) ;
@@ -1240,19 +1242,37 @@ fn escape_string(input: &str, escape_dollar: bool) -> String {
12401242 '\n' => result. push_str ( "\\ n" ) ,
12411243 '\r' => result. push_str ( "\\ r" ) ,
12421244 '$' if escape_dollar => result. push_str ( "\\ $" ) ,
1243- // Escape ASCII control characters (0x00-0x1F, 0x7F) other than \n and \r
1244- c if c. is_ascii_control ( ) => {
1245+ // ASCII printable characters (0x20-0x7E) are emitted literally
1246+ c if ( ' ' ..='\x7E' ) . contains ( & c) => result. push ( c) ,
1247+ // Everything else (ASCII control chars, non-ASCII) is escaped as \uNNNN.
1248+ // Characters above the BMP are encoded as UTF-16 surrogate pairs.
1249+ c => {
12451250 let code = c as u32 ;
1246- result. push_str ( & format ! ( "\\ u{code:04X}" ) ) ;
1251+ if code <= 0xFFFF {
1252+ push_unicode_escape ( & mut result, code) ;
1253+ } else {
1254+ let hi = 0xD800 + ( ( code - 0x10000 ) >> 10 ) ;
1255+ let lo = 0xDC00 + ( ( code - 0x10000 ) & 0x3FF ) ;
1256+ push_unicode_escape ( & mut result, hi) ;
1257+ push_unicode_escape ( & mut result, lo) ;
1258+ }
12471259 }
1248- // All other characters (including non-ASCII printable) are emitted literally
1249- _ => result. push ( c) ,
12501260 }
12511261 }
12521262 result. push ( '"' ) ;
12531263 result
12541264}
12551265
/// Append a `\uXXXX` escape sequence for a single 16-bit UTF-16 code unit.
///
/// `code` is rendered as exactly four uppercase hexadecimal digits, most
/// significant nibble first. Only the low 16 bits can be represented, so a
/// code point above U+FFFF must be split into a surrogate pair by the caller,
/// with one call per code unit.
fn push_unicode_escape(buf: &mut String, code: u32) {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    // A wider value would be silently truncated to its low 16 bits below;
    // surface that misuse in debug builds instead of emitting a wrong escape.
    debug_assert!(code <= 0xFFFF, "push_unicode_escape expects a 16-bit code unit");

    buf.push_str("\\u");
    for shift in [12, 8, 4, 0] {
        // Masking with 0xF keeps the index within the 16-entry table.
        buf.push(char::from(HEX[((code >> shift) & 0xF) as usize]));
    }
}
1275+
12561276/// Escape an identifier for use as a property key.
12571277fn escape_identifier ( input : & Atom < ' _ > , escape_dollar : bool , always_quote : bool ) -> String {
12581278 // Check if the identifier is a valid JavaScript identifier
@@ -1487,35 +1507,35 @@ mod tests {
14871507
#[test]
fn test_escape_string_unicode_literals() {
    // Non-ASCII characters must be escaped as \uNNNN rather than emitted as
    // literal UTF-8. (The test name is historical: it predates the switch
    // from literal output to escaped output.)

    // × (multiplication sign, U+00D7) -> \u00D7
    assert_eq!(escape_string("\u{00D7}", false), "\"\\u00D7\"");

    // Non-breaking space (U+00A0) -> \u00A0
    assert_eq!(escape_string("\u{00A0}", false), "\"\\u00A0\"");

    // Mixed ASCII and non-ASCII: only the non-ASCII character is escaped.
    assert_eq!(escape_string("a\u{00D7}b", false), "\"a\\u00D7b\"");

    // Multiple non-ASCII characters are each escaped independently.
    assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\\u00D7\\u00A0\"");

    // Characters outside the BMP (emoji) -> UTF-16 surrogate pair.
    assert_eq!(escape_string("\u{1F600}", false), "\"\\uD83D\\uDE00\"");

    // Characters commonly produced from HTML entities -> all escaped as \uNNNN.
    assert_eq!(escape_string("\u{00A9}", false), "\"\\u00A9\""); // © (&copy;)
    assert_eq!(escape_string("\u{00AE}", false), "\"\\u00AE\""); // ® (&reg;)
    assert_eq!(escape_string("\u{2014}", false), "\"\\u2014\""); // — (&mdash;)
    assert_eq!(escape_string("\u{2013}", false), "\"\\u2013\""); // – (&ndash;)

    // Greek letter alpha
    assert_eq!(escape_string("\u{03B1}", false), "\"\\u03B1\""); // α

    // Accented Latin letter
    assert_eq!(escape_string("\u{00E9}", false), "\"\\u00E9\""); // é
}
15201540
15211541 #[ test]
@@ -1533,6 +1553,41 @@ mod tests {
15331553 assert_eq ! ( escape_string( "\r " , false ) , "\" \\ r\" " ) ;
15341554 }
15351555
#[test]
fn test_escape_string_non_ascii_as_unicode_escapes() {
    // Each case pairs a raw input with the exact double-quoted output that
    // `escape_string` is expected to produce when `$` escaping is disabled.
    let cases: [(&str, &str); 10] = [
        // Non-breaking space U+00A0
        ("\u{00A0}", "\"\\u00A0\""),
        // En dash U+2013
        ("\u{2013}", "\"\\u2013\""),
        // Trademark U+2122
        ("\u{2122}", "\"\\u2122\""),
        // Infinity U+221E
        ("\u{221E}", "\"\\u221E\""),
        // Mixed ASCII and non-ASCII
        ("a\u{00D7}b", "\"a\\u00D7b\""),
        // Multiple non-ASCII characters
        ("\u{00D7}\u{00A0}", "\"\\u00D7\\u00A0\""),
        // Above the BMP: U+1F600 (grinning face) = surrogate pair D83D DE00
        ("\u{1F600}", "\"\\uD83D\\uDE00\""),
        // U+10000 (first supplementary character) = surrogate pair D800 DC00
        ("\u{10000}", "\"\\uD800\\uDC00\""),
        // Both ends of the printable ASCII range (0x20 and 0x7E) stay literal
        (" ~", "\" ~\""),
        ("abc123!@#", "\"abc123!@#\""),
    ];

    for (input, expected) in cases {
        assert_eq!(escape_string(input, false), expected, "input: {input:?}");
    }
}
1590+
15361591 // ========================================================================
15371592 // Source Map Tests
15381593 // ========================================================================
0 commit comments