Skip to content

Commit 72fb262

Browse files
Brooooooklynclaude
andcommitted
fix(angular): resolve e2e comparison mismatches
Fix multiple compiler output divergences from Angular TS: - fix(variable_optimization): handle Oxc's split handler_expression architecture in optimizeSaveRestoreView, removing unnecessary restoreView/resetView wrapping - fix(variable_optimization): reorder optimization steps to match Angular TS (arrow functions and listener handlers before create/update ops) - fix(variable_optimization): include Animation handlers in save/restore optimization - fix(emitter): emit non-ASCII characters as raw UTF-8 instead of \uNNNN escapes - fix(ordering): add OpKind::Control to update op ordering phase (priority 8, last) - fix(reify): add missing name string literal as second argument to ɵɵcontrol() - fix(emit): emit host binding pool constants (pure functions) alongside template declarations, matching Angular TS's shared ConstantPool behavior - fix(entities): use greedy &-to-; matching in decode_entities_in_string to replicate Angular TS's /&([^;]+);/g regex behavior Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 39174a8 commit 72fb262

File tree

13 files changed

+695
-170
lines changed

13 files changed

+695
-170
lines changed

crates/oxc_angular_compiler/src/component/transform.rs

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,10 +1569,20 @@ fn compile_component_full<'a>(
15691569
compile_component_host_bindings(allocator, metadata, template_pool_index);
15701570

15711571
// Extract the result and update pool index if host bindings were compiled
1572-
let (host_binding_result, host_binding_next_pool_index) = match host_binding_output {
1573-
Some(output) => (Some(output.result), Some(output.next_pool_index)),
1574-
None => (None, None),
1575-
};
1572+
let (host_binding_result, host_binding_next_pool_index, host_binding_declarations) =
1573+
match host_binding_output {
1574+
Some(output) => {
1575+
let declarations = output.result.declarations;
1576+
let result = HostBindingCompilationResult {
1577+
host_binding_fn: output.result.host_binding_fn,
1578+
host_attrs: output.result.host_attrs,
1579+
host_vars: output.result.host_vars,
1580+
declarations: OxcVec::new_in(allocator),
1581+
};
1582+
(Some(result), Some(output.next_pool_index), declarations)
1583+
}
1584+
None => (None, None, OxcVec::new_in(allocator)),
1585+
};
15761586

15771587
// Stage 7: Generate ɵcmp/ɵfac definitions
15781588
// The namespace registry is shared across all components in the file to ensure
@@ -1599,6 +1609,11 @@ fn compile_component_full<'a>(
15991609
declarations_js.push_str(&emitter.emit_statement(decl));
16001610
declarations_js.push('\n');
16011611
}
1612+
// Emit host binding declarations (pooled constants like pure functions)
1613+
for decl in host_binding_declarations.iter() {
1614+
declarations_js.push_str(&emitter.emit_statement(decl));
1615+
declarations_js.push('\n');
1616+
}
16021617

16031618
// For HMR, we emit the template separately using compile_template_to_js
16041619
// The ɵcmp already contains the template function inline

crates/oxc_angular_compiler/src/output/emitter.rs

Lines changed: 41 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,11 +1234,10 @@ fn is_nullish_coalesce(expr: &OutputExpression<'_>) -> bool {
12341234
/// Escape a string for JavaScript output.
12351235
///
12361236
/// Uses double quotes to match Angular's output style.
1237-
/// Escapes `"`, `\`, `\n`, `\r`, `$` (when requested), ASCII control characters,
1238-
/// and all non-ASCII characters (code point > 0x7E) as `\uNNNN` sequences.
1239-
/// Characters above the BMP (U+10000+) are encoded as UTF-16 surrogate pairs
1240-
/// (`\uXXXX\uXXXX`). This matches TypeScript's emitter behavior, which escapes
1241-
/// non-ASCII characters in string literals.
1237+
/// Escapes `"`, `\`, `\n`, `\r`, `$` (when requested), and ASCII control characters
1238+
/// as `\uNNNN` sequences. Non-ASCII characters (code point > 0x7E) are emitted as
1239+
/// raw UTF-8 to match Angular's TypeScript emitter behavior (see `escapeIdentifier`
1240+
/// in `abstract_emitter.ts`), which only escapes `'`, `\`, `\n`, `\r`, and `$`.
12421241
pub(crate) fn escape_string(input: &str, escape_dollar: bool) -> String {
12431242
let mut result = String::with_capacity(input.len() + 2);
12441243
result.push('"');
@@ -1251,18 +1250,14 @@ pub(crate) fn escape_string(input: &str, escape_dollar: bool) -> String {
12511250
'$' if escape_dollar => result.push_str("\\$"),
12521251
// ASCII printable characters (0x20-0x7E) are emitted literally
12531252
c if (' '..='\x7E').contains(&c) => result.push(c),
1254-
// Everything else (ASCII control chars, non-ASCII) is escaped as \uNNNN.
1255-
// Characters above the BMP are encoded as UTF-16 surrogate pairs.
1253+
// DEL (0x7F) is an ASCII control character and must be escaped
1254+
'\x7F' => push_unicode_escape(&mut result, 0x7F),
1255+
// Non-ASCII characters (> 0x7F) are emitted as raw UTF-8 to match
1256+
// Angular's TypeScript emitter, which does not escape them.
1257+
c if (c as u32) > 0x7F => result.push(c),
1258+
// ASCII control characters (0x00-0x1F) are escaped as \uNNNN.
12561259
c => {
1257-
let code = c as u32;
1258-
if code <= 0xFFFF {
1259-
push_unicode_escape(&mut result, code);
1260-
} else {
1261-
let hi = 0xD800 + ((code - 0x10000) >> 10);
1262-
let lo = 0xDC00 + ((code - 0x10000) & 0x3FF);
1263-
push_unicode_escape(&mut result, hi);
1264-
push_unicode_escape(&mut result, lo);
1265-
}
1260+
push_unicode_escape(&mut result, c as u32);
12661261
}
12671262
}
12681263
}
@@ -1514,35 +1509,35 @@ mod tests {
15141509

15151510
#[test]
15161511
fn test_escape_string_unicode_literals() {
1517-
// Non-ASCII characters should be escaped as \uNNNN to match
1518-
// TypeScript's emitter behavior.
1512+
// Non-ASCII characters should be emitted as raw UTF-8 to match
1513+
// Angular's TypeScript emitter behavior (escapeIdentifier in abstract_emitter.ts).
15191514

1520-
// &times; (multiplication sign U+00D7) -> \u00D7
1521-
assert_eq!(escape_string("\u{00D7}", false), "\"\\u00D7\"");
1515+
// &times; (multiplication sign U+00D7) -> raw UTF-8
1516+
assert_eq!(escape_string("\u{00D7}", false), "\"\u{00D7}\"");
15221517

1523-
// &nbsp; (non-breaking space U+00A0) -> \u00A0
1524-
assert_eq!(escape_string("\u{00A0}", false), "\"\\u00A0\"");
1518+
// &nbsp; (non-breaking space U+00A0) -> raw UTF-8
1519+
assert_eq!(escape_string("\u{00A0}", false), "\"\u{00A0}\"");
15251520

15261521
// Mixed ASCII and non-ASCII
1527-
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\\u00D7b\"");
1522+
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\u{00D7}b\"");
15281523

15291524
// Multiple non-ASCII characters
1530-
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\\u00D7\\u00A0\"");
1525+
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\u{00D7}\u{00A0}\"");
15311526

1532-
// Characters outside BMP (emoji) -> surrogate pair
1533-
assert_eq!(escape_string("\u{1F600}", false), "\"\\uD83D\\uDE00\"");
1527+
// Characters outside BMP (emoji) -> raw UTF-8
1528+
assert_eq!(escape_string("\u{1F600}", false), "\"\u{1F600}\"");
15341529

1535-
// Common HTML entities -> all escaped as \uNNNN
1536-
assert_eq!(escape_string("\u{00A9}", false), "\"\\u00A9\""); // &copy; ©
1537-
assert_eq!(escape_string("\u{00AE}", false), "\"\\u00AE\""); // &reg; ®
1538-
assert_eq!(escape_string("\u{2014}", false), "\"\\u2014\""); // &mdash; —
1539-
assert_eq!(escape_string("\u{2013}", false), "\"\\u2013\""); // &ndash; –
1530+
// Common HTML entities -> all emitted as raw UTF-8
1531+
assert_eq!(escape_string("\u{00A9}", false), "\"\u{00A9}\""); // &copy; ©
1532+
assert_eq!(escape_string("\u{00AE}", false), "\"\u{00AE}\""); // &reg; ®
1533+
assert_eq!(escape_string("\u{2014}", false), "\"\u{2014}\""); // &mdash; —
1534+
assert_eq!(escape_string("\u{2013}", false), "\"\u{2013}\""); // &ndash; –
15401535

15411536
// Greek letter alpha
1542-
assert_eq!(escape_string("\u{03B1}", false), "\"\\u03B1\""); // α
1537+
assert_eq!(escape_string("\u{03B1}", false), "\"\u{03B1}\""); // α
15431538

15441539
// Accented Latin letter
1545-
assert_eq!(escape_string("\u{00E9}", false), "\"\\u00E9\""); // é
1540+
assert_eq!(escape_string("\u{00E9}", false), "\"\u{00E9}\""); // é
15461541
}
15471542

15481543
#[test]
@@ -1561,34 +1556,33 @@ mod tests {
15611556
}
15621557

15631558
#[test]
1564-
fn test_escape_string_non_ascii_as_unicode_escapes() {
1565-
// Non-ASCII characters should be escaped as \uNNNN to match
1566-
// TypeScript's emitter behavior (which escapes non-ASCII in string literals).
1559+
fn test_escape_string_non_ascii_as_raw_utf8() {
1560+
// Non-ASCII characters should be emitted as raw UTF-8 to match
1561+
// Angular's TypeScript emitter behavior (escapeIdentifier in abstract_emitter.ts).
15671562

15681563
// Non-breaking space U+00A0
1569-
assert_eq!(escape_string("\u{00A0}", false), "\"\\u00A0\"");
1564+
assert_eq!(escape_string("\u{00A0}", false), "\"\u{00A0}\"");
15701565

15711566
// En dash U+2013
1572-
assert_eq!(escape_string("\u{2013}", false), "\"\\u2013\"");
1567+
assert_eq!(escape_string("\u{2013}", false), "\"\u{2013}\"");
15731568

15741569
// Trademark U+2122
1575-
assert_eq!(escape_string("\u{2122}", false), "\"\\u2122\"");
1570+
assert_eq!(escape_string("\u{2122}", false), "\"\u{2122}\"");
15761571

15771572
// Infinity U+221E
1578-
assert_eq!(escape_string("\u{221E}", false), "\"\\u221E\"");
1573+
assert_eq!(escape_string("\u{221E}", false), "\"\u{221E}\"");
15791574

15801575
// Mixed ASCII and non-ASCII
1581-
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\\u00D7b\"");
1576+
assert_eq!(escape_string("a\u{00D7}b", false), "\"a\u{00D7}b\"");
15821577

15831578
// Multiple non-ASCII characters
1584-
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\\u00D7\\u00A0\"");
1579+
assert_eq!(escape_string("\u{00D7}\u{00A0}", false), "\"\u{00D7}\u{00A0}\"");
15851580

1586-
// Characters above BMP should use surrogate pairs
1587-
// U+1F600 (grinning face) = surrogate pair D83D DE00
1588-
assert_eq!(escape_string("\u{1F600}", false), "\"\\uD83D\\uDE00\"");
1581+
// Characters above BMP (emoji) -> raw UTF-8
1582+
assert_eq!(escape_string("\u{1F600}", false), "\"\u{1F600}\"");
15891583

1590-
// U+10000 (first supplementary char) = surrogate pair D800 DC00
1591-
assert_eq!(escape_string("\u{10000}", false), "\"\\uD800\\uDC00\"");
1584+
// U+10000 (first supplementary char) -> raw UTF-8
1585+
assert_eq!(escape_string("\u{10000}", false), "\"\u{10000}\"");
15921586

15931587
// ASCII printable chars (0x20-0x7E) should remain literal
15941588
assert_eq!(escape_string(" ~", false), "\" ~\"");

crates/oxc_angular_compiler/src/parser/html/entities.rs

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -50,42 +50,55 @@ pub fn decode_entity(entity: &str) -> Option<String> {
5050
/// For backward compatibility (matching Angular's behavior), this decodes
5151
/// HTML entities that appear in interpolation expressions.
5252
///
53-
/// Pattern: `&entity;` where entity can be:
54-
/// - Named: `amp`, `lt`, `gt`, `quot`, etc.
55-
/// - Decimal: `#123`
56-
/// - Hex: `#x7B` or `#X7B`
53+
/// Uses greedy matching to replicate Angular TS's `/&([^;]+);/g` regex behavior
54+
/// in `parser.ts` line 365. The regex matches from `&` to the first `;` with
55+
/// ANY characters in between. When decoding fails, the regex cursor advances
56+
/// past the `;`, which means earlier `&` characters can "consume" later `;`
57+
/// characters and prevent entity decoding.
58+
///
59+
/// Example: `foo && bar ? '&infin;' : baz` — the regex matches from the first
60+
/// `&` of `&&` all the way to the `;` in `&infin;`, creating one invalid match.
61+
/// The entity is not decoded because the entire span is consumed.
5762
pub fn decode_entities_in_string(s: &str) -> String {
5863
let mut result = String::with_capacity(s.len());
5964
let mut chars = s.char_indices().peekable();
6065

6166
while let Some((i, ch)) = chars.next() {
6267
if ch == '&' {
63-
// Look for entity pattern: &...;
64-
let mut entity_end = None;
68+
// Greedy match: find the first ';' (mimics /&([^;]+);/g)
6569
let remaining = &s[i..];
70+
let mut entity_end_byte = None;
71+
let mut chars_after_ampersand = 0u32;
6672

67-
// Find the semicolon
73+
// Skip the '&' at j=0, find the first ';'
6874
for (j, c) in remaining.char_indices() {
69-
if c == ';' {
70-
entity_end = Some(j);
71-
break;
72-
}
73-
// Entities shouldn't be too long, and should contain valid chars
74-
if j > 32 || (!c.is_ascii_alphanumeric() && c != '#' && c != '&') {
75-
break;
75+
if j > 0 {
76+
chars_after_ampersand += 1;
77+
if c == ';' {
78+
entity_end_byte = Some(j);
79+
break;
80+
}
7681
}
7782
}
7883

79-
if let Some(end) = entity_end {
84+
if let Some(end) = entity_end_byte {
8085
let entity = &remaining[..=end];
8186
if let Some(decoded) = decode_entity(entity) {
8287
result.push_str(&decoded);
83-
// Skip past the entity in the input
84-
for _ in 0..end {
88+
// Skip past the entity in the outer iterator.
89+
// Use char count (not byte offset) since .next() advances by character.
90+
for _ in 0..chars_after_ampersand {
8591
chars.next();
8692
}
8793
continue;
8894
}
95+
// Failed to decode: push the full matched text and advance past ';'
96+
// This matches regex behavior where the cursor advances past the match
97+
result.push_str(entity);
98+
for _ in 0..chars_after_ampersand {
99+
chars.next();
100+
}
101+
continue;
89102
}
90103
}
91104
result.push(ch);
@@ -2277,4 +2290,64 @@ mod tests {
22772290
// "Afr" maps to Mathematical Fraktur A (U+1D504)
22782291
assert!(entities.contains_key("Afr"));
22792292
}
2293+
2294+
#[test]
2295+
fn test_decode_entities_in_string_basic() {
2296+
assert_eq!(decode_entities_in_string("hello &amp; world"), "hello & world");
2297+
assert_eq!(decode_entities_in_string("&lt;div&gt;"), "<div>");
2298+
assert_eq!(decode_entities_in_string("no entities here"), "no entities here");
2299+
}
2300+
2301+
#[test]
2302+
fn test_decode_entities_greedy_matching() {
2303+
// Angular TS regex /&([^;]+);/g greedily matches from && to &infin;'s semicolon,
2304+
// consuming the entire span as one failed match — so &infin; is NOT decoded.
2305+
let input = "foo && bar ? '&infin;' : baz";
2306+
assert_eq!(
2307+
decode_entities_in_string(input),
2308+
"foo && bar ? '&infin;' : baz",
2309+
"greedy match from && should prevent &infin; decoding"
2310+
);
2311+
}
2312+
2313+
#[test]
2314+
fn test_decode_entities_multibyte_between_ampersand_and_semicolon() {
2315+
// When multi-byte characters appear between & and ;, the skip count must use
2316+
// character count (not byte offset) to avoid over-consuming from the iterator.
2317+
// 'café' has 4 chars but 5 bytes (é is 2 bytes in UTF-8).
2318+
// &café; is not a valid entity, so the full span is pushed as-is.
2319+
// The text after the semicolon ("rest") must be preserved.
2320+
let input = "&café;rest";
2321+
assert_eq!(
2322+
decode_entities_in_string(input),
2323+
"&café;rest",
2324+
"multi-byte chars between & and ; must not cause data loss after ;"
2325+
);
2326+
}
2327+
2328+
#[test]
2329+
fn test_decode_entities_multibyte_cjk_between_ampersand_and_semicolon() {
2330+
// CJK characters are 3 bytes each in UTF-8.
2331+
// &日本; has 3 chars between & and ; but 7 bytes.
2332+
// Using byte offset as skip count would consume 7 chars instead of 3,
2333+
// eating into " after" and silently dropping characters.
2334+
let input = "&日本;after";
2335+
assert_eq!(
2336+
decode_entities_in_string(input),
2337+
"&日本;after",
2338+
"CJK chars between & and ; must not cause data loss"
2339+
);
2340+
}
2341+
2342+
#[test]
2343+
fn test_decode_entities_emoji_between_ampersand_and_semicolon() {
2344+
// Emoji are 4 bytes each in UTF-8.
2345+
// &😀; byte offset of ; is 5, but only 1 char to skip after &.
2346+
let input = "&😀;tail";
2347+
assert_eq!(
2348+
decode_entities_in_string(input),
2349+
"&😀;tail",
2350+
"4-byte emoji between & and ; must not cause data loss"
2351+
);
2352+
}
22802353
}

crates/oxc_angular_compiler/src/pipeline/emit.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1525,6 +1525,10 @@ pub struct HostBindingCompilationResult<'a> {
15251525
/// Number of host variables for change detection.
15261526
/// Only set if > 0.
15271527
pub host_vars: Option<u32>,
1528+
/// Additional declarations (pooled constants like pure functions).
1529+
/// In Angular TS, template and host binding share the same ConstantPool,
1530+
/// so host binding constants get emitted alongside template constants.
1531+
pub declarations: OxcVec<'a, OutputStatement<'a>>,
15281532
}
15291533

15301534
/// Compile host bindings from start to finish.
@@ -1538,6 +1542,10 @@ pub struct HostBindingCompilationResult<'a> {
15381542
pub fn compile_host_bindings<'a>(
15391543
job: &mut HostBindingCompilationJob<'a>,
15401544
) -> HostBindingCompilationResult<'a> {
1545+
use crate::output::ast::DeclareVarStmt;
1546+
1547+
let allocator = job.allocator;
1548+
15411549
// Run all transformation phases for host bindings
15421550
transform_host(job);
15431551

@@ -1553,7 +1561,28 @@ pub fn compile_host_bindings<'a>(
15531561
let host_attrs = job.root.attributes.take();
15541562
let host_vars = job.root.vars.filter(|&v| v > 0);
15551563

1556-
HostBindingCompilationResult { host_binding_fn, host_attrs, host_vars }
1564+
// Collect declarations from host binding pool constants.
1565+
// In Angular TS, template and host binding share the same ConstantPool,
1566+
// so host binding pure functions get emitted alongside template constants.
1567+
let mut declarations = OxcVec::new_in(allocator);
1568+
for constant in job.pool.constants_mut() {
1569+
let value = emit_pooled_constant_value(allocator, &mut constant.kind);
1570+
declarations.push(OutputStatement::DeclareVar(Box::new_in(
1571+
DeclareVarStmt {
1572+
name: constant.name.clone(),
1573+
value: Some(value),
1574+
modifiers: StmtModifier::FINAL,
1575+
leading_comment: None,
1576+
source_span: None,
1577+
},
1578+
allocator,
1579+
)));
1580+
}
1581+
for stmt in job.pool.statements.iter() {
1582+
declarations.push(clone_output_statement(stmt, allocator));
1583+
}
1584+
1585+
HostBindingCompilationResult { host_binding_fn, host_attrs, host_vars, declarations }
15571586
}
15581587

15591588
/// Converts an IR binary operator to an output binary operator.

crates/oxc_angular_compiler/src/pipeline/phases/ordering.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ fn is_handled_update_op_kind(kind: OpKind) -> bool {
2727
| OpKind::TwoWayProperty
2828
| OpKind::DomProperty
2929
| OpKind::Attribute
30+
| OpKind::Control
3031
)
3132
}
3233

@@ -64,8 +65,10 @@ fn update_op_priority(op: &UpdateOp<'_>) -> u32 {
6465
(OpKind::TwoWayProperty, _) => 6, // Non-interpolation TwoWayProperty
6566
(OpKind::Property, false) => 6, // Non-interpolation Property
6667
(OpKind::Attribute, false) => 7, // Attribute without interpolation
68+
// Control comes last per Angular's UPDATE_ORDERING (line 69 of ordering.ts)
69+
(OpKind::Control, _) => 8,
6770
// DomProperty is not used in template bindings, but include for completeness
68-
(OpKind::DomProperty, _) => 8,
71+
(OpKind::DomProperty, _) => 9,
6972
_ => 100, // Other ops go last
7073
}
7174
}
@@ -124,6 +127,7 @@ fn get_update_op_target(op: &UpdateOp<'_>) -> Option<XrefId> {
124127
UpdateOp::ClassMap(p) => Some(p.target),
125128
UpdateOp::Attribute(p) => Some(p.target),
126129
UpdateOp::DomProperty(p) => Some(p.target),
130+
UpdateOp::Control(c) => Some(c.target),
127131
_ => None,
128132
}
129133
}

0 commit comments

Comments
 (0)