Skip to content

Commit 8d11968

Browse files
Brooooooklyn and claude
authored
fix: preserve multi-byte UTF-8 characters in CSS encapsulation (#192)
The CSS encapsulation code used `bytes[i] as char` to copy characters, which treats each byte of a multi-byte UTF-8 sequence as a separate Latin-1 codepoint. This corrupted non-ASCII characters (e.g. bullet •) that appear after Sass compiles CSS escape sequences like `\2022`. Replace all 13 instances with `push_utf8_char()` which reads the UTF-8 character width from the leading byte and copies the full character. Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 290ef12 commit 8d11968

File tree

2 files changed

+85
-26
lines changed

2 files changed

+85
-26
lines changed

crates/oxc_angular_compiler/src/styles/encapsulation.rs

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,28 @@ const COMMENT_PLACEHOLDER: &str = "%COMMENT%";
3333
const POLYFILL_HOST: &str = "-shadowcsshost";
3434
const POLYFILL_HOST_NO_COMBINATOR: &str = "-shadowcsshost-no-combinator";
3535

36+
/// Push the single UTF-8 character starting at byte position `i` of `source` into `result`.
///
/// Returns the number of bytes consumed (1 for ASCII, 2-4 for multi-byte), so a
/// byte-cursor loop can advance with `i += push_utf8_char(&mut out, src, i)`.
///
/// This replaces the incorrect `result.push(bytes[i] as char)` pattern which
/// corrupts multi-byte UTF-8 characters by treating each byte as a Latin-1 codepoint.
///
/// # Panics
///
/// Panics if `i` is not the start of a character in `source`, i.e. if
/// `i >= source.len()` or `i` points into the middle of a multi-byte sequence.
#[inline]
fn push_utf8_char(result: &mut String, source: &str, i: usize) -> usize {
    // Let the standard library decode the character instead of deriving its width
    // from the leading byte by hand: `&str` is guaranteed to be valid UTF-8, and
    // slicing at `i` already enforces that `i` is a character boundary.
    let ch = source[i..]
        .chars()
        .next()
        .expect("push_utf8_char: `i` must be less than `source.len()`");
    result.push(ch);
    ch.len_utf8()
}
57+
3658
// =============================================================================
3759
// SafeSelector - Escapes problematic CSS patterns before processing
3860
// =============================================================================
@@ -101,8 +123,7 @@ impl SafeSelector {
101123
i += 1;
102124
}
103125
} else {
104-
new_result.push(bytes[i] as char);
105-
i += 1;
126+
i += push_utf8_char(&mut new_result, &result, i);
106127
}
107128
}
108129
result = new_result;
@@ -125,8 +146,7 @@ impl SafeSelector {
125146
new_result.push_str(&placeholder);
126147
i += 2;
127148
} else {
128-
new_result.push(bytes[i] as char);
129-
i += 1;
149+
i += push_utf8_char(&mut new_result, &result, i);
130150
}
131151
}
132152
result = new_result;
@@ -349,8 +369,7 @@ fn extract_comments(css: &str) -> (String, Vec<String>) {
349369

350370
result.push_str(COMMENT_PLACEHOLDER);
351371
} else {
352-
result.push(bytes[i] as char);
353-
i += 1;
372+
i += push_utf8_char(&mut result, css, i);
354373
}
355374
}
356375

@@ -505,8 +524,7 @@ fn scope_keyframes_names(
505524
(name, name_end, None)
506525
} else {
507526
// No valid name found
508-
result.push(bytes[i] as char);
509-
i += 1;
527+
i += push_utf8_char(&mut result, css, i);
510528
continue;
511529
};
512530

@@ -571,8 +589,7 @@ fn scope_keyframes_names(
571589
}
572590
}
573591

574-
result.push(bytes[i] as char);
575-
i += 1;
592+
i += push_utf8_char(&mut result, css, i);
576593
}
577594

578595
result
@@ -627,8 +644,7 @@ fn scope_animation_rules(
627644
|| !css.is_char_boundary(i + value_start)
628645
|| !css.is_char_boundary(i + value_end)
629646
{
630-
result.push(bytes[i] as char);
631-
i += 1;
647+
i += push_utf8_char(&mut result, css, i);
632648
continue;
633649
}
634650
let prefix = &css[i..i + prefix_end];
@@ -655,8 +671,7 @@ fn scope_animation_rules(
655671
}
656672
}
657673

658-
result.push(bytes[i] as char);
659-
i += 1;
674+
i += push_utf8_char(&mut result, css, i);
660675
}
661676

662677
result
@@ -2581,8 +2596,7 @@ fn replace_host_context_patterns(s: &str, replacement: &str) -> String {
25812596
i = after;
25822597
continue;
25832598
}
2584-
result.push(bytes[i] as char);
2585-
i += 1;
2599+
i += push_utf8_char(&mut result, s, i);
25862600
}
25872601

25882602
result
@@ -2640,8 +2654,7 @@ fn insert_polyfill_directives(css: &str) -> String {
26402654
continue;
26412655
}
26422656
}
2643-
result.push(bytes[i] as char);
2644-
i += 1;
2657+
i += push_utf8_char(&mut result, css, i);
26452658
}
26462659

26472660
result
@@ -2762,8 +2775,7 @@ fn insert_polyfill_rules(css: &str) -> String {
27622775
}
27632776
}
27642777
}
2765-
result.push(bytes[i] as char);
2766-
i += 1;
2778+
i += push_utf8_char(&mut result, css, i);
27672779
}
27682780

27692781
result
@@ -3048,8 +3060,7 @@ fn strip_deep_combinators(s: &str) -> String {
30483060
continue;
30493061
}
30503062

3051-
result.push(bytes[i] as char);
3052-
i += 1;
3063+
i += push_utf8_char(&mut result, s, i);
30533064
}
30543065

30553066
result
@@ -3097,8 +3108,7 @@ fn strip_host_patterns(s: &str) -> String {
30973108
continue;
30983109
}
30993110

3100-
result.push(bytes[i] as char);
3101-
i += 1;
3111+
i += push_utf8_char(&mut result, s, i);
31023112
}
31033113

31043114
result
@@ -3147,8 +3157,7 @@ fn remove_unscoped_rules(css: &str) -> String {
31473157
continue;
31483158
}
31493159
}
3150-
result.push(bytes[i] as char);
3151-
i += 1;
3160+
i += push_utf8_char(&mut result, css, i);
31523161
}
31533162

31543163
result

crates/oxc_angular_compiler/tests/shadow_css_test.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,56 @@ fn test_handle_curly_braces_in_quoted_content() {
290290
assert_css_eq!(shim(css, "contenta"), expected);
291291
}
292292

293+
#[test]
fn test_unicode_in_content_property() {
    // Issue #191: unicode characters in the `content` property of an `:after` rule.

    // Form 1: the raw CSS escape sequence, as it appears in inline styles.
    let escaped_input = r".test-div:after { content: '\2022'; }";
    let escaped_expected = r".test-div[contenta]:after { content: '\2022'; }";
    assert_css_eq!(shim(escaped_input, "contenta"), escaped_expected);

    // Form 2: after Sass compilation, where `\2022` has become the actual bullet character.
    let bullet_input = ".test-div:after { content: \"\u{2022}\"; }";
    let bullet_expected = ".test-div[contenta]:after { content:\"\u{2022}\"; }";
    assert_css_eq!(shim(bullet_input, "contenta"), bullet_expected);
}
304+
305+
#[test]
fn test_multibyte_utf8_preserved_in_css_values() {
    // Each case is (input CSS, text that must survive shimming, failure label).
    let cases = [
        // 2-byte: ¢ (U+00A2), © (U+00A9)
        (
            ".a:after { content: \"\u{00A2}\u{00A9}\"; }",
            "\u{00A2}\u{00A9}",
            "2-byte UTF-8 chars corrupted",
        ),
        // 3-byte: • (U+2022), — (U+2014), → (U+2192)
        (
            ".a:after { content: \"\u{2022}\u{2014}\u{2192}\"; }",
            "\u{2022}\u{2014}\u{2192}",
            "3-byte UTF-8 chars corrupted",
        ),
        // 4-byte: 😀 (U+1F600)
        (
            ".a:after { content: \"\u{1F600}\"; }",
            "\u{1F600}",
            "4-byte UTF-8 char corrupted",
        ),
        // Non-ASCII in the selector itself (class name with an accented character)
        (
            ".caf\u{00E9} { color: red; }",
            "caf\u{00E9}",
            "Non-ASCII in selector corrupted",
        ),
    ];

    for (css, needle, label) in cases {
        let result = shim(css, "contenta");
        assert!(result.contains(needle), "{label}: {result}");
    }
}
328+
329+
#[test]
fn test_finalize_preserves_unicode() {
    use oxc_angular_compiler::styles::finalize_component_style;

    // The character that must come out of the pipeline intact.
    let bullet = '\u{2022}';
    // Attribute placeholders passed to both pipeline runs below.
    let ngcontent = "_ngcontent-%COMP%";
    let nghost = "_nghost-%COMP%";

    // Full pipeline with Sass-compiled CSS containing the actual bullet character.
    {
        let css = ".test:after { content: \"\u{2022}\"; }";
        let result = finalize_component_style(css, true, ngcontent, nghost, true);
        assert!(result.contains(bullet), "Bullet lost in full pipeline: {result}");
    }

    // Same rule, preceded by the `@charset` prefix emitted by Sass.
    {
        let css = "@charset \"UTF-8\";\n.test:after { content: \"\u{2022}\"; }";
        let result = finalize_component_style(css, true, ngcontent, nghost, true);
        assert!(result.contains(bullet), "Bullet lost with @charset: {result}");
    }
}
342+
293343
// ============================================================================
294344
// Playground CSS Test (real-world case)
295345
// ============================================================================

0 commit comments

Comments
 (0)