Skip to content

Commit b2af8df

Browse files
committed
align html transform
1 parent e0143ef commit b2af8df

File tree

6 files changed

+795
-821
lines changed

6 files changed

+795
-821
lines changed

crates/oxc_angular_compiler/src/parser/html/lexer.rs

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
//! Ported from Angular's `ml_parser/lexer.ts`.
66
77
use super::entities::{decode_entity, get_named_entities};
8+
use super::tags::{TagContentType, get_html_tag_definition};
89
use crate::util::chars;
910

1011
/// Supported block keywords for Angular control flow.
@@ -23,12 +24,6 @@ const SUPPORTED_BLOCKS: &[&str] = &[
2324
"error",
2425
];
2526

26-
/// Tags whose content is raw text (no entity decoding, no inner tag parsing).
27-
const RAW_TEXT_TAGS: &[&str] = &["script", "style"];
28-
29-
/// Tags whose content is escapable raw text (entity decoding, no inner tag parsing).
30-
const ESCAPABLE_RAW_TEXT_TAGS: &[&str] = &["title", "textarea"];
31-
3227
/// Token types for HTML templates.
3328
/// Matches Angular's `TokenType` enum from `ml_parser/tokens.ts`.
3429
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -1676,14 +1671,21 @@ impl<'a> HtmlLexer<'a> {
16761671
self.tokens.push(HtmlToken::empty(HtmlTokenType::TagOpenEnd, close_start, self.index));
16771672

16781673
// Check if this tag has raw or escapable raw text content
1679-
// Note: For title tag, only non-svg namespace uses escapable raw text
1674+
// Use get_html_tag_definition().get_content_type(prefix) to handle
1675+
// namespace-aware content types (e.g., svg:title is parsable, html:title is escapable raw)
16801676
let lower_name = name.to_lowercase();
1681-
if prefix.is_empty() {
1682-
if RAW_TEXT_TAGS.contains(&lower_name.as_str()) {
1677+
let ns_prefix = if prefix.is_empty() { None } else { Some(prefix.as_str()) };
1678+
let content_type = get_html_tag_definition(&lower_name).get_content_type(ns_prefix);
1679+
match content_type {
1680+
TagContentType::RawText => {
16831681
self.scan_raw_text_with_tag_close(&lower_name, false);
1684-
} else if ESCAPABLE_RAW_TEXT_TAGS.contains(&lower_name.as_str()) {
1682+
}
1683+
TagContentType::EscapableRawText => {
16851684
self.scan_raw_text_with_tag_close(&lower_name, true);
16861685
}
1686+
TagContentType::Parsable => {
1687+
// Normal parsable content, no special handling needed
1688+
}
16871689
}
16881690
} else if self.peek() == '/' {
16891691
// `/` without `>` following - Angular consumes the `/` and fails on `>`
@@ -1890,17 +1892,21 @@ impl<'a> HtmlLexer<'a> {
18901892
// Check if the tag suffix indicates raw or escapable raw text
18911893
// For component tags like <MyComp:script> or <MyComp:title>
18921894
// But NOT when there's a namespace prefix like svg: or math:
1893-
// (svg:title and math:title don't use escapable raw text)
1895+
// Use get_html_tag_definition().get_content_type(prefix) to handle
1896+
// namespace-aware content types (e.g., svg:title is parsable, html:title is escapable raw)
18941897
let lower_tag = tag_name.to_lowercase();
1895-
let lower_prefix = prefix.to_lowercase();
1896-
let has_special_prefix = lower_prefix == "svg" || lower_prefix == "math";
1897-
1898-
if !has_special_prefix {
1899-
if RAW_TEXT_TAGS.contains(&lower_tag.as_str()) {
1898+
let ns_prefix = if prefix.is_empty() { None } else { Some(prefix.as_str()) };
1899+
let content_type = get_html_tag_definition(&lower_tag).get_content_type(ns_prefix);
1900+
match content_type {
1901+
TagContentType::RawText => {
19001902
self.scan_component_raw_text(&component_name, &prefix, &tag_name, false);
1901-
} else if ESCAPABLE_RAW_TEXT_TAGS.contains(&lower_tag.as_str()) {
1903+
}
1904+
TagContentType::EscapableRawText => {
19021905
self.scan_component_raw_text(&component_name, &prefix, &tag_name, true);
19031906
}
1907+
TagContentType::Parsable => {
1908+
// Normal parsable content, no special handling needed
1909+
}
19041910
}
19051911
} else if self.peek() == '<' || self.peek() == chars::EOF {
19061912
// Incomplete component - find the most recent COMPONENT_OPEN_START token

crates/oxc_angular_compiler/src/parser/html/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ pub use entities::{NGSP_UNICODE, decode_entity, get_named_entities};
1919
pub use lexer::*;
2020
pub use parser::*;
2121
pub use tags::{
22-
HtmlTagDefinition, TagContentType, get_html_tag_definition, get_ns_prefix, is_void_element,
23-
merge_ns_and_name, split_ns_name,
22+
ContentType, HtmlTagDefinition, TagContentType, get_html_tag_definition, get_ns_prefix,
23+
is_void_element, merge_ns_and_name, namespace_uri, split_ns_name,
2424
};
2525
pub use whitespace::{PRESERVE_WS_ATTR_NAME, WhitespaceVisitor, remove_whitespaces};

crates/oxc_angular_compiler/src/parser/html/parser.rs

Lines changed: 67 additions & 114 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ use crate::util::{ParseError, ParseLocation, ParseSourceFile, ParseSourceSpan};
1919

2020
use super::entities::decode_entities_in_string;
2121
use super::lexer::{HtmlLexer, HtmlToken, HtmlTokenType};
22+
use super::tags::{get_html_tag_definition, is_void_element};
2223

2324
/// Result of parsing an HTML template.
2425
pub struct HtmlParseResult<'a> {
@@ -517,15 +518,21 @@ impl<'a> HtmlParser<'a> {
517518
let start = start_token.start;
518519
// TagOpenStart has parts [prefix, name]
519520
// ComponentOpenStart has parts [component_name, prefix, tag_name]
520-
let tag_name = if start_token.token_type == HtmlTokenType::ComponentOpenStart {
521-
// For components, use the component name (first part)
522-
start_token.value().to_string()
523-
} else {
524-
// For regular tags, include the namespace prefix if present
525-
let prefix = start_token.prefix();
526-
let name = start_token.name();
527-
if prefix.is_empty() { name.to_string() } else { format!("{}:{}", prefix, name) }
528-
};
521+
let (tag_name, local_name, has_ns_prefix) =
522+
if start_token.token_type == HtmlTokenType::ComponentOpenStart {
523+
// For components, use the component name (first part)
524+
let name = start_token.value().to_string();
525+
(name.clone(), name, false)
526+
} else {
527+
// For regular tags, include the namespace prefix if present
528+
// Angular uses :prefix:name format for namespaced elements
529+
let prefix = start_token.prefix();
530+
let name = start_token.name();
531+
let has_prefix = !prefix.is_empty();
532+
let full_name =
533+
if has_prefix { format!(":{}:{}", prefix, name) } else { name.to_string() };
534+
(full_name, name.to_string(), has_prefix)
535+
};
529536

530537
// Check if we need to auto-close the current element (HTML5 optional end tags)
531538
self.auto_close_element_if_needed(&tag_name);
@@ -541,6 +548,23 @@ impl<'a> HtmlParser<'a> {
541548
{
542549
let end_pos = token.end;
543550
self.advance();
551+
552+
// Validate self-closing: only void, custom, and foreign elements can be self-closed
553+
// Foreign elements include those with explicit namespace prefix (e.g., svg:rect)
554+
// or those with implicit namespace prefix (e.g., <svg> has implicitNamespacePrefix='svg')
555+
let tag_def = get_html_tag_definition(&local_name);
556+
let is_foreign = has_ns_prefix || tag_def.implicit_namespace_prefix.is_some();
557+
if !(tag_def.can_self_close || is_foreign || tag_def.is_void) {
558+
let err = self.make_error(
559+
start,
560+
format!(
561+
"Only void, custom and foreign elements can be self closed \"{}\"",
562+
local_name
563+
),
564+
);
565+
self.errors.push(err);
566+
}
567+
544568
(true, Some(end_pos))
545569
} else if token.token_type == HtmlTokenType::TagOpenEnd
546570
|| token.token_type == HtmlTokenType::ComponentOpenEnd
@@ -595,16 +619,31 @@ impl<'a> HtmlParser<'a> {
595619
let end = token.end;
596620
// TagClose has parts [prefix, name]
597621
// ComponentClose has parts [component_name, prefix, tag_name]
598-
let tag_name = if token.token_type == HtmlTokenType::ComponentClose {
599-
token.value().to_string()
622+
let (tag_name, local_name) = if token.token_type == HtmlTokenType::ComponentClose {
623+
let name = token.value().to_string();
624+
(name.clone(), name)
600625
} else {
601626
// For regular tags, include the namespace prefix if present
627+
// Angular uses :prefix:name format for namespaced elements
602628
let prefix = token.prefix();
603629
let name = token.name();
604-
if prefix.is_empty() { name.to_string() } else { format!("{}:{}", prefix, name) }
630+
let full_name =
631+
if prefix.is_empty() { name.to_string() } else { format!(":{}:{}", prefix, name) };
632+
(full_name, name.to_string())
605633
};
606634
let end_span = self.make_span(start, end);
607635

636+
// Check if this is a void element - void elements don't have end tags
637+
let tag_def = get_html_tag_definition(&local_name);
638+
if tag_def.is_void {
639+
let err = self.make_error(
640+
start,
641+
format!("Void elements do not have end tags \"{}\"", local_name),
642+
);
643+
self.errors.push(err);
644+
return;
645+
}
646+
608647
// Pop the matching element from the stack
609648
if let Some(node) = self.pop_element_container(&tag_name, Some(end_span)) {
610649
self.add_to_parent(node);
@@ -644,13 +683,14 @@ impl<'a> HtmlParser<'a> {
644683
let Some(name_token) = self.advance() else {
645684
break; // Should not happen after peek, but handle gracefully
646685
};
647-
// Include namespace prefix if present (e.g., xlink:href -> xlink:href)
686+
// Include namespace prefix if present (e.g., xlink:href -> :xlink:href)
687+
// Angular uses :prefix:name format for namespaced attributes
648688
let prefix = name_token.prefix();
649689
let base_name = name_token.name();
650690
let name = if prefix.is_empty() {
651691
base_name.to_string()
652692
} else {
653-
format!("{prefix}:{base_name}")
693+
format!(":{prefix}:{base_name}")
654694
};
655695
let name_start = name_token.start;
656696
let name_end = name_token.end;
@@ -878,7 +918,10 @@ impl<'a> HtmlParser<'a> {
878918
tokens.push((InterpolatedTokenType::Text, vec![value], first_span));
879919
}
880920
HtmlTokenType::Interpolation => {
881-
let decoded_expr = first_token_parts.join("");
921+
// For backward compatibility, decode HTML entities in interpolation
922+
// (same as Angular's _consumeText in parser.ts)
923+
let joined = first_token_parts.join("");
924+
let decoded_expr = decode_entities_in_string(&joined);
882925
text.push_str(&decoded_expr);
883926
tokens.push((InterpolatedTokenType::Interpolation, first_token_parts, first_span));
884927
}
@@ -923,7 +966,10 @@ impl<'a> HtmlParser<'a> {
923966
tokens.push((InterpolatedTokenType::Text, vec![value], tok_span));
924967
}
925968
HtmlTokenType::Interpolation => {
926-
let decoded_expr = tok_parts.join("");
969+
// For backward compatibility, decode HTML entities in interpolation
970+
// (same as Angular's _consumeText in parser.ts)
971+
let joined = tok_parts.join("");
972+
let decoded_expr = decode_entities_in_string(&joined);
927973
text.push_str(&decoded_expr);
928974
tokens.push((
929975
InterpolatedTokenType::Interpolation,
@@ -1565,104 +1611,11 @@ impl<'a> HtmlParser<'a> {
15651611
}
15661612
}
15671613

1568-
/// Checks if an element is a void element (no closing tag).
1569-
fn is_void_element(name: &str) -> bool {
1570-
matches!(
1571-
name.to_lowercase().as_str(),
1572-
"area"
1573-
| "base"
1574-
| "br"
1575-
| "col"
1576-
| "embed"
1577-
| "hr"
1578-
| "img"
1579-
| "input"
1580-
| "link"
1581-
| "meta"
1582-
| "param"
1583-
| "source"
1584-
| "track"
1585-
| "wbr"
1586-
)
1587-
}
1588-
15891614
/// Checks if the current element should be auto-closed when a new element is opened.
1590-
/// Based on HTML5 optional end tag rules.
1615+
/// Uses the tag definitions from tags.rs to match Angular's behavior exactly.
15911616
fn should_auto_close(current_tag: &str, new_tag: &str) -> bool {
1592-
let new_tag_lower = new_tag.to_lowercase();
1593-
1594-
match current_tag {
1595-
// <p> is auto-closed by block-level elements or another <p>
1596-
"p" => matches!(
1597-
new_tag_lower.as_str(),
1598-
"p" | "div"
1599-
| "ul"
1600-
| "ol"
1601-
| "li"
1602-
| "dl"
1603-
| "dt"
1604-
| "dd"
1605-
| "table"
1606-
| "tr"
1607-
| "td"
1608-
| "th"
1609-
| "address"
1610-
| "article"
1611-
| "aside"
1612-
| "blockquote"
1613-
| "details"
1614-
| "fieldset"
1615-
| "figcaption"
1616-
| "figure"
1617-
| "footer"
1618-
| "form"
1619-
| "h1"
1620-
| "h2"
1621-
| "h3"
1622-
| "h4"
1623-
| "h5"
1624-
| "h6"
1625-
| "header"
1626-
| "hgroup"
1627-
| "hr"
1628-
| "main"
1629-
| "menu"
1630-
| "nav"
1631-
| "pre"
1632-
| "section"
1633-
| "video"
1634-
| "audio"
1635-
| "canvas"
1636-
),
1637-
// <li> is auto-closed by another <li>
1638-
"li" => new_tag_lower == "li",
1639-
// <dt> is auto-closed by <dt> or <dd>
1640-
"dt" => matches!(new_tag_lower.as_str(), "dt" | "dd"),
1641-
// <dd> is auto-closed by <dt> or <dd>
1642-
"dd" => matches!(new_tag_lower.as_str(), "dt" | "dd"),
1643-
// <tr> is auto-closed by another <tr>
1644-
"tr" => new_tag_lower == "tr",
1645-
// <td> and <th> are auto-closed by <td>, <th>, or <tr>
1646-
"td" | "th" => matches!(new_tag_lower.as_str(), "td" | "th" | "tr"),
1647-
// <option> is auto-closed by another <option> or <optgroup>
1648-
"option" => matches!(new_tag_lower.as_str(), "option" | "optgroup"),
1649-
// <optgroup> is auto-closed by another <optgroup>
1650-
"optgroup" => new_tag_lower == "optgroup",
1651-
// <colgroup> is auto-closed by elements other than <col>
1652-
"colgroup" => new_tag_lower != "col" && is_table_content(&new_tag_lower),
1653-
// <thead>, <tbody>, <tfoot> are auto-closed by each other
1654-
"thead" | "tbody" | "tfoot" => {
1655-
matches!(new_tag_lower.as_str(), "thead" | "tbody" | "tfoot")
1656-
}
1657-
// <rp> and <rt> are auto-closed by each other
1658-
"rp" | "rt" => matches!(new_tag_lower.as_str(), "rp" | "rt"),
1659-
_ => false,
1660-
}
1661-
}
1662-
1663-
/// Checks if a tag is table content (for colgroup auto-close logic).
1664-
fn is_table_content(tag: &str) -> bool {
1665-
matches!(tag, "thead" | "tbody" | "tfoot" | "tr" | "td" | "th" | "caption" | "colgroup")
1617+
let tag_def = get_html_tag_definition(current_tag);
1618+
tag_def.is_closed_by_child(new_tag)
16661619
}
16671620

16681621
#[cfg(test)]
@@ -1806,8 +1759,8 @@ mod tests {
18061759
// Should report unclosed block error
18071760
assert!(!result.errors.is_empty(), "Expected unclosed block error");
18081761
assert!(
1809-
result.errors[0].msg.contains("Unclosed block"),
1810-
"Error message should mention unclosed block"
1762+
result.errors.iter().any(|e| e.msg.contains("Unclosed block")),
1763+
"At least one error message should mention unclosed block"
18111764
);
18121765
}
18131766

0 commit comments

Comments
 (0)