Skip to content

Commit 447aedc

Browse files
committed
feat(ie-dom): add Doctype node type to DOM #61
Move DOCTYPE from the ParseResult workaround to a proper DOM node:
- Add NodeKind::Doctype { name, public_id, system_id } variant
- Add Document::create_doctype() method
- Tree builder creates the Doctype node in handle_initial and appends it to the document root
- Remove doctype_name/doctype_public_id/doctype_system_id from ParseResult and TreeBuilder
- Conformance serializer handles Doctype via normal tree traversal
- parse_html example handles Doctype display
- Tree conformance rate unchanged (42.3%)
1 parent 6ad2758 commit 447aedc

5 files changed

Lines changed: 52 additions & 36 deletions

File tree

crates/ie-dom/src/document.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,26 @@ impl Document {
6262
id
6363
}
6464

65+
/// Creates a new `NodeKind::Doctype` node and stores it in this document's node list.
///
/// `name`, `public_id` and `system_id` are copied into owned `String`s; a `None`
/// identifier is stored as `None`.
///
/// The new node has no parent, no children and no attributes. This method does not
/// link it to any other node, so the caller must attach it to the tree separately.
///
/// Returns the id of the new node, which is the index at which it was pushed
/// onto `self.nodes`.
pub fn create_doctype(
66+
&mut self,
67+
name: &str,
68+
public_id: Option<&str>,
69+
system_id: Option<&str>,
70+
) -> NodeId {
71+
// Capture the length before pushing: it is the index the new node will occupy.
let id = self.nodes.len();
72+
self.nodes.push(Node {
73+
kind: NodeKind::Doctype {
74+
name: name.to_string(),
75+
public_id: public_id.map(|s| s.to_string()),
76+
system_id: system_id.map(|s| s.to_string()),
77+
},
78+
// Created detached: no parent link is set here.
parent: None,
79+
children: Vec::new(),
80+
attributes: HashMap::new(),
81+
});
82+
id
83+
}
84+
6585
// --- Accessors ---
6686

6787
pub fn node(&self, id: NodeId) -> Option<&Node> {

crates/ie-dom/src/node.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ pub struct Node {
1515
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1616
pub enum NodeKind {
1717
Document,
18+
Doctype {
19+
name: String,
20+
public_id: Option<String>,
21+
system_id: Option<String>,
22+
},
1823
Element(String),
1924
Text(String),
2025
Comment(String),

crates/ie-html/examples/parse_html.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ fn print_tree(doc: &ie_dom::Document, id: ie_dom::NodeId, indent: usize) {
3535
let prefix = "| ".repeat(indent);
3636
match &node.kind {
3737
ie_dom::NodeKind::Document => println!("{prefix}#document"),
38+
ie_dom::NodeKind::Doctype { name, .. } => println!("{prefix}<!DOCTYPE {name}>"),
3839
ie_dom::NodeKind::Element(name) => {
3940
let attrs: Vec<String> = node
4041
.attributes

crates/ie-html/src/tree_builder.rs

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@ pub struct ParseResult {
1111
pub errors: Vec<String>,
1212
pub style_elements: Vec<String>,
1313
pub link_stylesheets: Vec<String>,
14-
pub doctype_name: Option<String>,
15-
pub doctype_public_id: Option<String>,
16-
pub doctype_system_id: Option<String>,
1714
}
1815

1916
/// Top-level parse function. HTML parsing never fails — errors are collected.
@@ -25,9 +22,6 @@ pub fn parse(html: &str) -> ParseResult {
2522
errors: tb.errors,
2623
style_elements: tb.style_elements,
2724
link_stylesheets: tb.link_stylesheets,
28-
doctype_name: tb.doctype_name,
29-
doctype_public_id: tb.doctype_public_id,
30-
doctype_system_id: tb.doctype_system_id,
3125
}
3226
}
3327

@@ -48,9 +42,6 @@ struct TreeBuilder<'a> {
4842
link_stylesheets: Vec<String>,
4943
pending_text: String,
5044
reprocess_depth: u32,
51-
doctype_name: Option<String>,
52-
doctype_public_id: Option<String>,
53-
doctype_system_id: Option<String>,
5445
done: bool,
5546
}
5647

@@ -73,9 +64,6 @@ impl<'a> TreeBuilder<'a> {
7364
link_stylesheets: Vec::new(),
7465
pending_text: String::new(),
7566
reprocess_depth: 0,
76-
doctype_name: None,
77-
doctype_public_id: None,
78-
doctype_system_id: None,
7967
done: false,
8068
}
8169
}
@@ -850,9 +838,12 @@ impl<'a> TreeBuilder<'a> {
850838
system_id,
851839
..
852840
} => {
853-
self.doctype_name = name;
854-
self.doctype_public_id = public_id;
855-
self.doctype_system_id = system_id;
841+
let doctype_id = self.doc.create_doctype(
842+
name.as_deref().unwrap_or(""),
843+
public_id.as_deref(),
844+
system_id.as_deref(),
845+
);
846+
let _ = self.doc.append_child(self.doc.root, doctype_id);
856847
self.mode = InsertionMode::BeforeHtml;
857848
}
858849
_ => {
@@ -2378,6 +2369,7 @@ mod tests {
23782369
NodeKind::Text(t) => Some(format!("#text:{t}")),
23792370
NodeKind::Comment(c) => Some(format!("#comment:{c}")),
23802371
NodeKind::Document => Some("#document".to_string()),
2372+
NodeKind::Doctype { name, .. } => Some(format!("<!DOCTYPE {name}>")),
23812373
})
23822374
.collect()
23832375
}
@@ -2395,7 +2387,7 @@ mod tests {
23952387
let result = parse_and_check("<!DOCTYPE html><html><head></head><body></body></html>");
23962388
let root = result.document.root;
23972389
let children = child_names(&result, root);
2398-
assert_eq!(children, vec!["html"]);
2390+
assert_eq!(children, vec!["<!DOCTYPE html>", "html"]);
23992391

24002392
let html_id = find_element(&result, "html").unwrap();
24012393
let html_children = child_names(&result, html_id);

crates/ie-html/tests/tree_conformance.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -135,26 +135,8 @@ fn parse_test_file(path: &std::path::Path) -> Vec<TreeTest> {
135135
}
136136

137137
/// Serialize a DOM document to the html5lib tree test format.
138-
fn serialize_tree_with_doctype(result: &ie_html::ParseResult) -> String {
138+
fn serialize_tree(doc: &Document) -> String {
139139
let mut output = String::new();
140-
// Output doctype if present
141-
if result.doctype_name.is_some()
142-
|| result.doctype_public_id.is_some()
143-
|| result.doctype_system_id.is_some()
144-
{
145-
let name = result.doctype_name.as_deref().unwrap_or("");
146-
let public = result.doctype_public_id.as_deref();
147-
let system = result.doctype_system_id.as_deref();
148-
match (public, system) {
149-
(Some(p), Some(s)) => {
150-
output.push_str(&format!("| <!DOCTYPE {name} \"{p}\" \"{s}\">\n"))
151-
}
152-
(Some(p), None) => output.push_str(&format!("| <!DOCTYPE {name} \"{p}\" \"\">\n")),
153-
(None, Some(s)) => output.push_str(&format!("| <!DOCTYPE {name} \"\" \"{s}\">\n")),
154-
(None, None) => output.push_str(&format!("| <!DOCTYPE {name}>\n")),
155-
}
156-
}
157-
let doc = &result.document;
158140
let root = doc.root;
159141
let root_node = doc.node(root).unwrap();
160142
for &child_id in &root_node.children {
@@ -173,6 +155,22 @@ fn serialize_node(doc: &Document, id: NodeId, depth: usize, output: &mut String)
173155

174156
match &node.kind {
175157
NodeKind::Document => {}
158+
NodeKind::Doctype {
159+
name,
160+
public_id,
161+
system_id,
162+
} => match (public_id.as_deref(), system_id.as_deref()) {
163+
(Some(p), Some(s)) => {
164+
output.push_str(&format!("| {indent}<!DOCTYPE {name} \"{p}\" \"{s}\">\n"))
165+
}
166+
(Some(p), None) => {
167+
output.push_str(&format!("| {indent}<!DOCTYPE {name} \"{p}\" \"\">\n"))
168+
}
169+
(None, Some(s)) => {
170+
output.push_str(&format!("| {indent}<!DOCTYPE {name} \"\" \"{s}\">\n"))
171+
}
172+
(None, None) => output.push_str(&format!("| {indent}<!DOCTYPE {name}>\n")),
173+
},
176174
NodeKind::Element(name) => {
177175
output.push_str(&format!("| {indent}<{name}>\n"));
178176
// Sort attributes alphabetically for consistent comparison
@@ -214,7 +212,7 @@ fn run_test_file(filename: &str) -> (usize, usize, Vec<String>) {
214212
total += 1;
215213

216214
let result = parse(&test.input);
217-
let actual = serialize_tree_with_doctype(&result);
215+
let actual = serialize_tree(&result.document);
218216

219217
if actual == test.expected_tree {
220218
passed += 1;

0 commit comments

Comments
 (0)