Skip to content

Commit 1e86fb9

Browse files
Merge pull request #15 from webexpress-framework/copilot/improve-reverse-html-renderer
Add missing factory mappings and comprehensive parser tests
2 parents 040544d + 6fe81a0 commit 1e86fb9

5 files changed

Lines changed: 267 additions & 1 deletion

File tree

src/WebExpress.WebCore.Test/Html/Parser/UnitTestHtmlElementFactory.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,38 @@ public void NullTagName_Throws()
113113
{
114114
Assert.Throws<System.ArgumentNullException>(() => HtmlElementFactory.Create(null));
115115
}
116+
117+
/// <summary>
118+
/// The standard 'kbd' tag maps to the existing <see cref="HtmlElementTextSemanticsKdb"/> (the class name keeps the legacy 'kdb' spelling).
119+
/// </summary>
120+
[Fact]
121+
public void KbdTag_MapsToKdbElement()
122+
{
123+
var element = HtmlElementFactory.Create("kbd");
124+
125+
Assert.IsType<HtmlElementTextSemanticsKdb>(element);
126+
}
127+
128+
/// <summary>
129+
/// The 'keygen' tag is resolved to <see cref="HtmlElementFormKeygen"/>.
130+
/// </summary>
131+
[Fact]
132+
public void KnownTag_Keygen_ReturnsCorrectType()
133+
{
134+
var element = HtmlElementFactory.Create("keygen");
135+
136+
Assert.IsType<HtmlElementFormKeygen>(element);
137+
}
138+
139+
/// <summary>
140+
/// The 'command' tag is resolved to <see cref="HtmlElementInteractiveCommand"/>.
141+
/// </summary>
142+
[Fact]
143+
public void KnownTag_Command_ReturnsCorrectType()
144+
{
145+
var element = HtmlElementFactory.Create("command");
146+
147+
Assert.IsType<HtmlElementInteractiveCommand>(element);
148+
}
116149
}
117150
}

src/WebExpress.WebCore.Test/Html/Parser/UnitTestHtmlParser.cs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -340,5 +340,161 @@ public void RoundTrip_Img_ProducesEquivalentHtml()
340340
// validation
341341
Assert.Equal(html, restoredHtml);
342342
}
343+
344+
// ------------------------------------------------------------------
345+
// Additional tests
346+
// ------------------------------------------------------------------
347+
348+
/// <summary>
349+
/// ParseSingle returns the first node.
350+
/// </summary>
351+
[Fact]
352+
public void ParseSingle_ReturnsFirstNode()
353+
{
354+
var node = Parser.ParseSingle("<div></div>");
355+
356+
Assert.IsType<HtmlElementTextContentDiv>(node);
357+
}
358+
359+
/// <summary>
360+
/// ParseSingle returns null for an empty input.
361+
/// </summary>
362+
[Fact]
363+
public void ParseSingle_EmptyInput_ReturnsNull()
364+
{
365+
var node = Parser.ParseSingle("");
366+
367+
Assert.Null(node);
368+
}
369+
370+
/// <summary>
371+
/// An element with an inline style attribute retains its value.
372+
/// </summary>
373+
[Fact]
374+
public void InlineStyleAttribute_IsPreserved()
375+
{
376+
var nodes = Parser.Parse("<div style=\"color: red;\"></div>");
377+
var div = nodes.OfType<HtmlElementTextContentDiv>().Single();
378+
379+
Assert.Equal("color: red;", div.Style);
380+
}
381+
382+
/// <summary>
383+
/// The thead and tbody sections of a table are reconstructed as children of the table element.
384+
/// </summary>
385+
[Fact]
386+
public void TableStructure_IsReconstructed()
387+
{
388+
var nodes = Parser.Parse("<table><thead><tr><th>Header</th></tr></thead><tbody><tr><td>Cell</td></tr></tbody></table>");
389+
var table = nodes.OfType<HtmlElementTableTable>().Single();
390+
var thead = table.Elements.OfType<HtmlElementTableThead>().Single();
391+
var tbody = table.Elements.OfType<HtmlElementTableTbody>().Single();
392+
393+
Assert.NotNull(thead);
394+
Assert.NotNull(tbody);
395+
}
396+
397+
/// <summary>
398+
/// Multiple top-level elements are all returned.
399+
/// </summary>
400+
[Fact]
401+
public void MultipleRoots_AreAllReturned()
402+
{
403+
var nodes = Parser.Parse("<p>one</p><p>two</p>");
404+
405+
Assert.Equal(2, nodes.Count);
406+
Assert.All(nodes, n => Assert.IsType<HtmlElementTextContentP>(n));
407+
}
408+
409+
/// <summary>
410+
/// A mismatched end tag is handled gracefully without throwing.
411+
/// </summary>
412+
[Fact]
413+
public void MismatchedEndTag_IsHandledGracefully()
414+
{
415+
var nodes = Parser.Parse("<div><span>text</div>");
416+
417+
var div = nodes.OfType<HtmlElementTextContentDiv>().Single();
418+
Assert.NotNull(div);
419+
}
420+
421+
/// <summary>
422+
/// Mixed text and element children are all preserved: text, element, text yields three child nodes.
423+
/// </summary>
424+
[Fact]
425+
public void MixedContent_TextAndElements_ArePreserved()
426+
{
427+
var nodes = Parser.Parse("<p>Hello <strong>World</strong>!</p>");
428+
var p = nodes.OfType<HtmlElementTextContentP>().Single();
429+
430+
Assert.Equal(3, p.Elements.Count());
431+
}
432+
433+
/// <summary>
434+
/// A round trip (render to HTML, then parse) of a styled element preserves the style attribute.
435+
/// </summary>
436+
[Fact]
437+
public void RoundTrip_StyleAttribute_IsPreserved()
438+
{
439+
// arrange
440+
var original = new HtmlElementTextContentDiv();
441+
original.Style = "color: red;";
442+
443+
// act
444+
var html = original.ToString().Trim();
445+
var parsed = Parser.Parse(html);
446+
447+
var restored = parsed.OfType<HtmlElementTextContentDiv>().Single();
448+
449+
// validation
450+
Assert.Equal("color: red;", restored.Style);
451+
}
452+
453+
/// <summary>
454+
/// A round trip (render to HTML, then parse) of an anchor element preserves href and text content.
455+
/// </summary>
456+
[Fact]
457+
public void RoundTrip_Anchor_PreservesHrefAndText()
458+
{
459+
// arrange
460+
var original = new HtmlElementTextSemanticsA(new HtmlText("click me"));
461+
original.Href = "https://example.com";
462+
463+
// act
464+
var html = original.ToString().Trim();
465+
var parsed = Parser.Parse(html);
466+
467+
var a = parsed.OfType<HtmlElementTextSemanticsA>().Single();
468+
var text = a.Elements.OfType<HtmlText>().Single();
469+
470+
// validation
471+
Assert.Equal("https://example.com", a.Href);
472+
Assert.Equal("click me", text.Value);
473+
}
474+
475+
/// <summary>
476+
/// A form with input fields is correctly reconstructed.
477+
/// </summary>
478+
[Fact]
479+
public void FormWithInputs_IsReconstructed()
480+
{
481+
var nodes = Parser.Parse("<form action=\"/submit\"><input type=\"text\" name=\"q\"></form>");
482+
var form = nodes.OfType<HtmlElementFormForm>().Single();
483+
var input = form.Elements.OfType<HtmlElementFieldInput>().Single();
484+
485+
Assert.NotNull(input);
486+
}
487+
488+
/// <summary>
489+
/// The kbd tag (standard HTML) maps to HtmlElementTextSemanticsKdb.
490+
/// </summary>
491+
[Fact]
492+
public void KbdTag_MapsToKdbElement()
493+
{
494+
var nodes = Parser.Parse("<kbd>Ctrl+C</kbd>");
495+
var kbd = nodes.OfType<HtmlElementTextSemanticsKdb>().Single();
496+
497+
Assert.NotNull(kbd);
498+
}
343499
}
344500
}

src/WebExpress.WebCore.Test/Html/Parser/UnitTestHtmlTokenizer.cs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,5 +246,76 @@ public void TagNameNormalisation_IsLowerCase()
246246
Assert.Equal("div", tokens[0].TagName);
247247
Assert.Equal("class", tokens[0].Attributes[0].Name);
248248
}
249+
250+
// ------------------------------------------------------------------
251+
// Whitespace and edge cases
252+
// ------------------------------------------------------------------
253+
254+
/// <summary>
255+
/// Whitespace-only text between tags is preserved as a text token.
256+
/// </summary>
257+
[Fact]
258+
public void WhitespaceText_IsPreservedAsTextToken()
259+
{
260+
var tokens = Tokenize("<div> </div>");
261+
262+
Assert.Equal(HtmlTokenType.StartTag, tokens[0].Type);
263+
Assert.Equal(HtmlTokenType.Text, tokens[1].Type);
264+
Assert.Equal(" ", tokens[1].Value);
265+
Assert.Equal(HtmlTokenType.EndTag, tokens[2].Type);
266+
}
267+
268+
/// <summary>
269+
/// An unquoted attribute value is read until whitespace or closing bracket.
270+
/// </summary>
271+
[Fact]
272+
public void UnquotedAttributeValue_IsExtracted()
273+
{
274+
var tokens = Tokenize("<div class=foo>");
275+
276+
var attr = tokens[0].Attributes.Single();
277+
Assert.Equal("class", attr.Name);
278+
Assert.Equal("foo", attr.Value);
279+
}
280+
281+
/// <summary>
282+
/// An inline style attribute is preserved in its entirety.
283+
/// </summary>
284+
[Fact]
285+
public void InlineStyleAttribute_IsPreserved()
286+
{
287+
var tokens = Tokenize("<div style=\"color: red; font-size: 14px;\">");
288+
289+
var attr = tokens[0].Attributes.Single();
290+
Assert.Equal("style", attr.Name);
291+
Assert.Equal("color: red; font-size: 14px;", attr.Value);
292+
}
293+
294+
/// <summary>
295+
/// A stray less-than character is emitted as text.
296+
/// </summary>
297+
[Fact]
298+
public void StrayLessThan_IsEmittedAsText()
299+
{
300+
var tokens = Tokenize("a < b");
301+
302+
Assert.Equal(HtmlTokenType.Text, tokens[0].Type);
303+
Assert.Equal("a ", tokens[0].Value);
304+
Assert.Equal(HtmlTokenType.Text, tokens[1].Type);
305+
Assert.Equal("<", tokens[1].Value);
306+
Assert.Equal(HtmlTokenType.Text, tokens[2].Type);
307+
}
308+
309+
/// <summary>
310+
/// A keygen void element written without a trailing slash is emitted as a self-closing tag token.
311+
/// </summary>
312+
[Fact]
313+
public void KeygenVoidElement_ReturnsSelfClosing()
314+
{
315+
var tokens = Tokenize("<keygen>");
316+
317+
Assert.Equal(HtmlTokenType.SelfClosingTag, tokens[0].Type);
318+
Assert.Equal("keygen", tokens[0].TagName);
319+
}
249320
}
250321
}

src/WebExpress.WebCore/WebHtml/Parser/HtmlElementFactory.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ public class HtmlElementFactory
7777
["dfn"] = () => new HtmlElementTextSemanticsDfn(),
7878
["em"] = () => new HtmlElementTextSemanticsEm(),
7979
["i"] = () => new HtmlElementTextSemanticsI(),
80+
// The standard HTML element is <kbd>, but the existing class uses "kdb" as
81+
// its element name. Both spellings are mapped so that the parser handles
82+
// real-world HTML (<kbd>) as well as the project's own renderer output (<kdb>).
83+
["kbd"] = () => new HtmlElementTextSemanticsKdb(),
8084
["kdb"] = () => new HtmlElementTextSemanticsKdb(),
8185
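// 'kbd' is the correct HTML tag name; 'kdb' mirrors the existing class typo. NOTE(review): 'kbd' is also mapped a few lines above with an identical factory, so one of the two entries is redundant.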
8286
["kbd"] = () => new HtmlElementTextSemanticsKdb(),
@@ -140,6 +144,7 @@ public class HtmlElementFactory
140144
["datalist"] = () => new HtmlElementFormDatalist(),
141145
["fieldset"] = () => new HtmlElementFormFieldset(),
142146
["form"] = () => new HtmlElementFormForm(),
147+
["keygen"] = () => new HtmlElementFormKeygen(),
143148
["meter"] = () => new HtmlElementFormMeter(),
144149
["optgroup"] = () => new HtmlElementFormOptgroup(),
145150
["option"] = () => new HtmlElementFormOption(),
@@ -148,6 +153,7 @@ public class HtmlElementFactory
148153
["textarea"] = () => new HtmlElementFormTextarea(),
149154

150155
// Interactive
156+
["command"] = () => new HtmlElementInteractiveCommand(),
151157
["details"] = () => new HtmlElementInteractiveDetails(),
152158
["menu"] = () => new HtmlElementInteractiveMenu(),
153159
["summary"] = () => new HtmlElementInteractiveSummary(),

src/WebExpress.WebCore/WebHtml/Parser/HtmlTokenizer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public class HtmlTokenizer
2828
new(StringComparer.OrdinalIgnoreCase)
2929
{
3030
"area", "base", "br", "col", "embed", "hr", "img", "input",
31-
"link", "meta", "param", "source", "track", "wbr"
31+
"keygen", "link", "meta", "param", "source", "track", "wbr"
3232
};
3333

3434
/// <summary>

0 commit comments

Comments
 (0)