Skip to content

Commit e00ab01

Browse files
dmsnell and westonruter committed
HTML API: Trigger active format reconstruction when reaching text nodes.
When encountering text nodes in an HTML document, the HTML parser needs to run the active format reconstruction algorithm, even if it doesn't stop to visit those text nodes. This is because the formats, which might need reconstructing, will impact the breadcrumbs of all downstream nodes from the text node. In this patch, this process is triggered, but the text nodes are then skipped, since the HTML Processor doesn't currently support visiting them. Co-authored-by: Weston Ruter <westonruter@google.com>
1 parent faf0ed0 commit e00ab01

2 files changed

Lines changed: 93 additions & 0 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,39 @@ private function step_in_body() {
623623
$op_sigil = $this->is_tag_closer() ? '-' : '+';
624624
$op = "{$op_sigil}{$tag_name}";
625625

626+
if ( null === $tag_name && '#text' === $this->get_token_type() ) {
627+
/*
628+
* This rule is necessary even without supporting text nodes in the
629+
* HTML Processor because the parser has to move past text nodes, and
630+
* there could be breadcrumb implications when the text triggers the
631+
* active format reconstruction.
632+
*/
633+
$this->reconstruct_active_formatting_elements();
634+
635+
$current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ];
636+
if (
637+
1 <= $current_token->length &&
638+
"\x00" === $this->html[ $current_token->start ] &&
639+
strspn( $this->html, "\x00", $current_token->start, $current_token->length ) === $current_token->length
640+
) {
641+
// Parse error: ignore the token.
642+
return $this->step();
643+
}
644+
645+
/*
646+
* Whitespace-only text does not affect the frameset-ok flag.
647+
* It is probably inter-element whitespace, but it may also
648+
* contain character references which decode only to whitespace.
649+
*/
650+
$text = $this->get_modifiable_text();
651+
if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) {
652+
$this->state->frameset_ok = false;
653+
}
654+
655+
// @todo Add support for text nodes: insert node and return "true" here when supported.
656+
return $this->step();
657+
}
658+
626659
switch ( $op ) {
627660
/*
628661
* > A start tag whose tag name is "button"
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<?php
2+
/**
3+
* Unit tests for the HTML API ensuring proper handling of behaviors related to
4+
* active format reconstruction.
5+
*
6+
* @package WordPress
7+
* @subpackage HTML-API
8+
*
9+
* @since 6.5.0
10+
*
11+
* @group html-api
12+
*
13+
* @coversDefaultClass WP_HTML_Processor
14+
*/
15+
class Tests_HtmlApi_WpHtmlSupportRequiredActiveFormatReconstruction extends WP_UnitTestCase {
	/**
	 * Ensures that active formats are properly reconstructed when visiting text nodes,
	 * verifying that the proper breadcrumbs are maintained when scanning through HTML.
	 *
	 * The fixture opens a B element inside the first P and never closes it. The
	 * second P tag implicitly closes that B, so the "Two" text node is where the
	 * parser is expected to reconstruct it (or abort if it cannot).
	 *
	 * @ticket {TICKET_NUMBER}
	 *
	 * @since 6.5.0
	 */
	public function test_reconstructs_active_formats_on_text_nodes() {
		$processor = WP_HTML_Processor::create_fragment( '<p><b>One<span><p>Two' );

		/*
		 * Assert the traversal step itself so that a failure to reach the SPAN
		 * is reported directly instead of surfacing as a breadcrumb mismatch.
		 */
		$this->assertTrue(
			$processor->next_tag( 'span' ),
			'Should have found the SPAN element.'
		);

		// Reaching the SPAN requires moving past the "One" text node.
		$this->assertSame(
			array( 'HTML', 'BODY', 'P', 'B', 'SPAN' ),
			$processor->get_breadcrumbs(),
			'Should have identified the stack of open elements for the first text node.'
		);

		$this->assertTrue(
			$processor->next_tag( 'p' ),
			'Should have found second P element.'
		);

		/*
		 * There are two ways this test could fail. One is to appropriately find the
		 * second text node but fail to reconstruct the implicitly-closed B element.
		 * The other way is to fail to abort when encountering the second text node
		 * because the kind of active format reconstruction isn't supported.
		 */

		if ( $processor->next_token() ) {
			// The text node was visited: the B element must be back on the stack.
			$this->assertSame(
				array( 'HTML', 'BODY', 'P', 'B', '#text' ),
				$processor->get_breadcrumbs(),
				'Should have reconstructed the implicitly-closed B element.'
			);
		} else {
			// The text node was not visited: the only acceptable reason is an explicit bail-out.
			$this->assertSame(
				WP_HTML_Processor::ERROR_UNSUPPORTED,
				$processor->get_last_error(),
				'Should have aborted for incomplete active format reconstruction when encountering the second text node.'
			);
		}
	}
}

0 commit comments

Comments
 (0)