Skip to content

Commit ab1096f

Browse files
committed
HTML API: Implement "reconstruct the active formatting elements" algorithm.
As part of the work to add more spec support to the HTML API, this patch fills out the active formatting element reconstruction algorithm so that more HTML can be supported in situations requiring that reconstruction; for example, when a formatting element such as an A tag or a CODE tag is implicitly closed. See Core-61576.
1 parent 6962fa2 commit ab1096f

4 files changed

Lines changed: 120 additions & 38 deletions

File tree

src/wp-includes/html-api/class-wp-html-active-formatting-elements.php

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,22 @@ class WP_HTML_Active_Formatting_Elements {
4343
*/
4444
private $stack = array();
4545

46+
/**
 * Returns the node at the given index in the list of active formatting elements.
 *
 * Do not use this method; it is meant to be used only by the HTML Processor.
 *
 * @since 6.7.0
 *
 * @access private
 *
 * @param int $index Zero-based position in the list: 0 is the first entry and
 *                   `count() - 1` is the last entry in the list.
 * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null.
 */
public function at( $index ) {
	// Guard the lookup so an out-of-range index yields null instead of an undefined-offset warning.
	return isset( $this->stack[ $index ] ) ? $this->stack[ $index ] : null;
}
61+
4662
/**
4763
* Reports if a specific node is in the stack of active formatting elements.
4864
*
@@ -86,6 +102,22 @@ public function current_node() {
86102
return $current_node ? $current_node : null;
87103
}
88104

105+
/**
 * Appends a "marker" entry to the end of the list of active formatting elements.
 *
 * > The markers are inserted when entering applet, object, marquee,
 * > template, td, th, and caption elements, and are used to prevent
 * > formatting from "leaking" into applet, object, marquee, template,
 * > td, th, and caption elements.
 *
 * @see https://html.spec.whatwg.org/#concept-parser-marker
 *
 * @since 6.7.0
 */
public function insert_marker() {
	// The HTML Processor recognizes markers by their 'marker' node name.
	$marker = new WP_HTML_Token( null, 'marker', false );

	$this->push( $marker );
}
120+
89121
/**
90122
* Pushes a node onto the stack of active formatting elements.
91123
*

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2864,7 +2864,7 @@ private function generate_implied_end_tags_thoroughly() {
28642864
* > in the current body, cell, or caption (whichever is youngest) that haven't
28652865
* > been explicitly closed.
28662866
*
2867-
* @since 6.4.0
2867+
* @since 6.7.0
28682868
*
28692869
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
28702870
*
@@ -2873,15 +2873,19 @@ private function generate_implied_end_tags_thoroughly() {
28732873
* @return bool Whether any formatting elements needed to be reconstructed.
28742874
*/
28752875
private function reconstruct_active_formatting_elements() {
2876+
$count = $this->state->active_formatting_elements->count();
2877+
28762878
/*
28772879
* > If there are no entries in the list of active formatting elements, then there is nothing
28782880
* > to reconstruct; stop this algorithm.
28792881
*/
2880-
if ( 0 === $this->state->active_formatting_elements->count() ) {
2882+
if ( 0 === $count ) {
28812883
return false;
28822884
}
28832885

2884-
$last_entry = $this->state->active_formatting_elements->current_node();
2886+
// Start at the last node in the list of active formatting elements.
2887+
$currently_at = $count - 1;
2888+
$last_entry = $this->state->active_formatting_elements->at( $currently_at );
28852889
if (
28862890

28872891
/*
@@ -2900,8 +2904,39 @@ private function reconstruct_active_formatting_elements() {
29002904
return false;
29012905
}
29022906

2903-
$this->last_error = self::ERROR_UNSUPPORTED;
2904-
throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' );
2907+
$entry = $last_entry;
2908+
2909+
while ( $currently_at >= 0 ) {
2910+
if ( 0 === $currently_at ) {
2911+
goto create;
2912+
}
2913+
$entry = $this->state->active_formatting_elements->at( --$currently_at );
2914+
2915+
/*
2916+
* > If entry is neither a marker nor an element that is also in the stack of open elements,
2917+
* > go to the step labeled rewind.
2918+
*/
2919+
if ( 'marker' === $entry->node_name || $this->state->stack_of_open_elements->contains_node( $entry ) ) {
2920+
break;
2921+
}
2922+
}
2923+
2924+
advance:
2925+
$entry = $this->state->active_formatting_elements->at( ++$currently_at );
2926+
2927+
create:
2928+
$this->insert_html_element( $entry );
2929+
2930+
/*
2931+
* > Replace the entry for entry in the list with an entry for new element.
2932+
* This doesn't need to happen here since no DOM is being created.
2933+
*/
2934+
2935+
if ( $count - 1 !== $currently_at ) {
2936+
goto advance;
2937+
}
2938+
2939+
return true;
29052940
}
29062941

29072942
/**

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -112,18 +112,23 @@ public function test_clear_to_navigate_after_seeking() {
112112
}
113113

114114
/**
 * Verifies that active formatting elements are reconstructed when required.
 *
 * @ticket 58517
 *
 * @covers WP_HTML_Processor::reconstruct_active_formatting_elements
 */
public function test_reconstructs_formatting_elements() {
	$html      = '<p><em>One<p><em><span>Two<p><em>Three<p><em>Four';
	$processor = WP_HTML_Processor::create_fragment( $html );

	$found_first_em = $processor->next_tag( 'EM' );
	$this->assertTrue( $found_first_em, 'Could not find first EM.' );

	$first_em_breadcrumbs = $processor->get_breadcrumbs();
	$this->assertSame( array( 'HTML', 'BODY', 'P', 'EM' ), $first_em_breadcrumbs, 'Found incorrect breadcrumbs for first EM.' );

	/*
	 * The SPAN sits inside the second P. The unclosed EM from the first P
	 * is expected to be recreated there, ahead of the explicit second EM.
	 */
	$found_span = $processor->next_tag( 'SPAN' );
	$this->assertTrue( $found_span, 'Could not find test span.' );

	$span_breadcrumbs = $processor->get_breadcrumbs();
	$this->assertSame(
		array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' ),
		$span_breadcrumbs,
		'Found incorrect breadcrumbs for test SPAN; should have created two EMs.'
	);
}
128133

129134
/**

tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -219,45 +219,55 @@ public static function data_unsupported_elements() {
219219
}
220220

221221
/**
 * Verifies that formatting elements opened inside an unclosed A element
 * are reconstructed once that A element is implicitly closed.
 *
 * @ticket 61576
 */
public function test_reconstructs_formatting_from_unclosed_a_elements() {
	$html      = '<a><strong>Click <a><big>Here</big></a></strong></a>';
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Starting point: the STRONG is nested inside the first A.
	$processor->next_tag( 'STRONG' );
	$strong_breadcrumbs = $processor->get_breadcrumbs();
	$this->assertSame(
		array( 'HTML', 'BODY', 'A', 'STRONG' ),
		$strong_breadcrumbs,
		'Failed to construct starting breadcrumbs properly.'
	);

	// The second A closes the first; the STRONG is expected to be recreated before the new A opens.
	$processor->next_tag( 'BIG' );
	$big_breadcrumbs = $processor->get_breadcrumbs();
	$this->assertSame(
		array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ),
		$big_breadcrumbs,
		'Failed to reconstruct the active formatting elements after an unclosed A element.'
	);
}

245244
/**
 * Ensures that unclosed A elements are reconstructed.
 *
 * NOTE(review): the expected breadcrumbs below are kept exactly as written.
 * Per the HTML spec's "in body" rules a DIV start tag does not appear to
 * close an open A element; confirm these expectations against the
 * processor's actual behavior.
 *
 * @ticket 61576
 */
public function test_reconstructs_unclosed_a_elements() {
	$processor = WP_HTML_Processor::create_fragment( '<a><div><a></div></a>' );

	$processor->next_tag( 'DIV' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV' ),
		$processor->get_breadcrumbs(),
		'Failed to construct breadcrumbs properly - the DIV should have closed the A element.'
	);

	// When the DIV re-opens, it reconstructs an unclosed A, then the A in the text is a second A.
	$processor->next_tag( 'A' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV', 'A' ),
		// The actual value was missing, so the expected array was compared to the message string.
		$processor->get_breadcrumbs(),
		'Failed to create proper breadcrumbs for recreated A element.'
	);

	// This is the one that's second in the raw text.
	$processor->next_tag( 'A' );
	$this->assertSame(
		array( 'HTML', 'BODY', 'DIV', 'A' ),
		// The actual value was missing here as well.
		$processor->get_breadcrumbs(),
		'Failed to create proper breadcrumbs for explicit A element - this A should have closed the reconstructed A.'
	);
}
263273

0 commit comments

Comments
 (0)