Skip to content

Commit 7e4ecc7

Browse files
committed
HTML API: Handle adoption agency fallback end tags
1 parent 095fab5 commit 7e4ecc7

3 files changed

Lines changed: 144 additions & 35 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 54 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2913,8 +2913,7 @@ private function step_in_body(): bool {
29132913
case '-STRONG':
29142914
case '-TT':
29152915
case '-U':
2916-
$this->run_adoption_agency_algorithm();
2917-
return true;
2916+
return $this->run_adoption_agency_algorithm();
29182917

29192918
/*
29202919
* > A start tag whose tag name is one of: "applet", "marquee", "object"
@@ -3246,41 +3245,57 @@ private function step_in_body(): bool {
32463245
$this->insert_html_element( $this->state->current_token );
32473246
return true;
32483247
} else {
3249-
/*
3250-
* > Any other end tag
3251-
*/
3248+
return $this->step_in_body_any_other_end_tag();
3249+
}
32523250

3253-
/*
3254-
* Find the corresponding tag opener in the stack of open elements, if
3255-
* it exists before reaching a special element, which provides a kind
3256-
* of boundary in the stack. For example, a `</custom-tag>` should not
3257-
* close anything beyond its containing `P` or `DIV` element.
3258-
*/
3259-
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
3260-
if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
3261-
break;
3262-
}
3251+
$this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' );
3252+
// This unnecessary return prevents tools from inaccurately reporting type errors.
3253+
return false;
3254+
}
32633255

3264-
if ( self::is_special( $node ) ) {
3265-
// This is a parse error, ignore the token.
3266-
return $this->step();
3267-
}
3256+
/**
3257+
* Parses an "any other end tag" token in the "in body" insertion mode.
3258+
*
3259+
* @since 6.9.0
3260+
* @ignore
3261+
*
3262+
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
3263+
*
3264+
* @return bool Whether an element was found.
3265+
*/
3266+
private function step_in_body_any_other_end_tag(): bool {
3267+
$token_name = $this->get_token_name();
3268+
3269+
/*
3270+
* Find the corresponding tag opener in the stack of open elements, if
3271+
* it exists before reaching a special element, which provides a kind
3272+
* of boundary in the stack. For example, a `</custom-tag>` should not
3273+
* close anything beyond its containing `P` or `DIV` element.
3274+
*/
3275+
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
3276+
if ( 'html' === $node->namespace && $token_name === $node->node_name ) {
3277+
break;
32683278
}
32693279

3270-
$this->generate_implied_end_tags( $token_name );
3271-
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
3272-
// @todo Record parse error: this error doesn't impact parsing.
3280+
if ( self::is_special( $node ) ) {
3281+
// This is a parse error, ignore the token.
3282+
return $this->step();
32733283
}
3284+
}
32743285

3275-
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
3276-
$this->state->stack_of_open_elements->pop();
3277-
if ( $node === $item ) {
3278-
return true;
3279-
}
3286+
$this->generate_implied_end_tags( $token_name );
3287+
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
3288+
// @todo Record parse error: this error doesn't impact parsing.
3289+
}
3290+
3291+
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
3292+
$this->state->stack_of_open_elements->pop();
3293+
if ( $node === $item ) {
3294+
return true;
32803295
}
32813296
}
32823297

3283-
$this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' );
3298+
$this->bail( 'Should not have been able to reach end of IN BODY "any other end tag" processing. Check HTML API code.' );
32843299
// This unnecessary return prevents tools from inaccurately reporting type errors.
32853300
return false;
32863301
}
@@ -6216,8 +6231,10 @@ private function reset_insertion_mode_appropriately(): void {
62166231
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
62176232
*
62186233
* @see https://html.spec.whatwg.org/#adoption-agency-algorithm
6234+
*
6235+
* @return bool Whether an element was found.
62196236
*/
6220-
private function run_adoption_agency_algorithm(): void {
6237+
private function run_adoption_agency_algorithm(): bool {
62216238
$budget = 1000;
62226239
$subject = $this->get_tag();
62236240
$current_node = $this->state->stack_of_open_elements->current_node();
@@ -6229,13 +6246,13 @@ private function run_adoption_agency_algorithm(): void {
62296246
! $this->state->active_formatting_elements->contains_node( $current_node )
62306247
) {
62316248
$this->state->stack_of_open_elements->pop();
6232-
return;
6249+
return true;
62336250
}
62346251

62356252
$outer_loop_counter = 0;
62366253
while ( $budget-- > 0 ) {
62376254
if ( $outer_loop_counter++ >= 8 ) {
6238-
return;
6255+
return true;
62396256
}
62406257

62416258
/*
@@ -6258,18 +6275,18 @@ private function run_adoption_agency_algorithm(): void {
62586275

62596276
// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
62606277
if ( null === $formatting_element ) {
6261-
$this->bail( 'Cannot run adoption agency when "any other end tag" is required.' );
6278+
return $this->step_in_body_any_other_end_tag();
62626279
}
62636280

62646281
// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
62656282
if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) {
62666283
$this->state->active_formatting_elements->remove_node( $formatting_element );
6267-
return;
6284+
return true;
62686285
}
62696286

62706287
// > If formatting element is in the stack of open elements, but the element is not in scope, then this is a parse error; return.
62716288
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $formatting_element->node_name ) ) {
6272-
return;
6289+
return true;
62736290
}
62746291

62756292
/*
@@ -6305,7 +6322,7 @@ private function run_adoption_agency_algorithm(): void {
63056322

63066323
if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
63076324
$this->state->active_formatting_elements->remove_node( $formatting_element );
6308-
return;
6325+
return true;
63096326
}
63106327
}
63116328
}
@@ -6314,6 +6331,8 @@ private function run_adoption_agency_algorithm(): void {
63146331
}
63156332

63166333
$this->bail( 'Cannot run adoption agency when looping required.' );
6334+
// This unnecessary return prevents tools from inaccurately reporting type errors.
6335+
return false;
63176336
}
63186337

63196338
/**

tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,36 @@ public function test_unexpected_closing_tags_are_removed() {
142142
);
143143
}
144144

145+
/**
146+
* Ensures that unexpected closing formatting tags are ignored.
147+
*
148+
* @ticket 65372
149+
*
150+
* @dataProvider data_unexpected_closing_formatting_tags
151+
*
152+
* @param string $html HTML containing an unexpected closing formatting tag.
153+
* @param string $expected Expected normalized output.
154+
*/
155+
public function test_unexpected_closing_formatting_tags_are_ignored( string $html, string $expected ) {
156+
$this->assertSame(
157+
$expected,
158+
WP_HTML_Processor::normalize( $html ),
159+
'Should have ignored unexpected closing formatting tags.'
160+
);
161+
}
162+
163+
/**
164+
* Data provider.
165+
*
166+
* @return array[]
167+
*/
168+
public static function data_unexpected_closing_formatting_tags() {
169+
return array(
170+
'Unexpected A end tag' => array( 'one</a>two', 'onetwo' ),
171+
'Unexpected B end tag' => array( 'one</b>two', 'onetwo' ),
172+
);
173+
}
174+
145175
/**
146176
* Ensures that self-closing elements in foreign content retain their self-closing flag.
147177
*

tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,66 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
405405
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
406406
}
407407

408+
/**
409+
* Verifies that when the adoption agency algorithm finds no matching
410+
* active formatting element, it acts like "any other end tag".
411+
*
412+
* @covers WP_HTML_Processor::step_in_body
413+
*
414+
* @ticket 65372
415+
*
416+
* @dataProvider data_in_body_adoption_agency_falls_back_to_any_other_end_tag
417+
*
418+
* @param string $formatting_tag_name Formatting tag name with no active formatting element.
419+
*/
420+
public function test_in_body_adoption_agency_falls_back_to_any_other_end_tag( string $formatting_tag_name ) {
421+
$processor = WP_HTML_Processor::create_fragment( "<div><span></{$formatting_tag_name}><code target></code></span></div>" );
422+
423+
$processor->next_tag( 'SPAN' );
424+
$this->assertSame( 'SPAN', $processor->get_tag(), "Expected to start test on SPAN element but found {$processor->get_tag()} instead." );
425+
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs(), 'Failed to produce expected DOM nesting.' );
426+
427+
$this->assertTrue( $processor->next_tag( 'CODE' ), "Failed to ignore unexpected {$formatting_tag_name} closer and advance to CODE opener." );
428+
$this->assertSame( 'CODE', $processor->get_tag(), "Expected to find CODE element, but found {$processor->get_tag()} instead." );
429+
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'CODE' ), $processor->get_breadcrumbs(), 'Failed to keep SPAN open after unexpected formatting closer.' );
430+
}
431+
432+
/**
433+
* Verifies that the adoption agency fallback preserves the "any other end tag"
434+
* step result when the ignored token is followed by EOF.
435+
*
436+
* @covers WP_HTML_Processor::step_in_body
437+
*
438+
* @ticket 65372
439+
*
440+
* @dataProvider data_in_body_adoption_agency_falls_back_to_any_other_end_tag
441+
*
442+
* @param string $formatting_tag_name Formatting tag name with no active formatting element.
443+
*/
444+
public function test_in_body_adoption_agency_fallback_preserves_any_other_end_tag_step_result( string $formatting_tag_name ) {
445+
$ordinary_processor = WP_HTML_Processor::create_fragment( '<span></x>' );
446+
$this->assertTrue( $ordinary_processor->step(), 'Failed to find the SPAN opener before an ordinary unexpected end tag.' );
447+
$this->assertSame( 'SPAN', $ordinary_processor->get_tag(), "Expected to start test on SPAN element but found {$ordinary_processor->get_tag()} instead." );
448+
$this->assertFalse( $ordinary_processor->step(), 'Expected ordinary unexpected end tag followed by EOF to return false.' );
449+
450+
$formatting_processor = WP_HTML_Processor::create_fragment( "<span></{$formatting_tag_name}>" );
451+
$this->assertTrue( $formatting_processor->step(), 'Failed to find the SPAN opener before an unexpected formatting end tag.' );
452+
$this->assertSame( 'SPAN', $formatting_processor->get_tag(), "Expected to start test on SPAN element but found {$formatting_processor->get_tag()} instead." );
453+
$this->assertFalse( $formatting_processor->step(), 'Expected unexpected formatting end tag followed by EOF to return false.' );
454+
}
455+
456+
/**
457+
* Data provider.
458+
*
459+
* @return array[]
460+
*/
461+
public static function data_in_body_adoption_agency_falls_back_to_any_other_end_tag() {
462+
return array(
463+
'Unexpected A end tag' => array( 'a' ),
464+
'Unexpected B end tag' => array( 'b' ),
465+
);
466+
}
467+
408468
/**
409469
* Ensures that closing `</br>` tags are appropriately treated as opening tags with no attributes.
410470
*

0 commit comments

Comments
 (0)