Skip to content

Commit 59f4133

Browse files
committed
Add some support for adoption agency algorithm
1 parent ddd8579 commit 59f4133

3 files changed

Lines changed: 122 additions & 21 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 108 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1553,12 +1553,7 @@ private function run_adoption_agency_algorithm() {
15531553
return;
15541554
}
15551555

1556-
$outer_loop_counter = 0;
1557-
while ( $budget-- > 0 ) {
1558-
if ( $outer_loop_counter++ >= 8 ) {
1559-
return;
1560-
}
1561-
1556+
for ( $outer_loop_counter = 0; $outer_loop_counter < 8; $outer_loop_counter++ ) {
15621557
/*
15631558
* > Let formatting element be the last element in the list of active formatting elements that:
15641559
* > - is between the end of the list and the last marker in the list,
@@ -1579,8 +1574,35 @@ private function run_adoption_agency_algorithm() {
15791574

15801575
// > If there is no such element, then return and instead act as described in the "any other end tag" entry above.
15811576
if ( null === $formatting_element ) {
1582-
$this->last_error = self::ERROR_UNSUPPORTED;
1583-
throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' );
1577+
/*
1578+
* > Any other end tag
1579+
*/
1580+
1581+
/*
1582+
* Find the corresponding tag opener in the stack of open elements, if
1583+
* it exists before reaching a special element, which provides a kind
1584+
* of boundary in the stack. For example, a `</custom-tag>` should not
1585+
* close anything beyond its containing `P` or `DIV` element.
1586+
*/
1587+
foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) {
1588+
if ( $subject === $node->node_name ) {
1589+
break;
1590+
}
1591+
1592+
if ( self::is_special( $node->node_name ) ) {
1593+
// This is a parse error, ignore the token.
1594+
return;
1595+
}
1596+
}
1597+
1598+
$this->generate_implied_end_tags( $subject );
1599+
1600+
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
1601+
$this->state->stack_of_open_elements->pop();
1602+
if ( $node === $item ) {
1603+
return;
1604+
}
1605+
}
15841606
}
15851607

15861608
// > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return.
@@ -1594,14 +1616,18 @@ private function run_adoption_agency_algorithm() {
15941616
return;
15951617
}
15961618

1619+
/*
1620+
* > If formatting element is not the current node, this is a parse error. (But do not return.)
1621+
*/
1622+
15971623
/*
15981624
* > Let furthest block be the topmost node in the stack of open elements that is lower in the stack
15991625
* > than formatting element, and is an element in the special category. There might not be one.
16001626
*/
16011627
$is_above_formatting_element = true;
16021628
$furthest_block = null;
16031629
foreach ( $this->state->stack_of_open_elements->walk_down() as $item ) {
1604-
if ( $is_above_formatting_element && $formatting_element->bookmark_name !== $item->bookmark_name ) {
1630+
if ( $is_above_formatting_element && $formatting_element !== $item ) {
16051631
continue;
16061632
}
16071633

@@ -1625,19 +1651,87 @@ private function run_adoption_agency_algorithm() {
16251651
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
16261652
$this->state->stack_of_open_elements->pop();
16271653

1628-
if ( $formatting_element->bookmark_name === $item->bookmark_name ) {
1654+
if ( $formatting_element === $item ) {
16291655
$this->state->active_formatting_elements->remove_node( $formatting_element );
16301656
return;
16311657
}
16321658
}
16331659
}
16341660

1661+
/*
1662+
* > Let common ancestor be the element immediately above formatting element in the stack of open elements.
1663+
*/
1664+
$common_ancestor = null;
1665+
foreach ( $this->state->stack_of_open_elements->walk_up( $formatting_element ) as $item ) {
1666+
$common_ancestor = $item;
1667+
break;
1668+
}
1669+
1670+
/*
1671+
* > Let a bookmark note the position of formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
1672+
*/
1673+
$formatting_element_index = 0;
1674+
foreach ( $this->state->active_formatting_elements->walk_down() as $item ) {
1675+
if ( $formatting_element === $item ) {
1676+
break;
1677+
}
1678+
1679+
++$formatting_element_index;
1680+
}
1681+
1682+
/*
1683+
* > Let node and last node be furthest block.
1684+
*/
1685+
$node = $furthest_block;
1686+
$last_node = $furthest_block;
1687+
1688+
$inner_loop_counter = 0;
1689+
while ( $budget-- > 0 ) {
1690+
++$inner_loop_counter;
1691+
1692+
if ( $this->state->stack_of_open_elements->contains_node( $node ) ) {
1693+
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
1694+
$node = $item;
1695+
break;
1696+
}
1697+
} else {
1698+
$this->last_error = self::ERROR_UNSUPPORTED;
1699+
throw new WP_HTML_Unsupported_Exception( 'Cannot adjust node pointer above removed node.' );
1700+
}
1701+
1702+
if ( $formatting_element === $node ) {
1703+
break;
1704+
}
1705+
1706+
if ( $inner_loop_counter > 3 && $this->state->active_formatting_elements->contains_node( $node ) ) {
1707+
$this->state->active_formatting_elements->remove_node( $node );
1708+
}
1709+
1710+
if ( ! $this->state->active_formatting_elements->contains_node( $node ) ) {
1711+
$this->state->stack_of_open_elements->remove_node( $node );
1712+
continue;
1713+
}
1714+
1715+
/*
1716+
* > Create an element for the token for which the element node was created,
1717+
* in the HTML namespace, with common ancestor as the intended parent;
1718+
* replace the entry for node in the list of active formatting elements
1719+
* with an entry for the new element, replace the entry for node in the
1720+
* stack of open elements with an entry for the new element, and let node
1721+
* be the new element.
1722+
*/
1723+
$this->last_error = self::ERROR_UNSUPPORTED;
1724+
throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' );
1725+
}
1726+
1727+
/*
1728+
* > Insert whatever last node ended up being in the previous step at the appropriate
1729+
* > place for inserting a node, but using common ancestor as the override target.
1730+
*/
1731+
16351732
$this->last_error = self::ERROR_UNSUPPORTED;
1636-
throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' );
1733+
throw new WP_HTML_Unsupported_Exception( 'Cannot create and reference new element for which no token exists.' );
16371734
}
1638-
1639-
$this->last_error = self::ERROR_UNSUPPORTED;
1640-
throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' );
16411735
}
16421736

16431737
/**

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,11 +125,16 @@ public function test_clear_to_navigate_after_seeking() {
125125
*
126126
* @covers WP_HTML_Processor::reconstruct_active_formatting_elements
127127
*/
128-
public function test_fails_to_reconstruct_formatting_elements() {
128+
public function test_reconstructs_formatting_elements() {
129129
$p = WP_HTML_Processor::create_fragment( '<p><em>One<p><em>Two<p><em>Three<p><em>Four' );
130130

131131
$this->assertTrue( $p->next_tag( 'EM' ), 'Could not find first EM.' );
132-
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
132+
$this->assertTrue( $p->next_tag( 'EM' ), 'Should have found second EM.' );
133+
$this->assertSame(
134+
array( 'HTML', 'BODY', 'P', 'EM', 'EM' ),
135+
$p->get_breadcrumbs(),
136+
'Should have reconstructed the previous EM before finding the second.'
137+
);
133138
}
134139

135140
/**

tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,6 @@ public function test_fails_when_encountering_unsupported_markup( $html, $descrip
251251
*/
252252
public function data_unsupported_markup() {
253253
return array(
254-
'A with formatting following unclosed A' => array(
255-
'<a><strong>Click <a supported><big unsupported>Here</big></a></strong></a>',
256-
'Unclosed formatting requires complicated reconstruction.',
257-
),
258-
259254
'A after unclosed A inside DIV' => array(
260255
'<a><div supported><a unsupported></div></a>',
261256
'A is a formatting element, which requires more complicated reconstruction.',
@@ -340,6 +335,13 @@ public function data_html_target_with_breadcrumbs() {
340335
'P after closed P' => array( '<p><i>something</i></p><p target>This one</p>', array( 'HTML', 'BODY', 'P' ), 2 ),
341336
'A after unclosed A' => array( '<a><a target>', array( 'HTML', 'BODY', 'A' ), 2 ),
342337
'A after unclosed A, after a P' => array( '<p><a><a target>', array( 'HTML', 'BODY', 'P', 'A' ), 2 ),
338+
339+
'A with formatting following unclosed A' => array(
340+
'<a><strong>Click <a><big target>Here</big></a></strong></a>',
341+
array( 'HTML', 'BODY', 'STRONG', 'A', 'BIG' ),
342+
1
343+
),
344+
343345
// This one adds a test at a deep stack depth to ensure things work for situations beyond short test docs.
344346
'Large HTML document with deep P' => array(
345347
'<div><div><div><div><div><div><div><div><p></p><p></p><p><div><strong><em><code></code></em></strong></div></p></div></div></div></div></div></div></div></div><div><div><div><div><div><div><div><div><p></p><p></p><p><div><strong><em><code target></code></em></strong></div></p></div></div></div></div></div></div></div></div>',

0 commit comments

Comments
 (0)