Skip to content

Commit d3286f8

Browse files
committed
Merge branch 'html-api/support-invalid-first-character-in-tag-name-comments' into trunk
2 parents b6b6ded + cff5d31 commit d3286f8

2 files changed

Lines changed: 203 additions & 7 deletions

File tree

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 59 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -971,6 +971,7 @@ private function skip_script_data() {
971971
* closing `>`; these are left for other methods.
972972
*
973973
* @since 6.2.0
974+
* @since 6.2.1 Passes over invalid-tag-closer-comments like "</3 this is a comment>".
974975
*
975976
* @return bool Whether a tag was found before the end of the document.
976977
*/
@@ -1035,17 +1036,42 @@ private function parse_next_tag() {
10351036
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
10361037
*/
10371038
if (
1038-
strlen( $html ) > $at + 3 &&
1039+
strlen( $html ) > $at + 4 &&
10391040
'-' === $html[ $at + 2 ] &&
10401041
'-' === $html[ $at + 3 ]
10411042
) {
1042-
$closer_at = strpos( $html, '-->', $at + 4 );
1043-
if ( false === $closer_at ) {
1044-
return false;
1043+
$closer_at = $at + 4;
1044+
1045+
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
1046+
$span_of_dashes = strspn( $html, '-', $closer_at );
1047+
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
1048+
$at = $closer_at + $span_of_dashes + 1;
1049+
continue;
10451050
}
10461051

1047-
$at = $closer_at + 3;
1048-
continue;
1052+
/*
1053+
* Comments may be closed by either `-->` or the invalid `--!>`.
1054+
* The first occurrence closes the comment.
1055+
*
1056+
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
1057+
*/
1058+
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
1059+
while ( ++$closer_at < strlen( $html ) ) {
1060+
$closer_at = strpos( $html, '--', $closer_at );
1061+
if ( false === $closer_at ) {
1062+
return false;
1063+
}
1064+
1065+
if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
1066+
$at = $closer_at + 3;
1067+
continue 2;
1068+
}
1069+
1070+
if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
1071+
$at = $closer_at + 4;
1072+
continue 2;
1073+
}
1074+
}
10491075
}
10501076

10511077
/*
@@ -1104,9 +1130,19 @@ private function parse_next_tag() {
11041130
continue;
11051131
}
11061132

1133+
/*
1134+
* </> is a missing end tag name, which is ignored.
1135+
*
1136+
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
1137+
*/
1138+
if ( '>' === $html[ $at + 1 ] ) {
1139+
$at++;
1140+
continue;
1141+
}
1142+
11071143
/*
11081144
* <? transitions to a bogus comment state – skip to the nearest >
1109-
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
1145+
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
11101146
*/
11111147
if ( '?' === $html[ $at + 1 ] ) {
11121148
$closer_at = strpos( $html, '>', $at + 2 );
@@ -1118,6 +1154,22 @@ private function parse_next_tag() {
11181154
continue;
11191155
}
11201156

1157+
/*
1158+
* If a non-alpha character starts the tag name in a tag closer, it's a comment.
1159+
* Find the first `>`, which closes the comment.
1160+
*
1161+
* See https://github.com/WordPress/wordpress-develop/pull/4256
1162+
*/
1163+
if ( $this->is_closing_tag ) {
1164+
$closer_at = strpos( $html, '>', $at + 3 );
1165+
if ( false === $closer_at ) {
1166+
return false;
1167+
}
1168+
1169+
$at = $closer_at + 1;
1170+
continue;
1171+
}
1172+
11211173
++$at;
11221174
}
11231175

tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,19 @@
66
* @subpackage HTML-API
77
*/
88

9+
if ( ! class_exists( 'WP_UnitTestCase' ) ) {
10+
class WP_UnitTestCase extends PHPUnit\Framework\TestCase {}
11+
12+
require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-attribute-token.php';
13+
require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-span.php';
14+
// require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-spec.php';
15+
require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-text-replacement.php';
16+
require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-tag-processor.php';
17+
// require_once '/Users/dmsnell/code/WordPress-develop/src/wp-includes/html-api/class-wp-html-processor.php';
18+
19+
function esc_attr( $s ) { return str_replace( [ '<', '>', '"' ], [ '&lt;', '&gt;', '&quot;' ], $s ); }
20+
}
21+
922
/**
1023
* @group html-api
1124
*
@@ -1714,6 +1727,47 @@ public function data_next_tag_ignores_script_tag_contents() {
17141727
);
17151728
}
17161729

1730+
/**
1731+
* Tag closers whose tag name starts with an invalid character are treated as comments.
1732+
*
1733+
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
1734+
*
1735+
* @ticket 58007
1736+
*
1737+
* @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments
1738+
*
1739+
* @param string $html_with_markers HTML containing an invalid tag closer whose element before and
1740+
* element after contain the "start" and "end" CSS classes.
1741+
*/
1742+
public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) {
1743+
$p = new WP_HTML_Tag_Processor( $html_with_markers );
1744+
$p->next_tag( array( 'class_name' => 'start' ) );
1745+
$p->next_tag();
1746+
1747+
$this->assertSame( 'end', $p->get_attribute( 'class' ) );
1748+
}
1749+
1750+
/**
1751+
* Data provider.
1752+
*
1753+
* @return array[]
1754+
*/
1755+
public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() {
1756+
return array(
1757+
'Invalid tag openers as normal text' => array(
1758+
'<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>',
1759+
),
1760+
1761+
'Invalid tag closers as comments' => array(
1762+
'<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>',
1763+
),
1764+
1765+
'Unexpected question mark instead of tag name' => array(
1766+
'<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>',
1767+
),
1768+
);
1769+
}
1770+
17171771
/**
17181772
* @ticket 56299
17191773
*
@@ -1766,6 +1820,96 @@ public function data_next_tag_ignores_contents_of_rcdata_tag() {
17661820
);
17671821
}
17681822

1823+
/**
1824+
* Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
1825+
*
1826+
* @ticket 58007
1827+
* @covers WP_HTML_Tag_Processor::next_tag
1828+
*
1829+
*/
1830+
public function test_allows_incorrectly_closed_comments() {
1831+
$p = new WP_HTML_Tag_Processor( '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>' );
1832+
1833+
$p->next_tag();
1834+
$this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' );
1835+
1836+
$p->next_tag();
1837+
$this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' );
1838+
1839+
$p->next_tag();
1840+
$this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' );
1841+
}
1842+
1843+
/**
1844+
* Ensures that unclosed and invalid comments don't trigger warnings or errors.
1845+
*
1846+
* @ticket 58007
1847+
*
1848+
* @covers WP_HTML_Tag_Processor::next_tag
1849+
* @dataProvider data_html_with_unclosed_comments
1850+
*
1851+
* @param string $html_ending_before_comment_close HTML with opened comments that aren't closed.
1852+
*/
1853+
public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) {
1854+
$p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close );
1855+
1856+
$this->assertFalse( $p->next_tag() );
1857+
}
1858+
1859+
/**
1860+
* Data provider.
1861+
*
1862+
* @return array[]
1863+
*/
1864+
public function data_html_with_unclosed_comments() {
1865+
return array(
1866+
'Shortest open valid comment' => array( '<!--' ),
1867+
'Basic truncated comment' => array( '<!-- this ends --' ),
1868+
'Comment with closer look-alike' => array( '<!-- this ends --x' ),
1869+
'Comment with closer look-alike 2' => array( '<!-- this ends --!x' ),
1870+
'Invalid tag-closer comment' => array( '</(when will this madness end?)' ),
1871+
'Invalid tag-closer comment 2' => array( '</(when will this madness end?)--' )
1872+
);
1873+
}
1874+
1875+
/**
1876+
* Ensures that abruptly-closed empty comments are properly closed.
1877+
*
1878+
* @ticket 58007
1879+
*
1880+
* @covers WP_HTML_Tag_Processor::next_tag
1881+
* @dataProvider data_abruptly_closed_empty_comments
1882+
*
1883+
* @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment.
1884+
*/
1885+
public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) {
1886+
$p = new WP_HTML_Tag_Processor( $html_with_after_marker );
1887+
$p->next_tag();
1888+
$p->next_tag();
1889+
1890+
$this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' );
1891+
}
1892+
1893+
/**
1894+
* Data provider.
1895+
*
1896+
* @return array[]
1897+
*/
1898+
public function data_abruptly_closed_empty_comments() {
1899+
return array(
1900+
'Empty comment with two dashes only' => array( '<hr><!--><hr id=after>' ),
1901+
'Empty comment with two dashes only, improperly closed' => array( '<hr><!--!><hr id=inside>--><hr id=after>' ),
1902+
'Comment with two dashes only, improperly closed twice' => array( '<hr><!--!><hr id=inside>--!><hr id=after>' ),
1903+
'Empty comment with three dashes' => array( '<hr><!---><hr id=after>' ),
1904+
'Empty comment with three dashes, improperly closed' => array( '<hr><!---!><hr id=inside>--><hr id=after>' ),
1905+
'Comment with three dashes, improperly closed twice' => array( '<hr><!---!><hr id=inside>--!><hr id=after>' ),
1906+
'Empty comment with four dashes' => array( '<hr><!----><hr id=after>' ),
1907+
'Empty comment with four dashes, improperly closed' => array( '<hr><!----!><hr id=after>--><hr id=final>' ),
1908+
'Comment with four dashes, improperly closed twice' => array( '<hr><!----!><hr id=after>--!><hr id=final>' ),
1909+
'Comment with almost-closer inside' => array( '<hr><!-- ---!><hr id=after>--!><hr id=final>' ),
1910+
);
1911+
}
1912+
17691913
/**
17701914
* @ticket 56299
17711915
*

0 commit comments

Comments
 (0)