Skip to content

Commit d0062e4

Browse files
committed
HTML API: Fixes for issues discovered while fuzzing.
Fuzz-testing was performed against the HTML API for finding edge cases that might be broken in the existing parsing code. A few issues were discovered with HTML normalization and warnings from out-of-bounds string reads. This patch contains new tests catching regressions on these behaviors and adds fixes for the discovered issues. Patch proposed by Codex and revised by dmsnell. Developed in: WordPress/wordpress-develop#11982 Discussed in: https://core.trac.wordpress.org/ticket/65372 Fixes #65372. Built from https://develop.svn.wordpress.org/trunk@62439 git-svn-id: http://core.svn.wordpress.org/trunk@61720 1a063a9b-81f0-0310-95a4-ce76da25c4cd
1 parent dff8c68 commit d0062e4

5 files changed

Lines changed: 71 additions & 12 deletions

File tree

wp-includes/class-wp-token-map.php

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,10 @@ public static function from_precomputed_table( $state ): ?WP_Token_Map {
440440
* @return bool Whether there's an entry for the given word in the map.
441441
*/
442442
public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool {
443+
if ( str_contains( $word, "\x00" ) ) {
444+
return false;
445+
}
446+
443447
$ignore_case = 'ascii-case-insensitive' === $case_sensitivity;
444448

445449
if ( $this->key_length >= strlen( $word ) ) {
@@ -533,9 +537,17 @@ public function read_token( string $text, int $offset = 0, &$matched_token_byte_
533537

534538
// Search for a long word first, if the text is long enough, and if that fails, a short one.
535539
if ( $text_length > $this->key_length ) {
536-
$group_key = substr( $text, $offset, $this->key_length );
540+
/*
541+
* Keys cannot contain null bytes, which is taken care of for the full words,
542+
* but here it’s required to reject group keys with null bytes so that the
543+
* lookup doesn’t get off track when scanning the group string.
544+
*/
545+
if ( strcspn( $text, "\x00", $offset, $this->key_length ) < $this->key_length ) {
546+
return null;
547+
}
537548

538-
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
549+
$group_key = substr( $text, $offset, $this->key_length );
550+
$group_at = $ignore_case ? stripos( $this->groups, $group_key ) : strpos( $this->groups, $group_key );
539551
if ( false === $group_at ) {
540552
// Perhaps a short word then.
541553
return strlen( $this->small_words ) > 0

wp-includes/html-api/class-wp-html-open-elements.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,11 @@ public function after_element_pop( WP_HTML_Token $item ): void {
738738
* When adding support for new elements, expand this switch to trap
739739
* cases where the precalculated value needs to change.
740740
*/
741-
switch ( $item->node_name ) {
741+
$namespaced_name = 'html' === $item->namespace
742+
? $item->node_name
743+
: "{$item->namespace} {$item->node_name}";
744+
745+
switch ( $namespaced_name ) {
742746
case 'APPLET':
743747
case 'BUTTON':
744748
case 'CAPTION':

wp-includes/html-api/class-wp-html-processor.php

Lines changed: 44 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -813,8 +813,14 @@ private function next_visitable_token(): bool {
813813
* until there are events or until there are no more
814814
* tokens works in the meantime and isn't obviously wrong.
815815
*/
816-
if ( empty( $this->element_queue ) && $this->step() ) {
817-
return $this->next_visitable_token();
816+
if ( empty( $this->element_queue ) ) {
817+
if ( $this->step() ) {
818+
return $this->next_visitable_token();
819+
}
820+
821+
if ( isset( $this->last_error ) ) {
822+
return false;
823+
}
818824
}
819825

820826
// Process the next event on the queue.
@@ -1401,6 +1407,7 @@ public function serialize_token(): string {
14011407
$tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() );
14021408
$in_html = 'html' === $this->get_namespace();
14031409
$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
1410+
$qualified_name = str_replace( "\x00", "\u{FFFD}", $qualified_name );
14041411

14051412
if ( $this->is_tag_closer() ) {
14061413
$html .= "</{$qualified_name}>";
@@ -1414,15 +1421,36 @@ public function serialize_token(): string {
14141421
}
14151422

14161423
$html .= "<{$qualified_name}";
1424+
1425+
$previous_attribute_was_true = false;
1426+
$seen_attribute_names = array();
14171427
foreach ( $attribute_names as $attribute_name ) {
1418-
$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
1428+
$qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name );
1429+
$qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name );
1430+
$qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name );
1431+
if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) {
1432+
continue;
1433+
} else {
1434+
$seen_attribute_names[ $qualified_attribute_name ] = true;
1435+
}
1436+
1437+
if (
1438+
$previous_attribute_was_true &&
1439+
isset( $qualified_attribute_name[0] ) &&
1440+
'=' === $qualified_attribute_name[0]
1441+
) {
1442+
$html .= '=""';
1443+
}
1444+
1445+
$html .= " {$qualified_attribute_name}";
14191446
$value = $this->get_attribute( $attribute_name );
14201447

14211448
if ( is_string( $value ) ) {
14221449
$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
14231450
}
14241451

1425-
$html = str_replace( "\x00", "\u{FFFD}", $html );
1452+
$previous_attribute_was_true = true === $value;
1453+
$html = str_replace( "\x00", "\u{FFFD}", $html );
14261454
}
14271455

14281456
if ( ! $in_html && $this->has_self_closing_flag() ) {
@@ -2667,8 +2695,7 @@ private function step_in_body(): bool {
26672695
*/
26682696
case '-FORM':
26692697
if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
2670-
$node = $this->state->form_element;
2671-
$this->state->form_element = null;
2698+
$node = $this->state->form_element;
26722699

26732700
/*
26742701
* > If node is null or if the stack of open elements does not have node
@@ -2681,10 +2708,20 @@ private function step_in_body(): bool {
26812708
null === $node ||
26822709
! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' )
26832710
) {
2684-
// Parse error: ignore the token.
2711+
/*
2712+
* Parse error: ignore the token.
2713+
*
2714+
* Keep the form pointer intact when the end tag is ignored, such as
2715+
* when a FORM closing tag appears inside an SVG TITLE integration
2716+
* point. Otherwise the ignored token changes parser state in a way
2717+
* that serialization cannot represent, allowing a later FORM opener
2718+
* to appear in the first normalization pass and disappear on the second.
2719+
*/
26852720
return $this->step();
26862721
}
26872722

2723+
$this->state->form_element = null;
2724+
26882725
$this->generate_implied_end_tags();
26892726
if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
26902727
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.

wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,7 @@ private function skip_rcdata( string $tag_name ): bool {
14241424
$this->tag_name_starts_at = $at;
14251425

14261426
// Fail if there is no possible tag closer.
1427-
if ( false === $at || ( $at + $tag_length ) >= $doc_length ) {
1427+
if ( false === $at || ( $at + 2 + $tag_length ) >= $doc_length ) {
14281428
return false;
14291429
}
14301430

@@ -1815,6 +1815,12 @@ private function parse_next_tag(): bool {
18151815

18161816
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
18171817
$span_of_dashes = strspn( $html, '-', $closer_at );
1818+
if ( $doc_length <= $span_of_dashes + $closer_at ) {
1819+
$this->parser_state = self::STATE_INCOMPLETE_INPUT;
1820+
1821+
return false;
1822+
}
1823+
18181824
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
18191825
/*
18201826
* @todo When implementing `set_modifiable_text()` ensure that updates to this token

wp-includes/version.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
*
1717
* @global string $wp_version
1818
*/
19-
$wp_version = '7.1-alpha-62438';
19+
$wp_version = '7.1-alpha-62439';
2020

2121
/**
2222
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.

0 commit comments

Comments
 (0)