Skip to content

Commit 7c1c48c

Browse files
committed
HTML API: Add support for list elements.
Adds support for the following HTML elements to the HTML Processor: - LI, OL, UL. - DD, DL, DT. Previously, these elements were not supported and the HTML Processor would bail when encountering them. With this patch it will proceed to parse an HTML document when encountering those tags as long as other normal conditions don't cause it to bail (such as complicated format reconstruction). Props audrasjb, jonsurrell, bernhard-reiter. Fixes #60215. git-svn-id: https://develop.svn.wordpress.org/trunk@57264 602fd350-edb4-49c9-b593-d223f7449a82
1 parent eff1a3d commit 7c1c48c

9 files changed

Lines changed: 704 additions & 44 deletions

phpcs.xml.dist

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,15 @@
250250
<exclude-pattern>/wp-tests-config-sample\.php</exclude-pattern>
251251
</rule>
252252

253+
<!-- Permit goto in the HTML Processor, which mimics algorithms that are written
254+
this way in the HTML specification, and these particular algorithms are complex and
255+
highly imperative. Avoiding the goto introduces a number of risks that could make it
256+
more difficult to maintain the relationship to the standard, lead to subtle differences
257+
in the parsing, and distance the code from its standard. -->
258+
<rule ref="Generic.PHP.DiscourageGoto.Found">
259+
<exclude-pattern>/wp-includes/html-api/class-wp-html-processor\.php</exclude-pattern>
260+
</rule>
261+
253262
<!-- Exclude sample config from modernization to prevent breaking CI workflows based on WP-CLI scaffold.
254263
See: https://core.trac.wordpress.org/ticket/48082#comment:16 -->
255264
<rule ref="Modernize.FunctionCalls.Dirname.FileConstant">

src/wp-includes/html-api/class-wp-html-open-elements.php

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
129129
}
130130

131131
if ( in_array( $node->node_name, $termination_list, true ) ) {
132-
return true;
132+
return false;
133133
}
134134
}
135135

@@ -166,18 +166,22 @@ public function has_element_in_scope( $tag_name ) {
166166
* Returns whether a particular element is in list item scope.
167167
*
168168
* @since 6.4.0
169+
* @since 6.5.0 Implemented: no longer throws on every invocation.
169170
*
170171
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
171172
*
172-
* @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
173-
*
174173
* @param string $tag_name Name of tag to check.
175174
* @return bool Whether given element is in scope.
176175
*/
177176
public function has_element_in_list_item_scope( $tag_name ) {
178-
throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on list item scope.' );
179-
180-
return false; // The linter requires this unreachable code until the function is implemented and can return.
177+
return $this->has_element_in_specific_scope(
178+
$tag_name,
179+
array(
180+
// There are more elements that belong here which aren't currently supported.
181+
'OL',
182+
'UL',
183+
)
184+
);
181185
}
182186

183187
/**
@@ -375,10 +379,22 @@ public function walk_down() {
375379
* see WP_HTML_Open_Elements::walk_down().
376380
*
377381
* @since 6.4.0
382+
* @since 6.5.0 Accepts $above_this_node to start traversal above a given node, if it exists.
383+
*
384+
* @param ?WP_HTML_Token $above_this_node Optional. Start traversing above this node. If provided but not found in the stack, nothing is yielded.
378385
*/
379-
public function walk_up() {
386+
public function walk_up( $above_this_node = null ) {
387+
$has_found_node = null === $above_this_node;
388+
380389
for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
381-
yield $this->stack[ $i ];
390+
$node = $this->stack[ $i ];
391+
392+
if ( ! $has_found_node ) {
393+
$has_found_node = $node === $above_this_node;
394+
continue;
395+
}
396+
397+
yield $node;
382398
}
383399
}
384400

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@
105105
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
106106
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107
* - Links: A.
108-
* - Lists: DL.
108+
* - Lists: DD, DL, DT, LI, OL, UL.
109109
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110110
* - Paragraph: P.
111111
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
@@ -648,10 +648,12 @@ private function step_in_body() {
648648
case '+MAIN':
649649
case '+MENU':
650650
case '+NAV':
651+
case '+OL':
651652
case '+P':
652653
case '+SEARCH':
653654
case '+SECTION':
654655
case '+SUMMARY':
656+
case '+UL':
655657
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
656658
$this->close_a_p_element();
657659
}
@@ -685,9 +687,11 @@ private function step_in_body() {
685687
case '-MAIN':
686688
case '-MENU':
687689
case '-NAV':
690+
case '-OL':
688691
case '-SEARCH':
689692
case '-SECTION':
690693
case '-SUMMARY':
694+
case '-UL':
691695
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name ) ) {
692696
// @todo Report parse error.
693697
// Ignore the token.
@@ -755,6 +759,109 @@ private function step_in_body() {
755759
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
756760
return true;
757761

762+
/*
763+
* > A start tag whose tag name is "li"
764+
* > A start tag whose tag name is one of: "dd", "dt"
765+
*/
766+
case '+DD':
767+
case '+DT':
768+
case '+LI':
769+
$this->state->frameset_ok = false;
770+
$node = $this->state->stack_of_open_elements->current_node();
771+
$is_li = 'LI' === $tag_name;
772+
773+
in_body_list_loop:
774+
/*
775+
* The logic for LI and DT/DD is the same except for one point: LI elements _only_
776+
* close other LI elements, but a DT or DD element closes _any_ open DT or DD element.
777+
*/
778+
if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) {
779+
$node_name = $is_li ? 'LI' : $node->node_name;
780+
$this->generate_implied_end_tags( $node_name );
781+
if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
782+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
783+
}
784+
785+
$this->state->stack_of_open_elements->pop_until( $node_name );
786+
goto in_body_list_done;
787+
}
788+
789+
if (
790+
'ADDRESS' !== $node->node_name &&
791+
'DIV' !== $node->node_name &&
792+
'P' !== $node->node_name &&
793+
$this->is_special( $node->node_name )
794+
) {
795+
/*
796+
* > If node is in the special category, but is not an address, div,
797+
* > or p element, then jump to the step labeled done below.
798+
*/
799+
goto in_body_list_done;
800+
} else {
801+
/*
802+
* > Otherwise, set node to the previous entry in the stack of open elements
803+
* > and return to the step labeled loop.
804+
*/
805+
foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
806+
$node = $item;
807+
break;
808+
}
809+
goto in_body_list_loop;
810+
}
811+
812+
in_body_list_done:
813+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
814+
$this->close_a_p_element();
815+
}
816+
817+
$this->insert_html_element( $this->state->current_token );
818+
return true;
819+
820+
/*
821+
* > An end tag whose tag name is "li"
822+
* > An end tag whose tag name is one of: "dd", "dt"
823+
*/
824+
case '-DD':
825+
case '-DT':
826+
case '-LI':
827+
if (
828+
/*
829+
* An end tag whose tag name is "li":
830+
* If the stack of open elements does not have an li element in list item scope,
831+
* then this is a parse error; ignore the token.
832+
*/
833+
(
834+
'LI' === $tag_name &&
835+
! $this->state->stack_of_open_elements->has_element_in_list_item_scope( 'LI' )
836+
) ||
837+
/*
838+
* An end tag whose tag name is one of: "dd", "dt":
839+
* If the stack of open elements does not have an element in scope that is an
840+
* HTML element with the same tag name as that of the token, then this is a
841+
* parse error; ignore the token.
842+
*/
843+
(
844+
'LI' !== $tag_name &&
845+
! $this->state->stack_of_open_elements->has_element_in_scope( $tag_name )
846+
)
847+
) {
848+
/*
849+
* This is a parse error, ignore the token.
850+
*
851+
* @todo Indicate a parse error once it's possible.
852+
*/
853+
return $this->step();
854+
}
855+
856+
$this->generate_implied_end_tags( $tag_name );
857+
858+
if ( $tag_name !== $this->state->stack_of_open_elements->current_node()->node_name ) {
859+
// @todo Indicate a parse error once it's possible. This error does not impact the logic here.
860+
}
861+
862+
$this->state->stack_of_open_elements->pop_until( $tag_name );
863+
return true;
864+
758865
/*
759866
* > An end tag whose tag name is "p"
760867
*/
@@ -1223,6 +1330,9 @@ private function close_a_p_element() {
12231330
*/
12241331
private function generate_implied_end_tags( $except_for_this_element = null ) {
12251332
$elements_with_implied_end_tags = array(
1333+
'DD',
1334+
'DT',
1335+
'LI',
12261336
'P',
12271337
);
12281338

@@ -1248,6 +1358,9 @@ private function generate_implied_end_tags( $except_for_this_element = null ) {
12481358
*/
12491359
private function generate_implied_end_tags_thoroughly() {
12501360
$elements_with_implied_end_tags = array(
1361+
'DD',
1362+
'DT',
1363+
'LI',
12511364
'P',
12521365
);
12531366

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -168,8 +168,6 @@ public function data_unsupported_special_in_body_tags() {
168168
'CAPTION' => array( 'CAPTION' ),
169169
'COL' => array( 'COL' ),
170170
'COLGROUP' => array( 'COLGROUP' ),
171-
'DD' => array( 'DD' ),
172-
'DT' => array( 'DT' ),
173171
'EMBED' => array( 'EMBED' ),
174172
'FORM' => array( 'FORM' ),
175173
'FRAME' => array( 'FRAME' ),
@@ -180,7 +178,6 @@ public function data_unsupported_special_in_body_tags() {
180178
'IFRAME' => array( 'IFRAME' ),
181179
'INPUT' => array( 'INPUT' ),
182180
'KEYGEN' => array( 'KEYGEN' ),
183-
'LI' => array( 'LI' ),
184181
'LINK' => array( 'LINK' ),
185182
'LISTING' => array( 'LISTING' ),
186183
'MARQUEE' => array( 'MARQUEE' ),
@@ -191,7 +188,6 @@ public function data_unsupported_special_in_body_tags() {
191188
'NOFRAMES' => array( 'NOFRAMES' ),
192189
'NOSCRIPT' => array( 'NOSCRIPT' ),
193190
'OBJECT' => array( 'OBJECT' ),
194-
'OL' => array( 'OL' ),
195191
'OPTGROUP' => array( 'OPTGROUP' ),
196192
'OPTION' => array( 'OPTION' ),
197193
'PARAM' => array( 'PARAM' ),
@@ -218,7 +214,6 @@ public function data_unsupported_special_in_body_tags() {
218214
'TITLE' => array( 'TITLE' ),
219215
'TR' => array( 'TR' ),
220216
'TRACK' => array( 'TRACK' ),
221-
'UL' => array( 'UL' ),
222217
'WBR' => array( 'WBR' ),
223218
'XMP' => array( 'XMP' ),
224219
);

0 commit comments

Comments
 (0)