Skip to content

Commit 32dd59b

Browse files
committed
HTML API: Add support for H1-H6 elements in the HTML Processor.
Previously these have been unsupported, but in this patch, support is added for the tags so that the HTML Processor can process documents containing them. There was a design discussion about introducing a constant to communicate "any of the H1 - H6 elements" but this posed a number of challenges that don't need to be answered in this patch. For the time being, because the HTML specification treats H1 - H6 specially as a single kind of element, the HTML Processor uses an internal hard-coded string to indicate this. By using a hard-coded string it's possible to avoid introducing a class constant which cannot be made private due to PHP's class design. In the future, this will probably appear as a special constant in a new constant-containing class. Props dmsnell, jonsurrell. Fixes #60060. git-svn-id: https://develop.svn.wordpress.org/trunk@57186 602fd350-edb4-49c9-b593-d223f7449a82
1 parent edb416c commit 32dd59b

6 files changed

Lines changed: 228 additions & 40 deletions

File tree

src/wp-includes/html-api/class-wp-html-open-elements.php

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ public function has_element_in_specific_scope( $tag_name, $termination_list ) {
116116
return true;
117117
}
118118

119+
if (
120+
'(internal: H1 through H6 - do not use)' === $tag_name &&
121+
in_array( $node->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true )
122+
) {
123+
return true;
124+
}
125+
119126
switch ( $node->node_name ) {
120127
case 'HTML':
121128
return false;
@@ -270,6 +277,13 @@ public function pop_until( $tag_name ) {
270277
foreach ( $this->walk_up() as $item ) {
271278
$this->pop();
272279

280+
if (
281+
'(internal: H1 through H6 - do not use)' === $tag_name &&
282+
in_array( $item->node_name, array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), true )
283+
) {
284+
return true;
285+
}
286+
273287
if ( $tag_name === $item->node_name ) {
274288
return true;
275289
}

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@
102102
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103
* - Form elements: BUTTON, FIELDSET, SEARCH.
104104
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
105-
* - Heading elements: HGROUP.
105+
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
106106
* - Links: A.
107107
* - Lists: DL.
108108
* - Media elements: FIGCAPTION, FIGURE, IMG.
@@ -697,6 +697,60 @@ private function step_in_body() {
697697
$this->state->stack_of_open_elements->pop_until( $tag_name );
698698
return true;
699699

700+
/*
701+
* > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
702+
*/
703+
case '+H1':
704+
case '+H2':
705+
case '+H3':
706+
case '+H4':
707+
case '+H5':
708+
case '+H6':
709+
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
710+
$this->close_a_p_element();
711+
}
712+
713+
if (
714+
in_array(
715+
$this->state->stack_of_open_elements->current_node()->node_name,
716+
array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ),
717+
true
718+
)
719+
) {
720+
// @TODO: Indicate a parse error once it's possible.
721+
$this->state->stack_of_open_elements->pop();
722+
}
723+
724+
$this->insert_html_element( $this->state->current_token );
725+
return true;
726+
727+
/*
728+
* > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
729+
*/
730+
case '-H1':
731+
case '-H2':
732+
case '-H3':
733+
case '-H4':
734+
case '-H5':
735+
case '-H6':
736+
if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
737+
/*
738+
* This is a parse error; ignore the token.
739+
*
740+
* @TODO: Indicate a parse error once it's possible.
741+
*/
742+
return $this->step();
743+
}
744+
745+
$this->generate_implied_end_tags();
746+
747+
if ( $this->state->stack_of_open_elements->current_node()->node_name !== $tag_name ) {
748+
// @TODO: Record parse error: this error doesn't impact parsing.
749+
}
750+
751+
$this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
752+
return true;
753+
700754
/*
701755
* > An end tag whose tag name is "p"
702756
*/

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,6 @@ public function test_stops_processing_after_unsupported_elements() {
9191
*
9292
* @covers WP_HTML_Processor::next_tag
9393
* @covers WP_HTML_Processor::seek
94-
*
95-
* @throws WP_HTML_Unsupported_Exception
9694
*/
9795
public function test_clear_to_navigate_after_seeking() {
9896
$p = WP_HTML_Processor::create_fragment( '<div one><strong></strong></div><p><strong two></strong></p>' );

tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,12 @@ public function data_single_tag_of_supported_elements() {
5656
'FIGURE',
5757
'FONT',
5858
'FOOTER',
59+
'H1',
60+
'H2',
61+
'H3',
62+
'H4',
63+
'H5',
64+
'H6',
5965
'HEADER',
6066
'HGROUP',
6167
'I',
@@ -142,12 +148,6 @@ public function data_unsupported_elements() {
142148
'FORM',
143149
'FRAME',
144150
'FRAMESET',
145-
'H1',
146-
'H2',
147-
'H3',
148-
'H4',
149-
'H5',
150-
'H6',
151151
'HEAD',
152152
'HR',
153153
'HTML',
@@ -352,6 +352,14 @@ public function data_html_target_with_breadcrumbs() {
352352
),
353353
'MAIN inside MAIN inside SPAN' => array( '<span><main><main target>', array( 'HTML', 'BODY', 'SPAN', 'MAIN', 'MAIN' ), 1 ),
354354
'MAIN next to unclosed P' => array( '<p><main target>', array( 'HTML', 'BODY', 'MAIN' ), 1 ),
355+
356+
// H1 - H6 close out _any_ H1 - H6 when encountering _any_ of H1 - H6, making this section surprising.
357+
'EM inside H3 after unclosed P' => array( '<p><h3><em target>Important Message</em></h3>', array( 'HTML', 'BODY', 'H3', 'EM' ), 1 ),
358+
'H4 after H2' => array( '<h2>Major</h2><h4 target>Minor</h4>', array( 'HTML', 'BODY', 'H4' ), 1 ),
359+
'H4 after unclosed H2' => array( '<h2>Major<h4 target>Minor</h3>', array( 'HTML', 'BODY', 'H4' ), 1 ),
360+
'H4 inside H2' => array( '<h2><span>Major<h4 target>Minor</h3></span>', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H4' ), 1 ),
361+
'H5 after unclosed H4 inside H2' => array( '<h2><span>Major<h4>Minor</span></h3><h5 target>', array( 'HTML', 'BODY', 'H2', 'SPAN', 'H5' ), 1 ),
362+
'H5 after H4 inside H2' => array( '<h2><span>Major<h4>Minor</h4></span></h3><h5 target>', array( 'HTML', 'BODY', 'H5' ), 1 ),
355363
);
356364
}
357365

@@ -387,29 +395,29 @@ public function test_reports_if_tag_matches_breadcrumbs_of_various_specificity(
387395
public function data_html_with_breadcrumbs_of_various_specificity() {
388396
return array(
389397
// Test with void elements.
390-
'Inner IMG' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
391-
'Inner IMG wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
392-
'Inner IMG no wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
393-
'Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
394-
'Invalid Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),
398+
'Inner IMG' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'figure', 'img' ), true ),
399+
'Inner IMG wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', '*', 'img' ), true ),
400+
'Inner IMG no wildcard' => array( '<div><span><figure><img target></figure></span></div>', array( 'span', 'img' ), false ),
401+
'Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'img' ), true ),
402+
'Invalid Full specification' => array( '<div><span><figure><img target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'img' ), false ),
395403

396404
// Test also with non-void elements that open and close.
397-
'Inner P' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
398-
'Inner P wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
399-
'Inner P no wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
400-
'Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
401-
'Invalid Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
405+
'Inner P' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'figure', 'p' ), true ),
406+
'Inner P wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', '*', 'p' ), true ),
407+
'Inner P no wildcard' => array( '<div><span><figure><p target></figure></span></div>', array( 'span', 'p' ), false ),
408+
'Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), true ),
409+
'Invalid Full specification (P)' => array( '<div><span><figure><p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
402410

403411
// Ensure that matches aren't on tag closers.
404-
'Inner P' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
405-
'Inner P wildcard' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
406-
'Inner P no wildcard' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
407-
'Full specification (P)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
408-
'Invalid Full specification (P)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
412+
'Inner P (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'figure', 'p' ), false ),
413+
'Inner P wildcard (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', '*', 'p' ), false ),
414+
'Inner P no wildcard (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'span', 'p' ), false ),
415+
'Full specification (P) (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'body', 'div', 'span', 'figure', 'p' ), false ),
416+
'Invalid Full specification (P) (Closer)' => array( '<div><span><figure></p target></figure></span></div>', array( 'html', 'div', 'span', 'figure', 'p' ), false ),
409417

410418
// Test wildcard behaviors.
411-
'Single wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
412-
'Child of wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
419+
'Single wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( '*' ), true ),
420+
'Child of wildcard element' => array( '<figure><code><div><p><span><img target></span></p></div></code></figure>', array( 'SPAN', '*' ), true ),
413421
);
414422
}
415423

tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ public function data_article_container_group() {
120120
* element in scope, that it skips the tag entirely.
121121
*
122122
* @ticket 58961
123-
*
124-
* @since 6.4.0
125-
*
126-
* @throws Exception
127123
*/
128124
public function test_in_body_skips_unexpected_button_closer() {
129125
$p = WP_HTML_Processor::create_fragment( '<div>Test</button></div>' );
@@ -145,10 +141,6 @@ public function test_in_body_skips_unexpected_button_closer() {
145141
* Verifies insertion of a BUTTON element when no existing BUTTON is already in scope.
146142
*
147143
* @ticket 58961
148-
*
149-
* @since 6.4.0
150-
*
151-
* @throws WP_HTML_Unsupported_Exception
152144
*/
153145
public function test_in_body_button_with_no_button_in_scope() {
154146
$p = WP_HTML_Processor::create_fragment( '<div><p>Click the button <button one>here</button>!</p></div><button two>not here</button>' );
@@ -174,8 +166,6 @@ public function test_in_body_button_with_no_button_in_scope() {
174166
* @ticket 58961
175167
*
176168
* @since 6.4.0
177-
*
178-
* @throws WP_HTML_Unsupported_Exception
179169
*/
180170
public function test_in_body_button_with_button_in_scope_as_parent() {
181171
$p = WP_HTML_Processor::create_fragment( '<div><p>Click the button <button one>almost<button two>here</button>!</p></div><button three>not here</button>' );
@@ -209,8 +199,6 @@ public function test_in_body_button_with_button_in_scope_as_parent() {
209199
* @ticket 58961
210200
*
211201
* @since 6.4.0
212-
*
213-
* @throws WP_HTML_Unsupported_Exception
214202
*/
215203
public function test_in_body_button_with_button_in_scope_as_ancestor() {
216204
$p = WP_HTML_Processor::create_fragment( '<div><button one><p>Click the button <span><button two>here</button>!</span></p></div><button three>not here</button>' );
@@ -236,7 +224,7 @@ public function test_in_body_button_with_button_in_scope_as_ancestor() {
236224
$this->assertSame( array( 'HTML', 'BODY', 'BUTTON' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting for third button.' );
237225
}
238226

239-
/*
227+
/**
240228
* Verifies that when "in body" and encountering "any other end tag"
241229
* that the HTML processor ignores the end tag if there's a special
242230
* element on the stack of open elements before the matching opening.
@@ -259,7 +247,7 @@ public function test_in_body_any_other_end_tag_with_unclosed_special_element() {
259247
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should still be open and DIV should be its child.' );
260248
}
261249

262-
/*
250+
/**
263251
* Verifies that when "in body" and encountering "any other end tag"
264252
* that the HTML processor closes appropriate elements on the stack of
265253
* open elements up to the matching opening.
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
<?php
2+
/**
3+
* Unit tests covering WP_HTML_Processor compliance with HTML5 semantic parsing rules
4+
* for the H1 - H6 heading elements.
5+
*
6+
* @package WordPress
7+
* @subpackage HTML-API
8+
*
9+
* @since 6.5.0
10+
*
11+
* @group html-api
12+
*
13+
* @coversDefaultClass WP_HTML_Processor
14+
*/
15+
class Tests_HtmlApi_WpHtmlProcessorSemanticRulesHeadingElements extends WP_UnitTestCase {
	/*******************************************************************
	 * RULES FOR "IN BODY" MODE
	 *******************************************************************/

	/**
	 * Verifies that opening an H1 through H6 element closes an open P element.
	 *
	 * The heading, and the IMG which follows the heading's closing tag, are
	 * both expected to be direct children of BODY rather than descendants
	 * of the P element which was still open when the heading started.
	 *
	 * @ticket 60060
	 *
	 * @covers WP_HTML_Processor::step
	 *
	 * @dataProvider data_heading_elements
	 *
	 * @param string $tag_name Name of H1 - H6 element under test.
	 */
	public function test_in_body_heading_element_closes_open_p_tag( $tag_name ) {
		$processor = WP_HTML_Processor::create_fragment(
			"<p>Open<{$tag_name}>Closed P</{$tag_name}><img></p>"
		);

		// Fail early with a clear message instead of comparing breadcrumbs of an unmatched tag.
		$this->assertTrue(
			$processor->next_tag( $tag_name ),
			"Failed to find expected {$tag_name} tag."
		);

		$this->assertSame(
			array( 'HTML', 'BODY', $tag_name ),
			$processor->get_breadcrumbs(),
			"Expected {$tag_name} to be a direct child of the BODY, having closed the open P element."
		);

		$this->assertTrue(
			$processor->next_tag( 'IMG' ),
			"Failed to find expected IMG tag following the {$tag_name} element."
		);

		$this->assertSame(
			array( 'HTML', 'BODY', 'IMG' ),
			$processor->get_breadcrumbs(),
			'Expected IMG to be a direct child of BODY, having closed the open P element.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Supplies each of the six heading tag names, one per data set,
	 * keyed by the tag name it provides.
	 *
	 * @return array[]
	 */
	public function data_heading_elements() {
		return array(
			'H1' => array( 'H1' ),
			'H2' => array( 'H2' ),
			'H3' => array( 'H3' ),
			'H4' => array( 'H4' ),
			'H5' => array( 'H5' ),
			'H6' => array( 'H6' ),
		);
	}

	/**
	 * Verifies that H1 through H6 elements close an open H1 through H6 element.
	 *
	 * The second heading, and the IMG which follows the second heading's
	 * closing tag, are both expected to be direct children of the DIV
	 * rather than descendants of the first (unclosed) heading.
	 *
	 * @ticket 60060
	 *
	 * @covers WP_HTML_Processor::step
	 *
	 * @dataProvider data_heading_combinations
	 *
	 * @param string $first_heading  H1 - H6 element appearing (unclosed) before the second.
	 * @param string $second_heading H1 - H6 element appearing after the first.
	 */
	public function test_in_body_heading_element_closes_other_heading_elements( $first_heading, $second_heading ) {
		$processor = WP_HTML_Processor::create_fragment(
			"<div><{$first_heading} first> then <{$second_heading} second> and end </{$second_heading}><img></{$first_heading}></div>"
		);

		// Advance until reaching the tag carrying the `second` attribute, or until running out of tags.
		while ( $processor->next_tag() && null === $processor->get_attribute( 'second' ) ) {
			continue;
		}

		$this->assertTrue(
			$processor->get_attribute( 'second' ),
			"Failed to find expected {$second_heading} tag."
		);

		$this->assertSame(
			array( 'HTML', 'BODY', 'DIV', $second_heading ),
			$processor->get_breadcrumbs(),
			"Expected {$second_heading} to be a direct child of the DIV, having closed the open {$first_heading} element."
		);

		$this->assertTrue(
			$processor->next_tag( 'IMG' ),
			"Failed to find expected IMG tag following the {$second_heading} element."
		);

		$this->assertSame(
			array( 'HTML', 'BODY', 'DIV', 'IMG' ),
			$processor->get_breadcrumbs(),
			"Expected IMG to be a direct child of DIV, having closed the open {$first_heading} element."
		);
	}

	/**
	 * Data provider.
	 *
	 * Supplies every ordered pairing of the six heading tag names,
	 * keyed as "{first} then {second}".
	 *
	 * @return array[]
	 */
	public function data_heading_combinations() {
		$headings = array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' );

		$combinations = array();

		// Create every ordered pair of H1 - H6 elements, including each heading paired with itself.
		foreach ( $headings as $first_tag ) {
			foreach ( $headings as $second_tag ) {
				$combinations[ "{$first_tag} then {$second_tag}" ] = array( $first_tag, $second_tag );
			}
		}

		return $combinations;
	}
}

0 commit comments

Comments
 (0)