Skip to content

Commit 91e51f9

Browse files
committed
HTML API: Add support for BR, EMBED, & other tags.
Adds support for the following HTML elements to the HTML Processor:

 - AREA, BR, EMBED, KEYGEN, WBR
 - Only the opening BR tag is supported, as the invalid closer `</br>` involves more complicated rules, to be implemented later.

Previously, these elements were not supported and the HTML Processor would bail when encountering them. With this patch it will proceed to parse an HTML document when encountering those tags as long as other normal conditions don't cause it to bail (such as complicated format reconstruction rules).

Props jonsurrell, dmsnell
Fixes #60283

git-svn-id: https://develop.svn.wordpress.org/trunk@57316 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 5815624 commit 91e51f9

4 files changed

Lines changed: 127 additions & 19 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -102,17 +102,17 @@
102102
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103
* - Custom elements: All custom elements are supported. :)
104104
* - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105-
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
105+
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U, WBR.
106106
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107
* - Links: A.
108108
* - Lists: DD, DL, DT, LI, OL, UL.
109-
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110-
* - Paragraph: P.
111-
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
109+
* - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110+
* - Paragraph: BR, P.
111+
* - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112112
* - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
113113
* - Templating elements: SLOT.
114114
* - Text decoration: RUBY.
115-
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
115+
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, MULTICOL, NEXTID, SPACER.
116116
*
117117
* ### Supported markup
118118
*
@@ -934,12 +934,28 @@ private function step_in_body() {
934934
$this->run_adoption_agency_algorithm();
935935
return true;
936936

937+
/*
938+
* > An end tag whose tag name is "br"
939+
* > Parse error. Drop the attributes from the token, and act as described in the next
940+
* > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
941+
* > than the end tag token that it actually is.
942+
*/
943+
case '-BR':
944+
$this->last_error = self::ERROR_UNSUPPORTED;
945+
throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
946+
937947
/*
938948
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
939949
*/
950+
case '+AREA':
951+
case '+BR':
952+
case '+EMBED':
940953
case '+IMG':
954+
case '+KEYGEN':
955+
case '+WBR':
941956
$this->reconstruct_active_formatting_elements();
942957
$this->insert_html_element( $this->state->current_token );
958+
$this->state->frameset_ok = false;
943959
return true;
944960

945961
/*
@@ -977,21 +993,18 @@ private function step_in_body() {
977993
case 'BASEFONT':
978994
case 'BGSOUND':
979995
case 'BODY':
980-
case 'BR':
981996
case 'CAPTION':
982997
case 'COL':
983998
case 'COLGROUP':
984999
case 'DD':
9851000
case 'DT':
986-
case 'EMBED':
9871001
case 'FORM':
9881002
case 'FRAME':
9891003
case 'FRAMESET':
9901004
case 'HEAD':
9911005
case 'HTML':
9921006
case 'IFRAME':
9931007
case 'INPUT':
994-
case 'KEYGEN':
9951008
case 'LI':
9961009
case 'LINK':
9971010
case 'LISTING':
@@ -1031,7 +1044,6 @@ private function step_in_body() {
10311044
case 'TR':
10321045
case 'TRACK':
10331046
case 'UL':
1034-
case 'WBR':
10351047
case 'XMP':
10361048
$this->last_error = self::ERROR_UNSUPPORTED;
10371049
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
@@ -1692,6 +1704,7 @@ public static function is_void( $tag_name ) {
16921704
'IMG' === $tag_name ||
16931705
'INPUT' === $tag_name ||
16941706
'LINK' === $tag_name ||
1707+
'KEYGEN' === $tag_name || // Obsolete but still treated as void.
16951708
'META' === $tag_name ||
16961709
'SOURCE' === $tag_name ||
16971710
'TRACK' === $tag_name ||

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,86 @@ public function test_fails_to_reconstruct_formatting_elements() {
132132
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
133133
}
134134

135+
/**
136+
* Ensure non-nesting tags do not nest.
137+
*
138+
* @ticket 60283
139+
*
140+
* @covers WP_HTML_Processor::step_in_body
141+
* @covers WP_HTML_Processor::is_void
142+
*
143+
* @dataProvider data_void_tags
144+
*
145+
* @param string $tag_name Name of void tag under test.
146+
*/
147+
public function test_cannot_nest_void_tags( $tag_name ) {
148+
$processor = WP_HTML_Processor::create_fragment( "<{$tag_name}><div>" );
149+
150+
/*
151+
* This input is parsed the same as the following HTML,
152+
* assuming `<img>` was provided as the tag under test:
153+
*
154+
* <html>
155+
* <body>
156+
* <img>
157+
* <div></div>
158+
* </body>
159+
* </html>
160+
*/
161+
162+
$found_tag = $processor->next_tag();
163+
164+
if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error() ) {
165+
$this->markTestSkipped( "Tag {$tag_name} is not supported." );
166+
}
167+
168+
$this->assertTrue(
169+
$found_tag,
170+
"Could not find first {$tag_name}."
171+
);
172+
173+
$this->assertSame(
174+
array( 'HTML', 'BODY', $tag_name ),
175+
$processor->get_breadcrumbs(),
176+
'Found incorrect nesting of first element.'
177+
);
178+
179+
$this->assertTrue(
180+
$processor->next_tag(),
181+
'Should have found the DIV as the second tag.'
182+
);
183+
184+
$this->assertSame(
185+
array( 'HTML', 'BODY', 'DIV' ),
186+
$processor->get_breadcrumbs(),
187+
"DIV should have been a sibling of the {$tag_name}."
188+
);
189+
}
190+
191+
/**
192+
* Data provider.
193+
*
194+
* @return array[]
195+
*/
196+
public function data_void_tags() {
197+
return array(
198+
'AREA' => array( 'AREA' ),
199+
'BASE' => array( 'BASE' ),
200+
'BR' => array( 'BR' ),
201+
'COL' => array( 'COL' ),
202+
'EMBED' => array( 'EMBED' ),
203+
'HR' => array( 'HR' ),
204+
'IMG' => array( 'IMG' ),
205+
'INPUT' => array( 'INPUT' ),
206+
'KEYGEN' => array( 'KEYGEN' ),
207+
'LINK' => array( 'LINK' ),
208+
'META' => array( 'META' ),
209+
'SOURCE' => array( 'SOURCE' ),
210+
'TRACK' => array( 'TRACK' ),
211+
'WBR' => array( 'WBR' ),
212+
);
213+
}
214+
135215
/**
136216
* Ensures that special handling of unsupported tags is cleaned up
137217
* as handling is implemented. Otherwise there's risk of leaving special
@@ -159,24 +239,20 @@ public function test_step_in_body_fails_on_unsupported_tags( $tag_name ) {
159239
public function data_unsupported_special_in_body_tags() {
160240
return array(
161241
'APPLET' => array( 'APPLET' ),
162-
'AREA' => array( 'AREA' ),
163242
'BASE' => array( 'BASE' ),
164243
'BASEFONT' => array( 'BASEFONT' ),
165244
'BGSOUND' => array( 'BGSOUND' ),
166245
'BODY' => array( 'BODY' ),
167-
'BR' => array( 'BR' ),
168246
'CAPTION' => array( 'CAPTION' ),
169247
'COL' => array( 'COL' ),
170248
'COLGROUP' => array( 'COLGROUP' ),
171-
'EMBED' => array( 'EMBED' ),
172249
'FORM' => array( 'FORM' ),
173250
'FRAME' => array( 'FRAME' ),
174251
'FRAMESET' => array( 'FRAMESET' ),
175252
'HEAD' => array( 'HEAD' ),
176253
'HTML' => array( 'HTML' ),
177254
'IFRAME' => array( 'IFRAME' ),
178255
'INPUT' => array( 'INPUT' ),
179-
'KEYGEN' => array( 'KEYGEN' ),
180256
'LINK' => array( 'LINK' ),
181257
'LISTING' => array( 'LISTING' ),
182258
'MARQUEE' => array( 'MARQUEE' ),
@@ -213,7 +289,6 @@ public function data_unsupported_special_in_body_tags() {
213289
'TITLE' => array( 'TITLE' ),
214290
'TR' => array( 'TR' ),
215291
'TRACK' => array( 'TRACK' ),
216-
'WBR' => array( 'WBR' ),
217292
'XMP' => array( 'XMP' ),
218293
);
219294
}

tests/phpunit/tests/html-api/wpHtmlProcessorBreadcrumbs.php

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,23 +162,19 @@ public function test_fails_when_encountering_unsupported_tag( $html ) {
162162
public function data_unsupported_elements() {
163163
$unsupported_elements = array(
164164
'APPLET', // Deprecated.
165-
'AREA',
166165
'BASE',
167166
'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
168167
'BODY',
169-
'BR',
170168
'CAPTION',
171169
'COL',
172170
'COLGROUP',
173-
'EMBED',
174171
'FORM',
175172
'FRAME',
176173
'FRAMESET',
177174
'HEAD',
178175
'HTML',
179176
'IFRAME',
180177
'INPUT',
181-
'KEYGEN', // Deprecated; void.
182178
'LINK',
183179
'LISTING', // Deprecated, use PRE instead.
184180
'MARQUEE', // Deprecated.
@@ -213,7 +209,6 @@ public function data_unsupported_elements() {
213209
'TITLE',
214210
'TR',
215211
'TRACK',
216-
'WBR',
217212
'XMP', // Deprecated, use PRE instead.
218213
);
219214

tests/phpunit/tests/html-api/wpHtmlProcessorSemanticRules.php

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,4 +392,29 @@ public function test_in_body_any_other_end_tag_with_unclosed_non_special_element
392392
$this->assertSame( 'DIV', $p->get_tag(), "Expected to find DIV element, but found {$p->get_tag()} instead." );
393393
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
394394
}
395+
396+
/**
397+
* Ensures that support isn't accidentally partially added for the closing BR tag `</br>`.
398+
*
399+
* This tag closer has special rules and support shouldn't be added without implementing full support.
400+
*
401+
* > An end tag whose tag name is "br"
402+
* > Parse error. Drop the attributes from the token, and act as described in the next entry;
403+
* > i.e. act as if this was a "br" start tag token with no attributes, rather than the end
404+
* > tag token that it actually is.
405+
*
406+
* When this handling is implemented, this test should be removed. It's not incorporated
407+
* into the existing unsupported tag behavior test because the opening tag is supported;
408+
* only the closing tag isn't.
409+
*
410+
* @covers WP_HTML_Processor::step_in_body
411+
*
412+
* @ticket 60283
413+
*/
414+
public function test_br_end_tag_unsupported() {
415+
$p = WP_HTML_Processor::create_fragment( '</br>' );
416+
417+
$this->assertFalse( $p->next_tag(), 'Found a BR tag that should not be handled.' );
418+
$this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $p->get_last_error() );
419+
}
395420
}

0 commit comments

Comments
 (0)