Skip to content

Commit 99eba5a

Browse files
committed
Remove support for Processing Instructions
Attempting to parse processing instructions conflicts with parsing bogus comments when a document may be incomplete, which could cause the HTML API to diverge from browser behavior.
1 parent 33289f3 commit 99eba5a

2 files changed

Lines changed: 7 additions & 81 deletions

File tree

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 1 addition & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -321,7 +321,6 @@
321321
*
322322
* And there are non-elements which are atomic in nature but have no modifiable text.
323323
* - `DOCTYPE` nodes like `<!DOCTYPE html>` which have no closing tag.
324-
* - XML Processing instruction nodes like `<?xml charset="utf8"?>`.
325324
* - The empty end tag `</>` which is ignored in the browser and DOM but exposed
326325
* to the HTML API.
327326
*
@@ -483,7 +482,6 @@ class WP_HTML_Tag_Processor {
483482
* | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. |
484483
* | *Text node* | Found a #text node; this is plaintext and modifiable. |
485484
* | *CDATA node* | Found a CDATA section; this is modifiable. |
486-
* | *PI node* | Found a Processing Instruction; this is modifiable. |
487485
* | *Comment* | Found a comment or bogus comment; this is modifiable. |
488486
* | *Presumptuous* | Found an empty tag closer: `</>`. |
489487
* | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. |
@@ -496,7 +494,6 @@ class WP_HTML_Tag_Processor {
496494
* @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
497495
* @see WP_HTML_Tag_Processor::STATE_TEXT_NODE
498496
* @see WP_HTML_Tag_Processor::STATE_CDATA_NODE
499-
* @see WP_HTML_Tag_Processor::STATE_PI_NODE
500497
* @see WP_HTML_Tag_Processor::STATE_COMMENT
501498
* @see WP_HTML_Tag_Processor::STATE_DOCTYPE
502499
* @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG
@@ -806,7 +803,6 @@ public function next_tag( $query = null ) {
806803
* - a text node - the plaintext inside tags.
807804
* - an HTML comment.
808805
* - a DOCTYPE declaration.
809-
* - a processing instruction, e.g. `<?xml version="1.0" ?>`.
810806
*
811807
* The Tag Processor currently only supports the tag token.
812808
*
@@ -1723,9 +1719,6 @@ private function parse_next_tag() {
17231719
/*
17241720
* <? transitions to a bogus comment state – skip to the nearest >
17251721
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
1726-
*
1727-
* Although this becomes a DOM comment, the Tag Processor is going to treat
1728-
* it as a processing instruction to be able to treat it as the raw syntax.
17291722
*/
17301723
if ( '?' === $html[ $at + 1 ] ) {
17311724
$closer_at = strpos( $html, '>', $at + 2 );
@@ -1735,7 +1728,7 @@ private function parse_next_tag() {
17351728
return false;
17361729
}
17371730

1738-
$this->parser_state = self::STATE_PI_NODE;
1731+
$this->parser_state = self::STATE_COMMENT;
17391732
$this->token_length = $closer_at + 1 - $this->token_starts_at;
17401733
$this->text_starts_at = $this->token_starts_at + 2;
17411734
$this->text_length = $closer_at - $this->text_starts_at;
@@ -2532,7 +2525,6 @@ public function is_tag_closer() {
25322525
* - `#tag` when matched on a tag.
25332526
* - `#text` when matched on a text node.
25342527
* - `#cdata-section` when matched on a CDATA node.
2535-
* - `#processing-instruction` when matched on a processing instruction.
25362528
* - `#comment` when matched on a comment.
25372529
* - `#presumptuous-tag` when matched on an empty tag closer.
25382530
* - `#funky-comment` when matched on a funky comment.
@@ -2549,9 +2541,6 @@ public function get_token_type() {
25492541
case self::STATE_DOCTYPE:
25502542
return '#doctype';
25512543

2552-
case self::STATE_PI_NODE:
2553-
return '#processing-instruction';
2554-
25552544
default:
25562545
return $this->get_token_name();
25572546
}
@@ -2566,7 +2555,6 @@ public function get_token_type() {
25662555
*
25672556
* Dynamic names:
25682557
* - Uppercase tag name for tag matches.
2569-
* - Tag name for processing instructions.
25702558
* - `html` for DOCTYPE declarations.
25712559
*
25722560
* Note that if the Tag Processor is not matched on a token
@@ -2589,10 +2577,6 @@ public function get_token_name() {
25892577
case self::STATE_CDATA_NODE:
25902578
return '#cdata-section';
25912579

2592-
case self::STATE_PI_NODE:
2593-
// @todo add the PI tag.
2594-
return '?';
2595-
25962580
case self::STATE_COMMENT:
25972581
return '#comment';
25982582

@@ -3197,22 +3181,6 @@ private function matches() {
31973181
*/
31983182
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
31993183

3200-
/**
3201-
* Parser Processing Instruction State.
3202-
*
3203-
* Indicates that the parser has found a Processing Instruction and
3204-
* it's possible to read and modify its modifiable text. Note that in
3205-
* HTML there are no Processing Instruction nodes and they are treated
3206-
* as HTML comments. Nonetheless, the Tag Processor still recognizes
3207-
* them as they appear in the HTML stream and exposes them for
3208-
* inspection and modification.
3209-
*
3210-
* @since 6.5.0
3211-
*
3212-
* @access private
3213-
*/
3214-
const STATE_PI_NODE = 'STATE_PI_NODE';
3215-
32163184
/**
32173185
* Indicates that the parser has found an HTML comment and it's
32183186
* possible to read and modify its modifiable text.

tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php

Lines changed: 6 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ public function test_basic_assertion_element() {
8787
);
8888

8989
$attributes = $processor->get_attribute_names_with_prefix( '' );
90-
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
90+
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
9191
$this->assertSame(
9292
array( 'id', 'inert' ),
9393
$attributes,
@@ -127,7 +127,7 @@ public function test_basic_assertion_script_element() {
127127
);
128128

129129
$attributes = $processor->get_attribute_names_with_prefix( '' );
130-
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
130+
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
131131
$this->assertSame(
132132
array( 'type' ),
133133
$attributes,
@@ -178,7 +178,7 @@ public function test_basic_assertion_textarea_element() {
178178
);
179179

180180
$attributes = $processor->get_attribute_names_with_prefix( '' );
181-
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
181+
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
182182
$this->assertSame(
183183
array( 'rows', 'cols' ),
184184
$attributes,
@@ -224,7 +224,7 @@ public function test_basic_assertion_title_element() {
224224
);
225225

226226
$attributes = $processor->get_attribute_names_with_prefix( '' );
227-
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
227+
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
228228
$this->assertSame(
229229
array( 'class' ),
230230
$attributes,
@@ -273,7 +273,7 @@ public function test_basic_assertion_rawtext_elements( $tag_name ) {
273273
);
274274

275275
$attributes = $processor->get_attribute_names_with_prefix( '' );
276-
$attribute_list = array_map( array( 'self', 'quoted' ), $attributes );
276+
$attribute_list = array_map( 'Tests_HtmlApi_WpHtmlProcessor_Token_Scanning::quoted', $attributes );
277277
$this->assertSame(
278278
array( 'class' ),
279279
$attributes,
@@ -338,48 +338,6 @@ public function test_basic_assertion_cdata_section() {
338338
);
339339
}
340340

341-
/**
342-
* Ensures that normative Processing Instruction nodes are properly parsed.
343-
*
344-
* @ticket 60170
345-
*
346-
* @since 6.5.0
347-
*
348-
* @covers WP_HTML_Tag_Processor::next_token
349-
*/
350-
public function test_basic_assertion_processing_instruction() {
351-
$processor = WP_HTML_Processor::create_fragment( '<?xml charset="utf-8">' );
352-
$processor->next_token();
353-
354-
$this->assertSame(
355-
'#processing-instruction',
356-
$processor->get_token_type(),
357-
"Should have found PI node but found {$processor->get_token_type()} instead."
358-
);
359-
360-
$this->assertSame(
361-
'xml',
362-
$processor->get_token_name(),
363-
"Should have found PI tag as name but found {$processor->get_token_name()} instead."
364-
);
365-
366-
$this->assertNull(
367-
$processor->get_tag(),
368-
'Should not have been able to query tag name on non-element token.'
369-
);
370-
371-
$this->assertNull(
372-
$processor->get_attribute( 'type' ),
373-
'Should not have been able to query attributes on non-element token.'
374-
);
375-
376-
$this->assertSame(
377-
' charset="utf-8"',
378-
$processor->get_modifiable_text(),
379-
'Found incorrect modifiable text.'
380-
);
381-
}
382-
383341
/**
384342
* Ensures that common comments are properly parsed.
385343
*
@@ -436,7 +394,7 @@ public function data_common_comments() {
436394
return array(
437395
'Shortest comment' => array( '<!-->', '' ),
438396
'Short comment' => array( '<!--->', '' ),
439-
'Invalid PI node' => array( '<? missing>', ' missing' ),
397+
'Invalid PI node' => array( '<?/missing/>', '/missing/' ),
440398
'Invalid ! directive' => array( '<!something else>', 'something else' ),
441399
);
442400
}

0 commit comments

Comments
 (0)