Skip to content

Commit 204907b

Browse files
committed
Remove support for CDATA sections.
1 parent 99eba5a commit 204907b

3 files changed

Lines changed: 7 additions & 97 deletions

File tree

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 5 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,7 @@
313313
* - Comment nodes and nodes that became comments because of some syntax error. The
314314
* text for these nodes is the portion of the comment inside of the syntax. E.g. for
315315
* `<!-- comment -->` the text is `" comment "` (note that the spaces are part of it).
316-
* - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
317-
* `<![CDATA[some content]]>` the text is `"some content"`.
316+
* For `<![CDATA[some content]]>` the text is `"[CDATA[some content]]"`.
318317
* - "Funky comments," which are a special case of invalid closing tags whose name is
319318
* invalid. The text for these nodes is the text that a browser would transform into
320319
* an HTML when parsing. E.g. for `</%post_author>` the text is `%post_author`.
@@ -481,7 +480,6 @@ class WP_HTML_Tag_Processor {
481480
* | *Incomplete* | The HTML ended in the middle of a token; nothing more can be parsed. |
482481
* | *Matched tag* | Found an HTML tag; it's possible to modify its attributes. |
483482
* | *Text node* | Found a #text node; this is plaintext and modifiable. |
484-
* | *CDATA node* | Found a CDATA section; this is modifiable. |
485483
* | *Comment* | Found a comment or bogus comment; this is modifiable. |
486484
* | *Presumptuous* | Found an empty tag closer: `</>`. |
487485
* | *Funky comment* | Found a tag closer with an invalid tag name; this is modifiable. |
@@ -493,7 +491,6 @@ class WP_HTML_Tag_Processor {
493491
* @see WP_HTML_Tag_Processor::STATE_INCOMPLETE
494492
* @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
495493
* @see WP_HTML_Tag_Processor::STATE_TEXT_NODE
496-
* @see WP_HTML_Tag_Processor::STATE_CDATA_NODE
497494
* @see WP_HTML_Tag_Processor::STATE_COMMENT
498495
* @see WP_HTML_Tag_Processor::STATE_DOCTYPE
499496
* @see WP_HTML_Tag_Processor::STATE_PRESUMPTUOUS_TAG
@@ -1620,36 +1617,6 @@ private function parse_next_tag() {
16201617
}
16211618
}
16221619

1623-
/*
1624-
* <![CDATA[ transitions to CDATA section state – skip to the nearest ]]>
1625-
* The CDATA is case-sensitive.
1626-
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
1627-
*/
1628-
if (
1629-
$doc_length > $at + 8 &&
1630-
'[' === $html[ $at + 2 ] &&
1631-
'C' === $html[ $at + 3 ] &&
1632-
'D' === $html[ $at + 4 ] &&
1633-
'A' === $html[ $at + 5 ] &&
1634-
'T' === $html[ $at + 6 ] &&
1635-
'A' === $html[ $at + 7 ] &&
1636-
'[' === $html[ $at + 8 ]
1637-
) {
1638-
$closer_at = strpos( $html, ']]>', $at + 9 );
1639-
if ( false === $closer_at ) {
1640-
$this->parser_state = self::STATE_INCOMPLETE;
1641-
1642-
return false;
1643-
}
1644-
1645-
$this->parser_state = self::STATE_CDATA_NODE;
1646-
$this->token_length = $closer_at + 4 - $this->token_starts_at;
1647-
$this->text_starts_at = $this->token_starts_at + 9;
1648-
$this->text_length = $closer_at - $this->text_starts_at;
1649-
$this->bytes_already_parsed = $closer_at + 3;
1650-
return true;
1651-
}
1652-
16531620
/*
16541621
* <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
16551622
* These are ASCII-case-insensitive.
@@ -2524,7 +2491,6 @@ public function is_tag_closer() {
25242491
* Possible values:
25252492
* - `#tag` when matched on a tag.
25262493
* - `#text` when matched on a text node.
2527-
* - `#cdata-section` when matched on a CDATA node.
25282494
* - `#comment` when matched on a comment.
25292495
* - `#presumptuous-tag` when matched on an empty tag closer.
25302496
* - `#funky-comment` when matched on a funky comment.
@@ -2574,9 +2540,6 @@ public function get_token_name() {
25742540
case self::STATE_TEXT_NODE:
25752541
return '#text';
25762542

2577-
case self::STATE_CDATA_NODE:
2578-
return '#cdata-section';
2579-
25802543
case self::STATE_COMMENT:
25812544
return '#comment';
25822545

@@ -2597,11 +2560,10 @@ public function get_token_name() {
25972560
* Modifiable text is text content that may be read and changed without
25982561
* changing the HTML structure of the document around it. This includes
25992562
* the contents of `#text` nodes in the HTML as well as the inner
2600-
* contents of HTML comments, CDATA sections, Processing Instructions,
2601-
* and others, even though these nodes aren't part of a parsed DOM tree.
2602-
* They also contain the contents of SCRIPT and STYLE tags, of TEXTAREA
2603-
* tags, and of any other section in an HTML document which cannot
2604-
* contain HTML markup (DATA).
2563+
* contents of HTML comments, Processing Instructions, and others, even
2564+
* though these nodes aren't part of a parsed DOM tree. They also contain
2565+
* the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
2566+
* other section in an HTML document which cannot contain HTML markup (DATA).
26052567
*
26062568
* If a token has no modifiable text then an empty string is returned to
26072569
* avoid needless crashing or type errors. An empty string does not mean
@@ -3165,22 +3127,6 @@ private function matches() {
31653127
*/
31663128
const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
31673129

3168-
/**
3169-
* Parser CDATA Node State.
3170-
*
3171-
* Indicates that the parser has found a CDATA node and it's possible
3172-
* to read and modify its modifiable text. Note that in HTML there are
3173-
* no CDATA nodes outside foreign elements (SVG and MathML). Outside
3174-
* of foreign elements, they are treated as HTML comments. Nonetheless,
3175-
* the Tag Processor still recognizes them as they appear in the HTML
3176-
* stream and exposes them for inspection and modification.
3177-
*
3178-
* @since 6.5.0
3179-
*
3180-
* @access private
3181-
*/
3182-
const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
3183-
31843130
/**
31853131
* Indicates that the parser has found an HTML comment and it's
31863132
* possible to read and modify its modifiable text.

tests/phpunit/tests/html-api/wpHtmlTagProcessor-token-scanning.php

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -302,42 +302,6 @@ public function data_rawtext_elements() {
302302
);
303303
}
304304

305-
/**
306-
* Ensures that normative CDATA sections are properly parsed.
307-
*
308-
* @ticket 60170
309-
*
310-
* @since 6.5.0
311-
*
312-
* @covers WP_HTML_Tag_Processor::next_token
313-
*/
314-
public function test_basic_assertion_cdata_section() {
315-
$processor = WP_HTML_Processor::create_fragment( '<![CDATA[this is <a comment>]]>' );
316-
$processor->next_token();
317-
318-
$this->assertSame(
319-
'#cdata-section',
320-
$processor->get_token_name(),
321-
"Should have found CDATA section name but found {$processor->get_token_name()} instead."
322-
);
323-
324-
$this->assertNull(
325-
$processor->get_tag(),
326-
'Should not have been able to query tag name on non-element token.'
327-
);
328-
329-
$this->assertNull(
330-
$processor->get_attribute( 'type' ),
331-
'Should not have been able to query attributes on non-element token.'
332-
);
333-
334-
$this->assertSame(
335-
'this is <a comment>',
336-
$processor->get_modifiable_text(),
337-
'Found incorrect modifiable text.'
338-
);
339-
}
340-
341305
/**
342306
* Ensures that common comments are properly parsed.
343307
*

tests/phpunit/tests/html-api/wpHtmlTagProcessor.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2363,6 +2363,7 @@ public function data_html_without_tags() {
23632363
'No tags' => array( 'this is nothing more than a text node' ),
23642364
'Text with comments' => array( 'One <!-- sneaky --> comment.' ),
23652365
'Empty tag closer' => array( '</>' ),
2366+
'CDATA as HTML comment' => array( '<![CDATA[this closes at the first &gt;]>' ),
23662367
'Processing instruction' => array( '<?xml version="1.0"?>' ),
23672368
'Combination XML-like' => array( '<!DOCTYPE xml><?xml version=""?><!-- this is not a real document. --><![CDATA[it only serves as a test]]>' ),
23682369
);
@@ -2416,7 +2417,6 @@ public function data_incomplete_syntax_elements() {
24162417
'Incomplete CDATA' => array( '<![CDATA[something inside of here needs to get out' ),
24172418
'Partial CDATA' => array( '<![CDA' ),
24182419
'Partially closed CDATA]' => array( '<![CDATA[cannot escape]' ),
2419-
'Partially closed CDATA]>' => array( '<![CDATA[cannot escape]>' ),
24202420
'Unclosed IFRAME' => array( '<iframe><div>' ),
24212421
'Unclosed NOEMBED' => array( '<noembed><div>' ),
24222422
'Unclosed NOFRAMES' => array( '<noframes><div>' ),
@@ -2513,7 +2513,7 @@ public function data_updating_attributes() {
25132513
),
25142514
'tag inside of CDATA' => array(
25152515
'input' => '<![CDATA[This <is> a <strong id="yes">HTML Tag</strong>]]><span>test</span>',
2516-
'expected' => '<![CDATA[This <is> a <strong id="yes">HTML Tag</strong>]]><span class="firstTag" foo="bar">test</span>',
2516+
'expected' => '<![CDATA[This <is> a <strong class="firstTag" foo="bar" id="yes">HTML Tag</strong>]]><span class="secondTag">test</span>',
25172517
),
25182518
);
25192519
}

0 commit comments

Comments
 (0)