Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 26 additions & 14 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ function ( WP_HTML_Token $token ): void {
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );

$this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
$this->change_parsing_namespace_for_node( $token );
}
);

Expand All @@ -417,11 +417,7 @@ function ( WP_HTML_Token $token ): void {

$adjusted_current_node = $this->get_adjusted_current_node();

if ( $adjusted_current_node ) {
$this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
} else {
$this->change_parsing_namespace( 'html' );
}
$this->change_parsing_namespace_for_node( $adjusted_current_node );
}
);

Expand All @@ -435,6 +431,28 @@ function ( WP_HTML_Token $token ): void {
};
}

/**
 * Updates the tokenizer's namespace state to match the given node.
 *
 * Two pieces of state are set from the node:
 *
 *  - The general parsing namespace becomes 'html' when the node is an
 *    integration point, because start tags and character tokens there
 *    are parsed according to HTML rules. Otherwise it is the node's own
 *    namespace.
 *  - The CDATA-detection namespace always follows the node's own namespace,
 *    so CDATA sections stay recognizable inside integration points.
 *
 * When no node is given, the parsing namespace is reset to 'html'.
 *
 * @since 7.1.0
 * @ignore
 *
 * @param WP_HTML_Token|null $node Node controlling the next token's parsing context.
 */
private function change_parsing_namespace_for_node( ?WP_HTML_Token $node ): void {
	if ( null !== $node ) {
		$tokenizer_namespace = $node->integration_node_type ? 'html' : $node->namespace;

		/*
		 * Order matters: the general setter is applied first, and the
		 * CDATA-specific setter then overrides the CDATA namespace with
		 * the node's actual namespace.
		 */
		$this->change_parsing_namespace( $tokenizer_namespace );
		$this->change_cdata_parsing_namespace( $node->namespace );
		return;
	}

	// Without a controlling node the parser falls back to plain HTML rules.
	$this->change_parsing_namespace( 'html' );
}

/**
* Creates a fragment processor at the current node.
*
Expand Down Expand Up @@ -570,9 +588,7 @@ private function create_fragment_at_current_node( string $html ) {
* This is important so that any push/pop from the stack of open
* elements does not change the parsing namespace.
*/
$fragment_processor->change_parsing_namespace(
$this->current_element->token->integration_node_type ? 'html' : $namespace
);
$fragment_processor->change_parsing_namespace_for_node( $this->current_element->token );

return $fragment_processor;
}
Expand Down Expand Up @@ -5704,11 +5720,7 @@ public function seek( $bookmark_name ): bool {
)
);

$this->change_parsing_namespace(
$this->context_node->integration_node_type
? 'html'
: $this->context_node->namespace
);
$this->change_parsing_namespace_for_node( $this->context_node );

if ( 'TEMPLATE' === $this->context_node->node_name ) {
$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
Expand Down
41 changes: 36 additions & 5 deletions src/wp-includes/html-api/class-wp-html-tag-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -544,16 +544,28 @@ class WP_HTML_Tag_Processor {
* One of 'html', 'svg', or 'math'.
*
* Several parsing rules change based on whether the parser
* is inside foreign content, including whether CDATA sections
* are allowed and whether a self-closing flag indicates that
* an element has no content.
* is inside foreign content, including whether a self-closing
* flag indicates that an element has no content.
*
* @since 6.7.0
*
* @var string
*/
private $parsing_namespace = 'html';

/**
 * Namespace of the adjusted current node, used for CDATA section detection.
 *
 * One of 'html', 'svg', or 'math'. A CDATA section opener is only
 * recognized while this value is something other than 'html'.
 *
 * This is tracked separately from the parsing namespace because HTML
 * integration points follow HTML tokenization for start tags and
 * character tokens, while CDATA sections are allowed based on the
 * adjusted current node's actual namespace.
 *
 * Changing the parsing namespace also overwrites this value; it can be
 * set on its own through `change_cdata_parsing_namespace()`.
 *
 * @since 7.1.0
 *
 * @var string
 */
private $cdata_parsing_namespace = 'html';

/**
* What kind of syntax token became an HTML comment.
*
Expand Down Expand Up @@ -860,7 +872,26 @@ public function change_parsing_namespace( string $new_namespace ): bool {
return false;
}

$this->parsing_namespace = $new_namespace;
$this->parsing_namespace = $new_namespace;
$this->cdata_parsing_namespace = $new_namespace;
return true;
}

/**
 * Sets the namespace consulted when detecting CDATA sections.
 *
 * Only the CDATA-detection namespace is touched; the general parsing
 * namespace is left as it was. Unrecognized values are rejected and
 * leave the current state unchanged.
 *
 * @since 7.1.0
 *
 * @param string $new_namespace One of 'html', 'svg', or 'math' indicating whether
 *                              the adjusted current node can contain CDATA sections.
 * @return bool Whether the given namespace was recognized and stored.
 */
protected function change_cdata_parsing_namespace( string $new_namespace ): bool {
	$is_recognized = (
		'html' === $new_namespace ||
		'math' === $new_namespace ||
		'svg' === $new_namespace
	);

	if ( $is_recognized ) {
		$this->cdata_parsing_namespace = $new_namespace;
	}

	return $is_recognized;
}

Expand Down Expand Up @@ -1917,7 +1948,7 @@ private function parse_next_tag(): bool {
}

if (
'html' !== $this->parsing_namespace &&
'html' !== $this->cdata_parsing_namespace &&
strlen( $html ) > $at + 8 &&
'[' === $html[ $at + 2 ] &&
'C' === $html[ $at + 3 ] &&
Expand Down
90 changes: 90 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -851,6 +851,51 @@ public function test_adjusts_for_html_integration_points_in_svg() {
);
}

/**
 * Verifies that a CDATA section placed directly inside an SVG `foreignObject`
 * element (an HTML integration point) is still reported as a CDATA section
 * in the SVG namespace, with its inner text intact.
 *
 * @ticket 61576
 */
public function test_cdata_sections_in_svg_html_integration_points() {
	$html      = '<svg><foreignObject><![CDATA[foo]]></foreignObject></svg>';
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Move onto the integration point element itself.
	$found_element = $processor->next_tag( 'foreignObject' );
	$this->assertTrue( $found_element, 'Failed to find "foreignObject" under test: check test setup.' );

	$element_namespace = $processor->get_namespace();
	$this->assertSame( 'svg', $element_namespace, 'Found the wrong namespace for the "foreignObject" element.' );

	// The token immediately following the opening tag should be the CDATA section.
	$found_token = $processor->next_token();
	$this->assertTrue( $found_token, 'Failed to find expected CDATA section.' );

	$this->assertSame(
		'#cdata-section',
		$processor->get_token_name(),
		"Should have found a CDATA section but found {$processor->get_token_name()} instead."
	);

	$cdata_namespace = $processor->get_namespace();
	$this->assertSame( 'svg', $cdata_namespace, 'Found the wrong namespace for the CDATA section.' );

	$cdata_text = $processor->get_modifiable_text();
	$this->assertSame( 'foo', $cdata_text, 'Found incorrect CDATA content.' );
}

/**
* Ensures that the processor correctly adjusts the namespace
* for elements inside MathML integration points.
Expand Down Expand Up @@ -911,6 +956,51 @@ public function test_adjusts_for_mathml_integration_points() {
);
}

/**
 * Verifies that a CDATA section placed directly inside a MathML
 * `annotation-xml` element with `encoding="text/html"` (an HTML integration
 * point) is still reported as a CDATA section in the MathML namespace,
 * with its inner text intact.
 *
 * @ticket 61576
 */
public function test_cdata_sections_in_mathml_html_integration_points() {
	$html      = '<math><annotation-xml encoding="text/html"><![CDATA[x]]></annotation-xml></math>';
	$processor = WP_HTML_Processor::create_fragment( $html );

	// Move onto the integration point element itself.
	$found_element = $processor->next_tag( 'ANNOTATION-XML' );
	$this->assertTrue( $found_element, 'Failed to find "annotation-xml" under test: check test setup.' );

	$element_namespace = $processor->get_namespace();
	$this->assertSame( 'math', $element_namespace, 'Found the wrong namespace for the "annotation-xml" element.' );

	// The token immediately following the opening tag should be the CDATA section.
	$found_token = $processor->next_token();
	$this->assertTrue( $found_token, 'Failed to find expected CDATA section.' );

	$this->assertSame(
		'#cdata-section',
		$processor->get_token_name(),
		"Should have found a CDATA section but found {$processor->get_token_name()} instead."
	);

	$cdata_namespace = $processor->get_namespace();
	$this->assertSame( 'math', $cdata_namespace, 'Found the wrong namespace for the CDATA section.' );

	$cdata_text = $processor->get_modifiable_text();
	$this->assertSame( 'x', $cdata_text, 'Found incorrect CDATA content.' );
}

/**
* Ensures that the processor stops correctly on a FORM tag closer token.
*
Expand Down
Loading