diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index 35d91fad3129c..2488e515cd778 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -404,7 +404,7 @@ function ( WP_HTML_Token $token ): void {
 				$provenance            = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
 				$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
 
-				$this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
+				$this->change_parsing_namespace_for_node( $token );
 			}
 		);
 
@@ -417,11 +417,7 @@ function ( WP_HTML_Token $token ): void {
 
 				$adjusted_current_node = $this->get_adjusted_current_node();
 
-				if ( $adjusted_current_node ) {
-					$this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
-				} else {
-					$this->change_parsing_namespace( 'html' );
-				}
+				$this->change_parsing_namespace_for_node( $adjusted_current_node );
 			}
 		);
 
@@ -435,6 +431,28 @@ function ( WP_HTML_Token $token ): void {
 		};
 	}
 
+	/**
+	 * Switches tokenizer namespace state for the next token.
+	 *
+	 * HTML integration points parse start tags and character tokens according to
+	 * HTML rules, but CDATA detection follows the adjusted current node's actual
+	 * namespace.
+	 *
+	 * @since 7.1.0
+	 * @ignore
+	 *
+	 * @param WP_HTML_Token|null $node Node controlling the next token's parsing context.
+	 */
+	private function change_parsing_namespace_for_node( ?WP_HTML_Token $node ): void {
+		if ( null === $node ) {
+			$this->change_parsing_namespace( 'html' );
+			return;
+		}
+
+		$this->change_parsing_namespace( $node->integration_node_type ? 'html' : $node->namespace );
+		$this->change_cdata_parsing_namespace( $node->namespace );
+	}
+
 	/**
 	 * Creates a fragment processor at the current node.
 	 *
@@ -570,9 +588,7 @@ private function create_fragment_at_current_node( string $html ) {
 		 * This is important so that any push/pop from the stack of open
 		 * elements does not change the parsing namespace.
 		 */
-		$fragment_processor->change_parsing_namespace(
-			$this->current_element->token->integration_node_type ? 'html' : $namespace
-		);
+		$fragment_processor->change_parsing_namespace_for_node( $this->current_element->token );
 
 		return $fragment_processor;
 	}
@@ -5704,11 +5720,7 @@ public function seek( $bookmark_name ): bool {
 					)
 				);
 
-				$this->change_parsing_namespace(
-					$this->context_node->integration_node_type
-						? 'html'
-						: $this->context_node->namespace
-				);
+				$this->change_parsing_namespace_for_node( $this->context_node );
 
 				if ( 'TEMPLATE' === $this->context_node->node_name ) {
 					$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 77c1a471db5b1..549e757bf5fd0 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -544,9 +544,8 @@ class WP_HTML_Tag_Processor {
 	 * One of 'html', 'svg', or 'math'.
 	 *
 	 * Several parsing rules change based on whether the parser
-	 * is inside foreign content, including whether CDATA sections
-	 * are allowed and whether a self-closing flag indicates that
-	 * an element has no content.
+	 * is inside foreign content, including whether a self-closing
+	 * flag indicates that an element has no content.
 	 *
 	 * @since 6.7.0
 	 *
@@ -554,6 +553,19 @@ class WP_HTML_Tag_Processor {
 	 */
 	private $parsing_namespace = 'html';
 
+	/**
+	 * Indicates the current node's namespace for CDATA section detection.
+	 *
+	 * HTML integration points follow HTML tokenization for start tags and
+	 * character tokens, but CDATA sections are allowed based on the adjusted
+	 * current node's actual namespace.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @var string
+	 */
+	private $cdata_parsing_namespace = 'html';
+
 	/**
 	 * What kind of syntax token became an HTML comment.
 	 *
@@ -860,7 +872,26 @@ public function change_parsing_namespace( string $new_namespace ): bool {
 			return false;
 		}
 
-		$this->parsing_namespace = $new_namespace;
+		$this->parsing_namespace       = $new_namespace;
+		$this->cdata_parsing_namespace = $new_namespace;
+		return true;
+	}
+
+	/**
+	 * Switches the namespace context used for detecting CDATA sections.
+	 *
+	 * @since 7.1.0
+	 *
+	 * @param string $new_namespace One of 'html', 'svg', or 'math' indicating whether
+	 *                              the adjusted current node can contain CDATA sections.
+	 * @return bool Whether the namespace was valid and changed.
+	 */
+	protected function change_cdata_parsing_namespace( string $new_namespace ): bool {
+		if ( ! in_array( $new_namespace, array( 'html', 'math', 'svg' ), true ) ) {
+			return false;
+		}
+
+		$this->cdata_parsing_namespace = $new_namespace;
 		return true;
 	}
 
@@ -1917,7 +1948,7 @@ private function parse_next_tag(): bool {
 				}
 
 				if (
-					'html' !== $this->parsing_namespace &&
+					'html' !== $this->cdata_parsing_namespace &&
 					strlen( $html ) > $at + 8 &&
 					'[' === $html[ $at + 2 ] &&
 					'C' === $html[ $at + 3 ] &&
diff --git a/tests/phpunit/tests/html-api/wpHtmlProcessor.php b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
index a89014282df73..eb7bf0dbbbc32 100644
--- a/tests/phpunit/tests/html-api/wpHtmlProcessor.php
+++ b/tests/phpunit/tests/html-api/wpHtmlProcessor.php
@@ -851,6 +851,51 @@ public function test_adjusts_for_html_integration_points_in_svg() {
 		);
 	}
 
+	/**
+	 * Ensures that CDATA sections remain available inside SVG HTML integration points.
+	 *
+	 * @ticket 61576
+	 */
+	public function test_cdata_sections_in_svg_html_integration_points() {
+		$processor = WP_HTML_Processor::create_fragment(
+			'<svg><foreignObject><![CDATA[foo]]></foreignObject></svg>'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( 'foreignObject' ),
+			'Failed to find "foreignObject" under test: check test setup.'
+		);
+
+		$this->assertSame(
+			'svg',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the "foreignObject" element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_token(),
+			'Failed to find expected CDATA section.'
+		);
+
+		$this->assertSame(
+			'#cdata-section',
+			$processor->get_token_name(),
+			"Should have found a CDATA section but found {$processor->get_token_name()} instead."
+		);
+
+		$this->assertSame(
+			'svg',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the CDATA section.'
+		);
+
+		$this->assertSame(
+			'foo',
+			$processor->get_modifiable_text(),
+			'Found incorrect CDATA content.'
+		);
+	}
+
 	/**
 	 * Ensures that the processor correctly adjusts the namespace
 	 * for elements inside MathML integration points.
@@ -911,6 +956,51 @@ public function test_adjusts_for_mathml_integration_points() {
 		);
 	}
 
+	/**
+	 * Ensures that CDATA sections remain available inside MathML HTML integration points.
+	 *
+	 * @ticket 61576
+	 */
+	public function test_cdata_sections_in_mathml_html_integration_points() {
+		$processor = WP_HTML_Processor::create_fragment(
+			'<math><annotation-xml encoding="text/html"><![CDATA[x]]></annotation-xml></math>'
+		);
+
+		$this->assertTrue(
+			$processor->next_tag( 'ANNOTATION-XML' ),
+			'Failed to find "annotation-xml" under test: check test setup.'
+		);
+
+		$this->assertSame(
+			'math',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the "annotation-xml" element.'
+		);
+
+		$this->assertTrue(
+			$processor->next_token(),
+			'Failed to find expected CDATA section.'
+		);
+
+		$this->assertSame(
+			'#cdata-section',
+			$processor->get_token_name(),
+			"Should have found a CDATA section but found {$processor->get_token_name()} instead."
+		);
+
+		$this->assertSame(
+			'math',
+			$processor->get_namespace(),
+			'Found the wrong namespace for the CDATA section.'
+		);
+
+		$this->assertSame(
+			'x',
+			$processor->get_modifiable_text(),
+			'Found incorrect CDATA content.'
+		);
+	}
+
 	/**
 	 * Ensures that the processor stops correctly on a FORM tag closer token.
 	 *