Skip to content

Commit 7a34cea

Browse files
committed
HTML API: Normalize raw attribute carriage returns before serialization
1 parent 0d64955 commit 7a34cea

3 files changed

Lines changed: 105 additions & 7 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1443,7 +1443,7 @@ public function serialize_token(): string {
14431443
}
14441444

14451445
$html .= " {$qualified_attribute_name}";
1446-
$value = $this->get_attribute( $attribute_name );
1446+
$value = $this->get_attribute_for_serialization( $attribute_name );
14471447

14481448
if ( is_string( $value ) ) {
14491449
$html .= '="' . self::serialize_decoded_text( $value ) . '"';

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2359,13 +2359,14 @@ private function class_name_updates_to_attributes_updates(): void {
23592359
}
23602360

23612361
if ( false === $existing_class && isset( $this->attributes['class'] ) ) {
2362-
$existing_class = WP_HTML_Decoder::decode_attribute(
2363-
substr(
2364-
$this->html,
2365-
$this->attributes['class']->value_starts_at,
2366-
$this->attributes['class']->value_length
2367-
)
2362+
$existing_class = substr(
2363+
$this->html,
2364+
$this->attributes['class']->value_starts_at,
2365+
$this->attributes['class']->value_length
23682366
);
2367+
$existing_class = str_replace( "\r\n", "\n", $existing_class );
2368+
$existing_class = str_replace( "\r", "\n", $existing_class );
2369+
$existing_class = WP_HTML_Decoder::decode_attribute( $existing_class );
23692370
}
23702371

23712372
if ( false === $existing_class ) {
@@ -2829,6 +2830,51 @@ public function get_attribute( $name ) {
28292830
return WP_HTML_Decoder::decode_attribute( $raw_value );
28302831
}
28312832

2833+
/**
2834+
* Returns the value of an attribute with newlines in its raw source text normalized.
2835+
*
2836+
* This is intended for serialization. In values read from the source HTML, each "\r\n"
2837+
* pair and each remaining "\r" becomes "\n" before character references are decoded.
2838+
* Enqueued attribute updates are plaintext API values, so they are returned unchanged.
2839+
*
2840+
* @since 6.9.0
2841+
* @ignore
2842+
*
2843+
* @param string $name Name of attribute whose value is requested.
2844+
* @return string|true|null Value of attribute, `true` for boolean attributes, or `null` if the attribute is not available or the parser is not in the matched-tag state.
2845+
*/
2846+
protected function get_attribute_for_serialization( $name ) {
2847+
// Attribute values are only reported while the parser is in the matched-tag state.
if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
2848+
return null;
2849+
}
2850+
2851+
// All lookups below use the lowercased attribute name.
$comparable = strtolower( $name );
2852+
2853+
// NOTE(review): presumably turns pending class-name changes into an enqueued
// `class` attribute update so the lookup below can see them; confirm against the helper.
if ( 'class' === $comparable ) {
2854+
$this->class_name_updates_to_attributes_updates();
2855+
}
2856+
2857+
// Any enqueued value other than `false` is returned as-is, without newline normalization.
$enqueued_value = $this->get_enqueued_attribute_value( $comparable );
2858+
if ( false !== $enqueued_value ) {
2859+
return $enqueued_value;
2860+
}
2861+
2862+
if ( ! isset( $this->attributes[ $comparable ] ) ) {
2863+
return null;
2864+
}
2865+
2866+
$attribute = $this->attributes[ $comparable ];
2867+
if ( true === $attribute->is_true ) {
2868+
return true;
2869+
}
2870+
2871+
$raw_value = substr( $this->html, $attribute->value_starts_at, $attribute->value_length );
2872+
// Replace CRLF pairs first so that each pair yields one line feed rather than two.
$raw_value = str_replace( "\r\n", "\n", $raw_value );
2873+
$raw_value = str_replace( "\r", "\n", $raw_value );
2874+
2875+
// Decoding runs after the replacements above, so they only ever see the raw source text.
return WP_HTML_Decoder::decode_attribute( $raw_value );
2876+
}
2877+
28322878
/**
28332879
* Gets lowercase names of all attributes matching a given prefix in the current tag.
28342880
*

tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,58 @@ public static function data_provider_decoded_carriage_returns() {
547547
);
548548
}
549549

550+
/**
551+
* Ensures that raw carriage returns in attribute values are serialized as line feeds.
552+
*
553+
* @ticket 65372
554+
*
555+
* @dataProvider data_provider_raw_attribute_carriage_returns
556+
*
557+
* @param string $input HTML input containing raw carriage returns.
558+
* @param string $expected Expected normalized output.
559+
*/
560+
public function test_normalize_serializes_raw_attribute_carriage_returns_as_line_feeds( string $input, string $expected ) {
561+
$normalized = WP_HTML_Processor::normalize( $input );
562+
563+
$this->assertSame( $expected, $normalized, 'Should have serialized raw attribute carriage returns as line feeds.' );
564+
// Normalizing the output a second time must produce the same string again.
$this->assertSame(
565+
$expected,
566+
WP_HTML_Processor::normalize( $normalized ),
567+
'Normalizing already-normalized HTML should not change raw attribute newlines.'
568+
);
569+
}
570+
571+
/**
572+
* Data provider of HTML inputs whose attribute values contain raw carriage returns.
573+
*
574+
* @return array[] Named datasets, each holding the HTML input and its expected normalized output.
575+
*/
576+
public static function data_provider_raw_attribute_carriage_returns() {
577+
return array(
578+
'Raw carriage return' => array( "<p title=\"a\rb\"></p>", "<p title=\"a\nb\"></p>" ),
579+
'Raw CRLF pair' => array( "<p title=\"a\r\nb\"></p>", "<p title=\"a\nb\"></p>" ),
580+
);
581+
}
582+
583+
/**
584+
* Ensures that raw carriage returns are normalized before class updates are serialized.
585+
*
586+
* @ticket 65372
587+
*/
588+
public function test_serialize_token_normalizes_raw_class_carriage_returns_before_class_updates() {
589+
// The source class value has a raw carriage return between "a" and "b".
$processor = WP_HTML_Processor::create_fragment( "<p class=\"a\rb\"></p>" );
590+
591+
$this->assertTrue( $processor->next_tag( 'P' ), 'Should find the P element.' );
592+
593+
$processor->add_class( 'c' );
594+
595+
// Expected: the existing value with its carriage return replaced by a line feed, followed by the added class.
$this->assertSame(
596+
"<p class=\"a\nb c\">",
597+
$processor->serialize_token(),
598+
'Should have serialized raw class carriage returns as line feeds before adding classes.'
599+
);
600+
}
601+
550602
/**
551603
* Data provider.
552604
*

0 commit comments

Comments
 (0)