Skip to content

Commit a513738

Browse files
committed
HTML API: preserve adjusted foreign attributes on serialization.
Discovered during fuzz-testing of the HTML API. Adjusted foreign attributes, such as `xlink:href`, were being serialized by `::serialize_token()` with a space instead of a colon between the namespace prefix and the local name (e.g. `xlink href`). Because a space separates attributes in HTML, the output contained two attributes instead of the single intended one. This patch corrects the issue by ensuring that the attribute namespace prefix and local name are separated by a colon when serializing. Developed in: WordPress#12140 Discussed in: https://core.trac.wordpress.org/ticket/65372 Props jonsurrell. See #65372. git-svn-id: https://develop.svn.wordpress.org/trunk@62492 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 46f3c09 commit a513738

3 files changed

Lines changed: 137 additions & 3 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1428,6 +1428,11 @@ public function serialize_token(): string {
14281428
$qualified_attribute_name = $this->get_qualified_attribute_name( $attribute_name );
14291429
$qualified_attribute_name = str_replace( "\x00", "\u{FFFD}", $qualified_attribute_name );
14301430
$qualified_attribute_name = wp_scrub_utf8( $qualified_attribute_name );
1431+
/**
1432+
* Restore the colon in adjusted foreign attribute names; spaces only appear via the foreign attribute adjustment table.
1433+
* @see WP_HTML_Tag_Processor::get_qualified_attribute_name()
1434+
*/
1435+
$serialized_attribute_name = str_replace( ' ', ':', $qualified_attribute_name );
14311436
if ( isset( $seen_attribute_names[ $qualified_attribute_name ] ) ) {
14321437
continue;
14331438
} else {
@@ -1436,13 +1441,13 @@ public function serialize_token(): string {
14361441

14371442
if (
14381443
$previous_attribute_was_true &&
1439-
isset( $qualified_attribute_name[0] ) &&
1440-
'=' === $qualified_attribute_name[0]
1444+
isset( $serialized_attribute_name[0] ) &&
1445+
'=' === $serialized_attribute_name[0]
14411446
) {
14421447
$html .= '=""';
14431448
}
14441449

1445-
$html .= " {$qualified_attribute_name}";
1450+
$html .= " {$serialized_attribute_name}";
14461451
$value = $this->get_attribute( $attribute_name );
14471452

14481453
if ( is_string( $value ) ) {

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3071,6 +3071,12 @@ public function get_qualified_tag_name(): ?string {
30713071
* Returns the adjusted attribute name for a given attribute, taking into
30723072
* account the current parsing context, whether HTML, SVG, or MathML.
30733073
*
3074+
* In SVG and MathML contexts, adjusted foreign attributes with a namespace
3075+
* prefix use a space between the prefix and local name. For example,
3076+
* `xlink:href` is returned as `xlink href`, while the unprefixed `xmlns`
3077+
* attribute is returned as `xmlns`. Non-adjusted attributes with a colon in
3078+
* their name, such as `foo:bar`, are returned unchanged.
3079+
*
30743080
* @since 6.7.0
30753081
*
30763082
* @param string $attribute_name Which attribute to adjust.

tests/phpunit/tests/html-api/wpHtmlProcessor-serialize.php

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,129 @@ public function test_duplicate_attributes_are_removed() {
108108
);
109109
}
110110

111+
/**
112+
* Ensures that adjusted foreign attributes are serialized with their namespace prefix.
113+
*
114+
* @ticket 65372
115+
*/
116+
public function test_serializes_adjusted_foreign_attributes_with_namespace_prefix(): void {
117+
$svg = '<svg><a xlink:actuate="onLoad" xlink:arcrole="arc" xlink:href="#target" xlink:role="role" xlink:show="new" xlink:title="title" xlink:type="simple" xml:lang="en" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"></a></svg>';
118+
119+
$this->assertSame(
120+
$svg,
121+
WP_HTML_Processor::normalize( $svg ),
122+
'Should have preserved all adjusted foreign attributes when normalizing.'
123+
);
124+
125+
$processor = WP_HTML_Processor::create_fragment( $svg );
126+
$this->assertTrue( $processor->next_token() );
127+
$this->assertSame( '<svg>', $processor->serialize_token(), 'Should serialize the opening SVG tag.' );
128+
$this->assertTrue( $processor->next_token() );
129+
$this->assertSame(
130+
'<a xlink:actuate="onLoad" xlink:arcrole="arc" xlink:href="#target" xlink:role="role" xlink:show="new" xlink:title="title" xlink:type="simple" xml:lang="en" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">',
131+
$processor->serialize_token(),
132+
'Should have serialized all adjusted foreign attributes with their namespace prefixes.'
133+
);
134+
}
135+
136+
/**
137+
* Ensures that non-adjusted foreign attributes retain their colon.
138+
*
139+
* @ticket 65372
140+
*
141+
* @dataProvider data_non_adjusted_foreign_attributes_with_colon
142+
*
143+
* @param string $svg SVG markup to normalize.
144+
* @param string $serialized_tag Expected serialized token.
145+
*/
146+
public function test_serializes_non_adjusted_foreign_attributes_with_colon( string $svg, string $serialized_tag ): void {
147+
$this->assertSame(
148+
$svg,
149+
WP_HTML_Processor::normalize( $svg ),
150+
'Should have preserved non-adjusted colon attributes when normalizing.'
151+
);
152+
153+
$processor = WP_HTML_Processor::create_fragment( $svg );
154+
$this->assertTrue( $processor->next_token() );
155+
$this->assertSame( '<svg>', $processor->serialize_token(), 'Should serialize the opening SVG tag.' );
156+
$this->assertTrue( $processor->next_token() );
157+
$this->assertSame(
158+
$serialized_tag,
159+
$processor->serialize_token(),
160+
'Should have preserved non-adjusted colon attributes when serializing the token.'
161+
);
162+
}
163+
164+
/**
165+
* Data provider.
166+
*
167+
* @return array<string, array{0: string, 1: string}>
168+
*/
169+
public static function data_non_adjusted_foreign_attributes_with_colon(): array {
170+
return array(
171+
'xlink control' => array(
172+
'<svg><a xlink:author="author" xlink:href="#target"></a></svg>',
173+
'<a xlink:author="author" xlink:href="#target">',
174+
),
175+
'xml control' => array(
176+
'<svg><a xml:id="id" xml:lang="en"></a></svg>',
177+
'<a xml:id="id" xml:lang="en">',
178+
),
179+
'xmlns control' => array(
180+
'<svg><a xmlns:foo="urn:foo" xmlns:xlink="http://www.w3.org/1999/xlink"></a></svg>',
181+
'<a xmlns:foo="urn:foo" xmlns:xlink="http://www.w3.org/1999/xlink">',
182+
),
183+
'source order' => array(
184+
'<svg><a foo:bar="baz" xlink:href="#target"></a></svg>',
185+
'<a foo:bar="baz" xlink:href="#target">',
186+
),
187+
);
188+
}
189+
190+
/**
191+
* Ensures that duplicate foreign attributes are removed upon serialization.
192+
*
193+
* @ticket 65372
194+
*
195+
* @dataProvider data_duplicate_foreign_attributes
196+
*
197+
* @param string $input HTML containing duplicate foreign attributes.
198+
* @param string $expected Expected normalized HTML.
199+
*/
200+
public function test_duplicate_foreign_attributes_are_removed( string $input, string $expected ): void {
201+
$this->assertSame(
202+
$expected,
203+
WP_HTML_Processor::normalize( $input ),
204+
'Should have removed all but the first copy of a foreign attribute when duplicates exist.'
205+
);
206+
}
207+
208+
/**
209+
* Data provider.
210+
*
211+
* @return array<string, array{0: string, 1: string}>
212+
*/
213+
public static function data_duplicate_foreign_attributes(): array {
214+
return array(
215+
'adjusted xlink duplicate' => array(
216+
'<svg><a xlink:href="#first" XLINK:HREF="#second"></a></svg>',
217+
'<svg><a xlink:href="#first"></a></svg>',
218+
),
219+
'adjusted xml duplicate' => array(
220+
'<svg><a xml:lang="en" XML:LANG="fr"></a></svg>',
221+
'<svg><a xml:lang="en"></a></svg>',
222+
),
223+
'non-adjusted colon duplicate' => array(
224+
'<svg><a foo:bar="one" FOO:BAR="two"></a></svg>',
225+
'<svg><a foo:bar="one"></a></svg>',
226+
),
227+
'adjusted and non-adjusted pair' => array(
228+
'<svg><a xlink:href="#target" xlink:author="author"></a></svg>',
229+
'<svg><a xlink:href="#target" xlink:author="author"></a></svg>',
230+
),
231+
);
232+
}
233+
111234
/**
112235
* Ensures that SCRIPT contents are not escaped, as they are not parsed like text nodes are.
113236
*

0 commit comments

Comments
 (0)