Skip to content

Commit f92da80

Browse files
committed
Fix attribute legacy reference follower checks
1 parent 6c29cfc commit f92da80

2 files changed

Lines changed: 80 additions & 4 deletions

File tree

src/wp-includes/html-api/class-wp-html-decoder.php

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -378,12 +378,14 @@ public static function read_character_reference( $context, $text, $at = 0, &$mat
378378
* character reference table but the match doesn't end in `;`.
379379
* It may be allowed if it's followed by something unambiguous.
380380
*/
381+
$follower_byte = $after_name < $length ? ord( $text[ $after_name ] ) : null;
381382
$ambiguous_follower = (
382-
$after_name < $length &&
383-
$name_at < $length &&
383+
null !== $follower_byte &&
384384
(
385-
ctype_alnum( $text[ $after_name ] ) ||
386-
'=' === $text[ $after_name ]
385+
( $follower_byte >= 0x30 && $follower_byte <= 0x39 ) ||
386+
( $follower_byte >= 0x41 && $follower_byte <= 0x5A ) ||
387+
( $follower_byte >= 0x61 && $follower_byte <= 0x7A ) ||
388+
0x3D === $follower_byte
387389
)
388390
);
389391

tests/phpunit/tests/html-api/wpHtmlDecoder.php

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,80 @@ static function ( int $errno, string $errstr ) use ( &$errors ) {
6161
$this->assertSame( "&\x00b", $decoded, 'Should have decoded the text without changing it.' );
6262
}
6363

64+
/**
65+
* Ensures semicolonless legacy references decode before non-ASCII UTF-8 bytes in attributes, even when the LC_CTYPE locale makes ctype_alnum() classify high-bit bytes as alphanumeric.
66+
*/
67+
public function test_semicolonless_legacy_reference_before_multibyte_attribute_follower() {
68+
$previous_locale = setlocale( LC_CTYPE, 0 );
69+
$affected_locale = setlocale( LC_CTYPE, 'C.UTF-8', 'en_US.UTF-8', 'de_DE.UTF-8', 'fr_FR.UTF-8' );
70+
71+
if ( false === $affected_locale || ! ctype_alnum( "\xC2" ) ) {
72+
if ( false !== $previous_locale ) {
73+
setlocale( LC_CTYPE, $previous_locale );
74+
}
75+
76+
$this->markTestSkipped( 'Requires an LC_CTYPE locale where ctype_alnum() classifies high-bit bytes as alphanumeric.' );
77+
}
78+
79+
$raw_attribute = "&Aacute\xC2\x80";
80+
81+
try {
82+
$this->assertSame(
83+
"\xC3\x81\xC2\x80",
84+
WP_HTML_Decoder::decode_attribute( $raw_attribute ),
85+
'Should have decoded the semicolonless legacy reference before a multibyte follower.'
86+
);
87+
88+
$match_byte_length = null;
89+
$this->assertSame(
90+
"\xC3\x81",
91+
WP_HTML_Decoder::read_character_reference( 'attribute', $raw_attribute, 0, $match_byte_length ),
92+
'Should have matched the semicolonless legacy reference before a multibyte follower.'
93+
);
94+
$this->assertSame( strlen( '&Aacute' ), $match_byte_length );
95+
} finally {
96+
if ( false !== $previous_locale ) {
97+
setlocale( LC_CTYPE, $previous_locale );
98+
}
99+
}
100+
}
101+
102+
/**
103+
* Ensures semicolonless legacy references in attributes are left undecoded when followed by an ASCII alphanumeric character or an equals sign.
104+
*
105+
* @dataProvider data_ambiguous_ascii_attribute_followers
106+
*
107+
* @param string $raw_attribute Raw attribute value with an ambiguous legacy reference follower.
108+
*/
109+
public function test_semicolonless_legacy_reference_before_ascii_attribute_follower_is_ambiguous( $raw_attribute ) {
110+
$this->assertSame(
111+
$raw_attribute,
112+
WP_HTML_Decoder::decode_attribute( $raw_attribute ),
113+
'Should not have decoded an ambiguous semicolonless legacy reference.'
114+
);
115+
116+
$match_byte_length = 'sentinel';
117+
$this->assertNull(
118+
WP_HTML_Decoder::read_character_reference( 'attribute', $raw_attribute, 0, $match_byte_length ),
119+
'Should not have matched an ambiguous semicolonless legacy reference.'
120+
);
121+
$this->assertSame( 'sentinel', $match_byte_length );
122+
}
123+
124+
/**
125+
* Data provider.
126+
*
127+
* @return array[] Data sets, each containing a single raw attribute value.
128+
*/
129+
public static function data_ambiguous_ascii_attribute_followers() {
130+
return array(
131+
'ASCII digit' => array( '&Aacute0' ),
132+
'ASCII uppercase alpha' => array( '&AacuteA' ),
133+
'ASCII lowercase alpha' => array( '&Aacutea' ),
134+
'equals' => array( '&Aacute=' ),
135+
);
136+
}
137+
64138
/**
65139
* Ensures proper detection of attribute prefixes ignoring ASCII case.
66140
*

0 commit comments

Comments
 (0)