Skip to content

Commit faae963

Browse files
committed
Document attribute_starts_with()
1 parent d9ac3f7 commit faae963

1 file changed

Lines changed: 61 additions & 19 deletions

File tree

src/wp-includes/html-api/class-wp-html-decoder.php

Lines changed: 61 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,82 @@
1010
* @since 6.6.0
1111
*/
1212
class WP_HTML_Decoder {
13-
public static function attribute_starts_with( $attribute_value, $search_text, $case_sensitivity ) {
14-
$length = strlen( $search_text );
15-
$loose_case = 'case-insensitive' === $case_sensitivity;
16-
$at = 0;
17-
$i = 0;
18-
19-
while ( $i < $length && $at < strlen( $attribute_value ) ) {
13+
/**
14+
* Indicates if an attribute value starts with a given raw string value.
15+
*
16+
* Use this method to determine if an attribute value starts with a given string, regardless
17+
* of how it might be encoded in HTML. For instance, `http:` could be represented as `http:`
18+
* or as `http&colon;` or as `&#x68;ttp:` or as `h&#116;tp&colon;`, or in many other ways.
19+
*
20+
* Example:
21+
*
22+
* $value = 'http&colon;//wordpress.org/';
23+
* true === WP_HTML_Decoder::attribute_starts_with( $value, 0, null, 'http:', 'case-insensitive' );
24+
* false === WP_HTML_Decoder::attribute_starts_with( $value, 0, null, 'https:', 'case-insensitive' );
25+
*
26+
* The `$value_at` and `$value_length` parameters may be used to avoid string allocations when the
27+
* attribute value is found within a larger string.
28+
*
29+
* Example:
30+
*
31+
* $html = '<a href="h&#116;tps://wordpress.org" title="&copy 2024">';
32+
* // 1 1 2 2 3 3 4 4 5 5
33+
* // 01234567890 5 0 5 0 5 0 5 0 5
34+
*
35+
* $href = array( 9, 26 ); // At, Length
36+
* $title = array( 44, 10 ); // At, Length
37+
*
38+
* true === WP_HTML_Decoder::attribute_starts_with( $html, $href[0], $href[1], 'https://', 'case-insensitive' );
39+
* true === WP_HTML_Decoder::attribute_starts_with( $html, $title[0], $title[1], '©' );
40+
*
41+
* false === WP_HTML_Decoder::attribute_starts_with( $html, $href[0], $href[1], 'http://', 'case-insensitive' );
42+
* false === WP_HTML_Decoder::attribute_starts_with( $html, $title[0], $title[1], '&copy' );
43+
*
44+
* @param string $raw_haystack String containing the raw non-decoded attribute value.
45+
* @param int $value_at Byte offset into the haystack at which the attribute value begins.
46+
* @param int $value_length How many bytes the attribute value spans.
47+
* Passing `null` indicates that the value spans to the end of the string.
48+
* @param string $search_text Does the attribute value start with this plain string.
49+
* @param ?string $case_sensitivity Set to `case-insensitive` to ignore ASCII case when matching.
50+
*
51+
* @return bool Whether the attribute value starts with the given string.
52+
*/
53+
public static function attribute_starts_with( $raw_haystack, $value_at, $value_length, $search_text, $case_sensitivity = 'case-sensitive' ) {
54+
$search_length = strlen( $search_text );
55+
$loose_case = 'case-insensitive' === $case_sensitivity;
56+
$haystack_end = isset( $value_length ) ? ( $value_at + $value_length ) : strlen( $raw_haystack );
57+
$search_at = 0;
58+
59+
while ( $search_at < $search_length && $value_at < $haystack_end ) {
2060
$chars_match = $loose_case
21-
? strtolower( $attribute_value[ $at ] ) === strtolower( $search_text[ $i ] )
22-
: $attribute_value[ $at ] === $search_text[ $i ];
61+
? strtolower( $raw_haystack[ $value_at ] ) === strtolower( $search_text[ $search_at ] )
62+
: $raw_haystack[ $value_at ] === $search_text[ $search_at ];
2363

24-
$is_introducer = '&' === $attribute_value[ $at ];
64+
$is_introducer = '&' === $raw_haystack[ $value_at ];
2565
$next_chunk = $is_introducer
26-
? self::read_character_reference( $attribute_value, $at, false, $skip_bytes )
66+
? self::read_character_reference( $raw_haystack, $value_at, false, $skip_bytes )
2767
: false;
2868

69+
// If there's no character reference and the characters don't match, the match fails.
2970
if ( false === $next_chunk && ! $chars_match ) {
3071
return false;
3172
}
3273

74+
// If there's no character reference but the characters do match, then it could still match.
3375
if ( false === $next_chunk && $chars_match ) {
34-
++$at;
35-
++$i;
76+
++$value_at;
77+
++$search_at;
3678
continue;
3779
}
3880

39-
if ( 0 !== substr_compare( $search_text, $next_chunk, $i, strlen( $next_chunk ), $loose_case ) ) {
81+
// If there is a character reference, then the decoded value must exactly match what follows in the search string.
82+
if ( 0 !== substr_compare( $search_text, $next_chunk, $search_at, strlen( $next_chunk ), $loose_case ) ) {
4083
return false;
4184
}
4285

43-
$at += $skip_bytes;
44-
$i += strlen( $next_chunk );
86+
// The character reference matched, so continue checking.
87+
$value_at += $skip_bytes;
88+
$search_at += strlen( $next_chunk );
4589
}
4690

4791
return true;
@@ -263,10 +307,9 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
263307

264308
$after_name = $name_at + $name_length;
265309

266-
// If we have an un-ambiguous ampersand we can safely leave it in.
310+
// If the match ended with a semicolon then it should always be decoded.
267311
if ( ';' === $text[ $name_at + $name_length - 1 ] ) {
268312
$skip_bytes = $after_name - $at;
269-
// @todo bring back the WP_Token_Map so we can decode these.
270313
return $name;
271314
}
272315

@@ -287,7 +330,6 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
287330
// It's non-ambiguous, safe to leave it in.
288331
if ( ! $ambiguous_follower ) {
289332
$skip_bytes = $after_name - $at;
290-
// @todo Bring back WP_Token_Map to replace properly.
291333
return $name;
292334
}
293335

0 commit comments

Comments
 (0)