Skip to content

Commit f991add

Browse files
committed
More documentation and a change from boolean to context indicator
1 parent 1a59f60 commit f991add

1 file changed

Lines changed: 59 additions & 12 deletions

File tree

src/wp-includes/html-api/class-wp-html-decoder.php

Lines changed: 59 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -152,15 +152,33 @@ public static function decode_attribute( $text, $at = 0, $length = null ) {
152152
}
153153

154154
/**
155-
* Decodes a span of HTML text, respecting the ambiguous ampersand rule.
155+
* Decodes a span of HTML text, depending on the context in which it's found.
156156
*
157-
* @param $allow_ambiguous_ampersands
158-
* @param $text
159-
* @param $at
160-
* @param $length
161-
* @return mixed|string
157+
* This is a low-level method; prefer calling WP_HTML_Decoder::decode_attribute() or
158+
* WP_HTML_Decoder::decode_text_node() instead.
159+
*
160+
* Example:
161+
*
162+
* '©' === WP_HTML_Decoder::decode( 'data', '&copy;' );
163+
*
164+
* Use the `$at` and `$length` parameters to avoid string allocations when decoding a span
165+
* of text found within a larger document.
166+
*
167+
* Example:
168+
*
169+
* $link = WP_HTML_Decoder::decode( 'attribute', '<a href="http&colon;//wordpress.org">Click</a>', 9, 26 );
170+
* $link === 'http://wordpress.org';
171+
*
172+
* @since 6.6.0
173+
*
174+
* @param string $context `attribute` for decoding attribute values, `data` otherwise.
175+
* @param string $text Text document containing span of text to decode.
176+
* @param ?int $at Byte offset into text where span begins, defaults to the beginning.
177+
* @param ?int $length How many bytes the portion of the text spans.
178+
* The default value spans to the end of the text.
179+
* @return string Decoded string.
162180
*/
163-
public static function decode( $allow_ambiguous_ampersands, $text, $at = 0, $length = null ) {
181+
public static function decode( $context, $text, $at = 0, $length = null ) {
164182
$decoded = '';
165183
$end = isset( $length ) ? $at + $length : strlen( $text );
166184
$was_at = $at;
@@ -171,7 +189,7 @@ public static function decode( $allow_ambiguous_ampersands, $text, $at = 0, $len
171189
break;
172190
}
173191

174-
$character_reference = self::read_character_reference( $text, $next_character_reference_at, $allow_ambiguous_ampersands, $skip_bytes );
192+
$character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $skip_bytes );
175193
if ( isset( $character_reference ) ) {
176194
$at = $next_character_reference_at;
177195
$decoded .= substr( $text, $was_at, $at - $was_at );
@@ -195,7 +213,36 @@ public static function decode( $allow_ambiguous_ampersands, $text, $at = 0, $len
195213
return $decoded;
196214
}
197215

198-
public static function read_character_reference( $text, $at, $allow_ambiguous_ampersand, &$skip_bytes ) {
216+
/**
217+
* Attempt to read a character reference at the given location in a given string,
218+
* depending on the context in which it's found.
219+
*
220+
* If a character reference is found, this function will return the translated value
221+
* that the reference maps to. It will then set in `$skip_bytes` how many bytes of
222+
* input it read while consuming the character reference. This gives calling code the
223+
* opportunity to advance its cursor when traversing a string and decoding. It
224+
* indicates how long the character reference was.
225+
*
226+
* Example:
227+
*
228+
* null === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships&hellip;', 0 );
229+
* '…' === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships&hellip;', 5, $skip_bytes );
230+
* 8 === $skip_bytes;
231+
*
232+
* null === WP_HTML_Decoder::read_character_reference( 'attribute', '&notin', 0 );
233+
* '∉' === WP_HTML_Decoder::read_character_reference( 'attribute', '&notin;', 0, $skip_bytes );
234+
* 7 === $skip_bytes;
235+
*
236+
* @since 6.6.0
237+
*
238+
* @param string $context `attribute` for decoding attribute values, `data` otherwise.
239+
* @param string $text Text document containing span of text to decode.
240+
* @param int $at Byte offset into text at which to look for a character reference.
241+
* @param ?int $skip_bytes Optional. Set to the byte length of the character reference when one is found.
242+
* Callers can use it to advance their cursor past the reference.
243+
* @return string|null Decoded character reference if found, otherwise `null`.
244+
*/
245+
public static function read_character_reference( $context, $text, $at, &$skip_bytes = null ) {
199246
global $html5_named_character_entity_set;
200247

201248
$length = strlen( $text );
@@ -375,9 +422,9 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
375422
}
376423

377424
/*
378-
* At this point though have matched an entry in the named
425+
* At this point there's a match for an entry in the named
379426
* character reference table but the match doesn't end in `;`.
380-
* We need to determine if the next letter makes it an ambiguous.
427+
* It may be allowed if it's followed by something unambiguous.
381428
*/
382429
$ambiguous_follower = (
383430
$after_name < $length &&
@@ -394,7 +441,7 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
394441
return $name;
395442
}
396443

397-
if ( ! $allow_ambiguous_ampersand ) {
444+
if ( 'attribute' === $context ) {
398445
return null;
399446
}
400447

0 commit comments

Comments
 (0)