@@ -152,15 +152,33 @@ public static function decode_attribute( $text, $at = 0, $length = null ) {
152152 }
153153
154154 /**
155- * Decodes a span of HTML text, respecting the ambiguous ampersand rule .
155+ * Decodes a span of HTML text, depending on the context in which it's found .
156156 *
157- * @param $allow_ambiguous_ampersands
158- * @param $text
159- * @param $at
160- * @param $length
161- * @return mixed|string
157+ * This is a low-level method; prefer calling WP_HTML_Decoder::decode_attribute() or
158+ * WP_HTML_Decoder::decode_text_node() instead.
159+ *
160+ * Example:
161+ *
162+ * '©' === WP_HTML_Decoder::decode( 'data', '&copy;' );
163+ *
164+ * Use the `$at` and `$length` parameters to avoid string allocations when decoding a span
165+ * of text found within a larger document.
166+ *
167+ * Example:
168+ *
169+ * $link = WP_HTML_Decoder::decode( 'attribute', '<a href="http&colon;//wordpress.org">Click</a>', 9, 26 );
170+ * $link === 'http://wordpress.org';
171+ *
172+ * @since 6.6.0
173+ *
174+ * @param string $context `attribute` for decoding attribute values, `data` otherwise.
175+ * @param string $text Text document containing span of text to decode.
176+ * @param ?int $at Byte offset into text where span begins, defaults to the beginning.
177+ * @param ?int $length How many bytes the portion of the text spans.
178+ * The default value spans to the end of the text.
179+ * @return string Decoded string.
162180 */
163- public static function decode ( $ allow_ambiguous_ampersands , $ text , $ at = 0 , $ length = null ) {
181+ public static function decode ( $ context , $ text , $ at = 0 , $ length = null ) {
164182 $ decoded = '' ;
165183 $ end = isset ( $ length ) ? $ at + $ length : strlen ( $ text );
166184 $ was_at = $ at ;
@@ -171,7 +189,7 @@ public static function decode( $allow_ambiguous_ampersands, $text, $at = 0, $len
171189 break ;
172190 }
173191
174- $ character_reference = self ::read_character_reference ( $ text , $ next_character_reference_at , $ allow_ambiguous_ampersands , $ skip_bytes );
192+ $ character_reference = self ::read_character_reference ( $ context , $ text , $ next_character_reference_at , $ skip_bytes );
175193 if ( isset ( $ character_reference ) ) {
176194 $ at = $ next_character_reference_at ;
177195 $ decoded .= substr ( $ text , $ was_at , $ at - $ was_at );
@@ -195,7 +213,36 @@ public static function decode( $allow_ambiguous_ampersands, $text, $at = 0, $len
195213 return $ decoded ;
196214 }
197215
198- public static function read_character_reference ( $ text , $ at , $ allow_ambiguous_ampersand , &$ skip_bytes ) {
216+ /**
217+ * Attempt to read a character reference at the given location in a given string,
218+ * depending on the context in which it's found.
219+ *
220+ * If a character reference is found, this function will return the translated value
221+ * that the reference maps to. It will then set in `$skip_bytes` how many bytes of
222+ * input it read while consuming the character reference. This gives calling code the
223+ * opportunity to advance its cursor when traversing a string and decoding. It
224+ * indicates how long the character reference was.
225+ *
226+ * Example:
227+ *
228+ * null === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships&hellip;', 0 );
229+ * '…' === WP_HTML_Decoder::read_character_reference( 'attribute', 'Ships&hellip;', 5, $skip_bytes );
230+ * 8 === $skip_bytes;
231+ *
232+ * null === WP_HTML_Decoder::read_character_reference( 'attribute', '&notin', 0 );
233+ * '¬' === WP_HTML_Decoder::read_character_reference( 'data', '&notin', 0, $skip_bytes );
234+ * 4 === $skip_bytes;
235+ *
236+ * @since 6.6.0
237+ *
238+ * @param string $context `attribute` for decoding attribute values, `data` otherwise.
239+ * @param string $text Text document containing span of text to decode.
240+ * @param int $at Byte offset into text at which to look for a character reference.
241+ * @param ?int $skip_bytes Optional. Set by reference to the number of bytes read when a
242+ * character reference is found.
243+ * @return string|null Decoded character reference if found, otherwise `null`.
244+ */
245+ public static function read_character_reference ( $ context , $ text , $ at , &$ skip_bytes = null ) {
199246 global $ html5_named_character_entity_set ;
200247
201248 $ length = strlen ( $ text );
@@ -375,9 +422,9 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
375422 }
376423
377424 /*
378- * At this point though have matched an entry in the named
425+ * At this point there's a match for an entry in the named
379426 * character reference table, but the match doesn't end in `;`.
380- * We need to determine if the next letter makes it an ambiguous .
427+ * It may be allowed if it's followed by something unambiguous .
381428 */
382429 $ ambiguous_follower = (
383430 $ after_name < $ length &&
@@ -394,7 +441,7 @@ public static function read_character_reference( $text, $at, $allow_ambiguous_am
394441 return $ name ;
395442 }
396443
397- if ( ! $ allow_ambiguous_ampersand ) {
444+ if ( ' attribute ' === $ context ) {
398445 return null ;
399446 }
400447
0 commit comments