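Feedback response: rename STATE_UNKNOWN to STATE_READY, drop the TODO about decoding character references in class names, and re-wrap a closing parenthesis in next_token()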

dmsnell · dmsnell · commit 603db009430e · 2023-12-11T00:13:22.000+01:00
diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -15,9 +15,6 @@
  *  - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c".
  *    This would increase the size of the changes for some operations but leave more
  *    natural-looking output HTML.
- *  - Decode HTML character references within class names when matching. E.g. match having
- *    class `1<"2` needs to recognize `class="1&lt;&quot;2"`. Currently the Tag Processor
- *    will fail to find the right tag if the class name is encoded as such.
  *  - Properly decode HTML character references in `get_attribute()`. PHP's
  *    `html_entity_decode()` is wrong in a couple ways: it doesn't account for the
  *    no-ambiguous-ampersand rule, and it improperly handles the way semicolons may
@@ -375,14 +372,14 @@ class WP_HTML_Tag_Processor {
 	 *
 	 * @since {WP_VERSION}
 	 *
-	 * @see WP_HTML_Tag_Processor::STATE_UNKNOWN
+	 * @see WP_HTML_Tag_Processor::STATE_READY
 	 * @see WP_HTML_Tag_Processor::STATE_COMPLETE
 	 * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE
 	 * @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG
 	 *
 	 * @var string
 	 */
-	private $parser_state = self::STATE_UNKNOWN;
+	private $parser_state = self::STATE_READY;
 
 	/**
 	 * How many bytes from the original HTML document have been read and parsed.
@@ -663,7 +660,7 @@ public function next_token() {
 		 * The next step in the parsing loop determines the parsing state;
 		 * clear it so that state doesn't linger from the previous step.
 		 */
-		$this->parser_state = self::STATE_UNKNOWN;
+		$this->parser_state = self::STATE_READY;
 
 		if ( $this->bytes_already_parsed >= strlen( $this->html ) ) {
 			$this->parser_state = self::STATE_COMPLETE;
@@ -721,7 +718,8 @@ public function next_token() {
 				's' === $t || 'S' === $t ||
 				't' === $t || 'T' === $t ||
 				'x' === $t || 'X' === $t
-			) ) {
+			)
+		) {
 			$tag_name = $this->get_tag();
 
 			if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
@@ -2647,7 +2645,7 @@ private function matches() {
 		return true;
 	}
 
-	const STATE_UNKNOWN     = 'UNKNOWN: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.';
+	const STATE_READY       = 'READY: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.';
 	const STATE_COMPLETE    = 'COMPLETE: The parser has reached the end of the document without truncating any possible tokens. There is nothing left to scan.';
 	const STATE_INCOMPLETE  = 'INCOMPLETE: The parser has reached the end of the document but it appears as thought the HTML is truncated inside a token. It has backed up to the last-known complete state and will not continue parsing.';
 	const STATE_MATCHED_TAG = 'MATCHED_TAG: The parser has found a tag and paused to allow reading from and modifying its attributes.';