|
15 | 15 | * - Prune the whitespace when removing classes/attributes: e.g. "a b c" -> "c" not " c". |
16 | 16 | * This would increase the size of the changes for some operations but leave more |
17 | 17 | * natural-looking output HTML. |
18 | | - * - Decode HTML character references within class names when matching. E.g. match having |
19 | | - * class `1<"2` needs to recognize `class="1&lt;&quot;2"`. Currently the Tag Processor |
20 | | - * will fail to find the right tag if the class name is encoded as such. |
21 | 18 | * - Properly decode HTML character references in `get_attribute()`. PHP's |
22 | 19 | * `html_entity_decode()` is wrong in a couple of ways: it doesn't account for the |
23 | 20 | * no-ambiguous-ampersand rule, and it improperly handles the way semicolons may |
@@ -375,14 +372,14 @@ class WP_HTML_Tag_Processor { |
375 | 372 | * |
376 | 373 | * @since {WP_VERSION} |
377 | 374 | * |
378 | | - * @see WP_HTML_Tag_Processor::STATE_UNKNOWN |
| 375 | + * @see WP_HTML_Tag_Processor::STATE_READY |
379 | 376 | * @see WP_HTML_Tag_Processor::STATE_COMPLETE |
380 | 377 | * @see WP_HTML_Tag_Processor::STATE_INCOMPLETE |
381 | 378 | * @see WP_HTML_Tag_Processor::STATE_MATCHED_TAG |
382 | 379 | * |
383 | 380 | * @var string |
384 | 381 | */ |
385 | | - private $parser_state = self::STATE_UNKNOWN; |
| 382 | + private $parser_state = self::STATE_READY; |
386 | 383 |
|
387 | 384 | /** |
388 | 385 | * How many bytes from the original HTML document have been read and parsed. |
@@ -663,7 +660,7 @@ public function next_token() { |
663 | 660 | * The next step in the parsing loop determines the parsing state; |
664 | 661 | * clear it so that state doesn't linger from the previous step. |
665 | 662 | */ |
666 | | - $this->parser_state = self::STATE_UNKNOWN; |
| 663 | + $this->parser_state = self::STATE_READY; |
667 | 664 |
|
668 | 665 | if ( $this->bytes_already_parsed >= strlen( $this->html ) ) { |
669 | 666 | $this->parser_state = self::STATE_COMPLETE; |
@@ -721,7 +718,8 @@ public function next_token() { |
721 | 718 | 's' === $t || 'S' === $t || |
722 | 719 | 't' === $t || 'T' === $t || |
723 | 720 | 'x' === $t || 'X' === $t |
724 | | - ) ) { |
| 721 | + ) |
| 722 | + ) { |
725 | 723 | $tag_name = $this->get_tag(); |
726 | 724 |
|
727 | 725 | if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { |
@@ -2647,7 +2645,7 @@ private function matches() { |
2647 | 2645 | return true; |
2648 | 2646 | } |
2649 | 2647 |
|
2650 | | - const STATE_UNKNOWN = 'UNKNOWN: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.'; |
| 2648 | + const STATE_READY = 'READY: The parser is waiting for a state transition; it may not have started, or it may have been interrupted, or it may be waiting to restart after pausing.'; |
2651 | 2649 | const STATE_COMPLETE = 'COMPLETE: The parser has reached the end of the document without truncating any possible tokens. There is nothing left to scan.'; |
2652 | 2650 | const STATE_INCOMPLETE = 'INCOMPLETE: The parser has reached the end of the document but it appears as thought the HTML is truncated inside a token. It has backed up to the last-known complete state and will not continue parsing.'; |
2653 | 2651 | const STATE_MATCHED_TAG = 'MATCHED_TAG: The parser has found a tag and paused to allow reading from and modifying its attributes.'; |
|
0 commit comments