@@ -378,6 +378,31 @@ class WP_HTML_Tag_Processor {
378378 */
379379 private $ is_closing_tag ;
380380
381+ /**
382+ * The kind of node that was parsed in the most recent step of scanning through the document,
383+ * or `null` if the parser has not paused on a matched token.
384+ *
385+ * Can be one of:
386+ * - WP_HTML_Tag_Processor::ELEMENT_NODE
387+ * - WP_HTML_Tag_Processor::TEXT_NODE
388+ * - WP_HTML_Tag_Processor::CDATA_SECTION_NODE
389+ * - WP_HTML_Tag_Processor::PROCESSING_INSTRUCTION_NODE
390+ * - WP_HTML_Tag_Processor::COMMENT_NODE
391+ * - WP_HTML_Tag_Processor::DOCUMENT_TYPE_NODE
392+ * - WP_HTML_Tag_Processor::WP_FUNKY_COMMENT_NODE
393+ *
394+ * @var string|null
395+ */
396+ private $ last_token_type = null ;
397+
398+ /**
399+ * The mode in which the parser should resume after pausing (presumably one of the `STATE_*`
400+ * class constants; confirm against the scanner), or `null` if it is not paused on a matched token.
401+ *
402+ * @var string|null
403+ */
404+ private $ continuation_state = null ;
405+
381406 /**
382407 * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name.
383408 *
@@ -567,7 +592,7 @@ public function next_tag( $query = null ) {
567592 return false ;
568593 }
569594 $ this ->tag_ends_at = $ tag_ends_at ;
570- $ this ->bytes_already_parsed = $ tag_ends_at ;
595+ $ this ->bytes_already_parsed = min ( strlen ( $ this -> html ) - 1 , $ tag_ends_at + 1 ) ;
571596
572597 // Finally, check if the parsed tag and its attributes match the search query.
573598 if ( $ this ->matches () ) {
@@ -2447,4 +2472,17 @@ private function matches() {
24472472
24482473 return true ;
24492474 }
2475+
2476+ // Constants kept at the end of the class because they would be noisy at the top of the file. Node types first: the number embedded in each value appears to mirror the DOM `Node.nodeType` code (TODO confirm); `0_1` is WordPress-specific.
2477+
2478+ const ELEMENT_NODE = 'NodeType.1.ELEMENT_NODE ' ;
2479+ const TEXT_NODE = 'NodeType.3.TEXT_NODE ' ;
2480+ const CDATA_SECTION_NODE = 'NodeType.4.CDATA_SECTION_NODE ' ;
2481+ const PROCESSING_INSTRUCTION_NODE = 'NodeType.7.PROCESSING_INSTRUCTION_NODE ' ;
2482+ const COMMENT_NODE = 'NodeType.8.COMMENT_NODE ' ;
2483+ const DOCUMENT_TYPE_NODE = 'NodeType.10.DOCUMENT_TYPE_NODE ' ;
2484+ const WP_FUNKY_COMMENT_NODE = 'NodeType.0_1.WP_FUNKY_COMMENT_NODE ' ;
2485+ // Parser states, each a self-describing sentence; presumably the values held by `$continuation_state` (verify against the scanner).
2486+ const STATE_COMPLETE = 'The parser has finished scanning through the document. ' ;
2487+ const STATE_IN_TAG = 'The parser has found a valid tag name and needs to continue parsing attributes. ' ;
24502488}
0 commit comments