@@ -927,9 +927,11 @@ public function next_token() {
927927 * the closing tag to point to the opening of the special atomic
928928 * tag sequence.
929929 */
930- $ tag_name_starts_at = $ this ->tag_name_starts_at ;
931- $ tag_name_length = $ this ->tag_name_length ;
932- $ tag_ends_at = $ this ->token_starts_at + $ this ->token_length ;
930+ $ tag_name_starts_at = $ this ->tag_name_starts_at ;
931+ $ tag_name_length = $ this ->tag_name_length ;
932+ $ tag_ends_at = $ this ->token_starts_at + $ this ->token_length ;
933+ $ attributes = $ this ->attributes ;
934+ $ duplicate_attributes = $ this ->duplicate_attributes ;
933935
934936 // Find the closing tag.
935937 $ found_closer = false ;
@@ -965,12 +967,14 @@ public function next_token() {
965967 * functions that skip the contents have moved all the internal cursors past
966968 * the inner content of the tag.
967969 */
968- $ this ->token_starts_at = $ was_at ;
969- $ this ->token_length = $ this ->bytes_already_parsed - $ this ->token_starts_at ;
970- $ this ->text_starts_at = $ tag_ends_at + 1 ;
971- $ this ->text_length = $ this ->tag_name_starts_at - $ this ->text_starts_at ;
972- $ this ->tag_name_starts_at = $ tag_name_starts_at ;
973- $ this ->tag_name_length = $ tag_name_length ;
970+ $ this ->token_starts_at = $ was_at ;
971+ $ this ->token_length = $ this ->bytes_already_parsed - $ this ->token_starts_at ;
972+ $ this ->text_starts_at = $ tag_ends_at + 1 ;
973+ $ this ->text_length = $ this ->tag_name_starts_at - $ this ->text_starts_at ;
974+ $ this ->tag_name_starts_at = $ tag_name_starts_at ;
975+ $ this ->tag_name_length = $ tag_name_length ;
976+ $ this ->attributes = $ attributes ;
977+ $ this ->duplicate_attributes = $ duplicate_attributes ;
974978
975979 return true ;
976980 }
@@ -1685,12 +1689,19 @@ private function parse_next_tag() {
16851689 * to the bogus comment state - skip to the nearest >. If no closer is
16861690 * found then the HTML was truncated inside the markup declaration.
16871691 */
1688- $ at = strpos ( $ html , '> ' , $ at + 1 );
1689- if ( false === $ at ) {
1692+ $ closer_at = strpos ( $ html , '> ' , $ at + 1 );
1693+ if ( false === $ closer_at ) {
16901694 $ this ->parser_state = self ::STATE_INCOMPLETE ;
16911695
16921696 return false ;
16931697 }
1698+
1699+ $ this ->parser_state = self ::STATE_COMMENT ;
1700+ $ this ->token_length = $ closer_at + 1 - $ this ->token_starts_at ;
1701+ $ this ->text_starts_at = $ this ->token_starts_at + 2 ;
1702+ $ this ->text_length = $ closer_at - $ this ->text_starts_at ;
1703+ $ this ->bytes_already_parsed = $ closer_at + 1 ;
1704+ return true ;
16941705 }
16951706
16961707 /*
@@ -1712,6 +1723,9 @@ private function parse_next_tag() {
17121723 /*
17131724 * <? transitions to a bogus comment state – skip to the nearest >
17141725 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
1726+ *
1727+ * Although this becomes a DOM comment, the Tag Processor classifies it as a
1728+ * processing instruction node so that it can be handled in its raw syntax.
17151729 */
17161730 if ( '? ' === $ html [ $ at + 1 ] ) {
17171731 $ closer_at = strpos ( $ html , '> ' , $ at + 2 );
@@ -1721,7 +1735,7 @@ private function parse_next_tag() {
17211735 return false ;
17221736 }
17231737
1724- $ this ->parser_state = self ::STATE_DOCTYPE ;
1738+ $ this ->parser_state = self ::STATE_PI_NODE ;
17251739 $ this ->token_length = $ closer_at + 1 - $ this ->token_starts_at ;
17261740 $ this ->text_starts_at = $ this ->token_starts_at + 2 ;
17271741 $ this ->text_length = $ closer_at - $ this ->text_starts_at ;
0 commit comments