101101 *
102102 * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
103103 * - Custom elements: All custom elements are supported. :)
104- * - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105- * - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
104+ * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH.
105+ * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
106106 * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
107107 * - Links: A.
108- * - Lists: DD, DL, DT, LI, OL, LI .
109- * - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
110- * - Paragraph: P.
111- * - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112- * - Sectioning elements: ARTICLE, ASIDE, NAV, SECTION.
108+ * - Lists: DD, DL, DT, LI, OL, UL.
109+ * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO.
110+ * - Paragraph: BR, P.
111+ * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
112+ * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
113113 * - Templating elements: SLOT.
114114 * - Text decoration: RUBY.
115- * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
115+ * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER.
116116 *
117117 * ### Supported markup
118118 *
@@ -149,17 +149,6 @@ class Gutenberg_HTML_Processor_6_5 extends Gutenberg_HTML_Tag_Processor_6_5 {
149149 */
150150 const MAX_BOOKMARKS = 100 ;
151151
152- /**
153- * Static query for instructing the Tag Processor to visit every token.
154- *
155- * @access private
156- *
157- * @since 6.4.0
158- *
159- * @var array
160- */
161- const VISIT_EVERYTHING = array ( 'tag_closers ' => 'visit ' );
162-
163152 /**
164153 * Holds the working state of the parser, including the stack of
165154 * open elements and the stack of active formatting elements.
@@ -424,6 +413,30 @@ public function next_tag( $query = null ) {
424413 return false ;
425414 }
426415
416+ /**
417+ * Ensures internal accounting is maintained for HTML semantic rules while
418+ * the underlying Tag Processor class is seeking to a bookmark.
419+ *
420+ * This doesn't currently have a way to represent non-tags and doesn't process
421+ * semantic rules for text nodes. For access to the raw tokens consider using
422+ * WP_HTML_Tag_Processor instead.
423+ *
424+ * @since 6.5.0 Added for internal support; do not use.
425+ *
426+ * @access private
427+ *
428+ * @return bool The value returned by the parent class's next_token(); presumably whether a token was found.
429+ */
430+ public function next_token () {
431+ $ found_a_token = parent ::next_token ();
432+ // Only tag tokens are handed to step() for reprocessing; the return value is the parent's in every case.
433+ if ( '#tag ' === $ this ->get_token_type () ) {
434+ $ this ->step ( self ::REPROCESS_CURRENT_NODE );
435+ }
436+
437+ return $ found_a_token ;
438+ }
439+
427440 /**
428441 * Indicates if the currently-matched tag matches the given breadcrumbs.
429442 *
@@ -520,7 +533,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
520533 $ this ->state ->stack_of_open_elements ->pop ();
521534 }
522535
523- parent ::next_tag ( self ::VISIT_EVERYTHING );
536+ while ( parent ::next_token () && '#tag ' !== $ this ->get_token_type () ) {
537+ continue ;
538+ }
524539 }
525540
526541 // Finish stepping when there are no more tokens in the document.
@@ -684,10 +699,12 @@ private function step_in_body() {
684699 case '-FOOTER ' :
685700 case '-HEADER ' :
686701 case '-HGROUP ' :
702+ case '-LISTING ' :
687703 case '-MAIN ' :
688704 case '-MENU ' :
689705 case '-NAV ' :
690706 case '-OL ' :
707+ case '-PRE ' :
691708 case '-SEARCH ' :
692709 case '-SECTION ' :
693710 case '-SUMMARY ' :
@@ -732,6 +749,18 @@ private function step_in_body() {
732749 $ this ->insert_html_element ( $ this ->state ->current_token );
733750 return true ;
734751
752+ /*
753+ * > A start tag whose tag name is one of: "pre", "listing"
754+ */
755+ case '+PRE ' :
756+ case '+LISTING ' :
757+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
758+ $ this ->close_a_p_element ();
759+ }
760+ $ this ->insert_html_element ( $ this ->state ->current_token );
761+ $ this ->state ->frameset_ok = false ;
762+ return true ;
763+
735764 /*
736765 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
737766 */
@@ -934,11 +963,64 @@ private function step_in_body() {
934963 $ this ->run_adoption_agency_algorithm ();
935964 return true ;
936965
966+ /*
967+ * > An end tag whose tag name is "br"
968+ * > Parse error. Drop the attributes from the token, and act as described in the next
969+ * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
970+ * > than the end tag token that it actually is.
971+ */
972+ case '-BR ' :
973+ $ this ->last_error = self ::ERROR_UNSUPPORTED ;
974+ throw new WP_HTML_Unsupported_Exception ( 'Closing BR tags require unimplemented special handling. ' );
975+
937976 /*
938977 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
939978 */
979+ case '+AREA ' :
980+ case '+BR ' :
981+ case '+EMBED ' :
940982 case '+IMG ' :
983+ case '+KEYGEN ' :
984+ case '+WBR ' :
941985 $ this ->reconstruct_active_formatting_elements ();
986+ $ this ->insert_html_element ( $ this ->state ->current_token );
987+ $ this ->state ->frameset_ok = false ;
988+ return true ;
989+
990+ /*
991+ * > A start tag whose tag name is "input"
992+ */
993+ case '+INPUT ' :
994+ $ this ->reconstruct_active_formatting_elements ();
995+ $ this ->insert_html_element ( $ this ->state ->current_token );
996+ $ type_attribute = $ this ->get_attribute ( 'type ' );
997+ /*
998+ * > If the token does not have an attribute with the name "type", or if it does,
999+ * > but that attribute's value is not an ASCII case-insensitive match for the
1000+ * > string "hidden", then: set the frameset-ok flag to "not ok".
1001+ */
1002+ if ( ! is_string ( $ type_attribute ) || 'hidden ' !== strtolower ( $ type_attribute ) ) {
1003+ $ this ->state ->frameset_ok = false ;
1004+ }
1005+ return true ;
1006+
1007+ /*
1008+ * > A start tag whose tag name is "hr"
1009+ */
1010+ case '+HR ' :
1011+ if ( $ this ->state ->stack_of_open_elements ->has_p_in_button_scope () ) {
1012+ $ this ->close_a_p_element ();
1013+ }
1014+ $ this ->insert_html_element ( $ this ->state ->current_token );
1015+ $ this ->state ->frameset_ok = false ;
1016+ return true ;
1017+
1018+ /*
1019+ * > A start tag whose tag name is one of: "param", "source", "track"
1020+ */
1021+ case '+PARAM ' :
1022+ case '+SOURCE ' :
1023+ case '+TRACK ' :
9421024 $ this ->insert_html_element ( $ this ->state ->current_token );
9431025 return true ;
9441026 }
@@ -961,30 +1043,20 @@ private function step_in_body() {
9611043 */
9621044 switch ( $ tag_name ) {
9631045 case 'APPLET ' :
964- case 'AREA ' :
9651046 case 'BASE ' :
9661047 case 'BASEFONT ' :
9671048 case 'BGSOUND ' :
9681049 case 'BODY ' :
969- case 'BR ' :
9701050 case 'CAPTION ' :
9711051 case 'COL ' :
9721052 case 'COLGROUP ' :
973- case 'DD ' :
974- case 'DT ' :
975- case 'EMBED ' :
9761053 case 'FORM ' :
9771054 case 'FRAME ' :
9781055 case 'FRAMESET ' :
9791056 case 'HEAD ' :
980- case 'HR ' :
9811057 case 'HTML ' :
9821058 case 'IFRAME ' :
983- case 'INPUT ' :
984- case 'KEYGEN ' :
985- case 'LI ' :
9861059 case 'LINK ' :
987- case 'LISTING ' :
9881060 case 'MARQUEE ' :
9891061 case 'MATH ' :
9901062 case 'META ' :
@@ -993,20 +1065,16 @@ private function step_in_body() {
9931065 case 'NOFRAMES ' :
9941066 case 'NOSCRIPT ' :
9951067 case 'OBJECT ' :
996- case 'OL ' :
9971068 case 'OPTGROUP ' :
9981069 case 'OPTION ' :
999- case 'PARAM ' :
10001070 case 'PLAINTEXT ' :
1001- case 'PRE ' :
10021071 case 'RB ' :
10031072 case 'RP ' :
10041073 case 'RT ' :
10051074 case 'RTC ' :
10061075 case 'SARCASM ' :
10071076 case 'SCRIPT ' :
10081077 case 'SELECT ' :
1009- case 'SOURCE ' :
10101078 case 'STYLE ' :
10111079 case 'SVG ' :
10121080 case 'TABLE ' :
@@ -1019,9 +1087,6 @@ private function step_in_body() {
10191087 case 'THEAD ' :
10201088 case 'TITLE ' :
10211089 case 'TR ' :
1022- case 'TRACK ' :
1023- case 'UL ' :
1024- case 'WBR ' :
10251090 case 'XMP ' :
10261091 $ this ->last_error = self ::ERROR_UNSUPPORTED ;
10271092 throw new WP_HTML_Unsupported_Exception ( "Cannot process {$ tag_name } element. " );
@@ -1675,14 +1740,19 @@ public static function is_void( $tag_name ) {
16751740 return (
16761741 'AREA ' === $ tag_name ||
16771742 'BASE ' === $ tag_name ||
1743+ 'BASEFONT ' === $ tag_name || // Obsolete but still treated as void.
1744+ 'BGSOUND ' === $ tag_name || // Obsolete but still treated as void.
16781745 'BR ' === $ tag_name ||
16791746 'COL ' === $ tag_name ||
16801747 'EMBED ' === $ tag_name ||
1748+ 'FRAME ' === $ tag_name ||
16811749 'HR ' === $ tag_name ||
16821750 'IMG ' === $ tag_name ||
16831751 'INPUT ' === $ tag_name ||
1752+ 'KEYGEN ' === $ tag_name || // Obsolete but still treated as void.
16841753 'LINK ' === $ tag_name ||
16851754 'META ' === $ tag_name ||
1755+ 'PARAM ' === $ tag_name || // Obsolete but still treated as void.
16861756 'SOURCE ' === $ tag_name ||
16871757 'TRACK ' === $ tag_name ||
16881758 'WBR ' === $ tag_name
0 commit comments