@@ -92,12 +92,6 @@ public function data_external_html5lib_tests() {
9292 continue ;
9393 }
9494
95- // These tests contain no tags, which isn't yet
96- // supported by the HTML API.
97- if ( 'comments01.dat ' === $ entry ) {
98- continue ;
99- }
100-
10195 foreach ( self ::parse_html5_dat_testfile ( $ test_dir . $ entry ) as $ k => $ test ) {
10296 // strip .dat extension from filename
10397 $ test_suite = substr ( $ entry , 0 , -4 );
@@ -118,34 +112,74 @@ public function data_external_html5lib_tests() {
118112 */
119113 public static function build_html5_treelike_string ( $ fragment_context , $ html ) {
120114 $ p = WP_HTML_Processor::create_fragment ( $ html , "< {$ fragment_context }> " );
115+
121116 if ( null === $ p ) {
122117 return null ;
123118 }
124119
125120 $ output = "<html> \n <head> \n <body> \n" ;
126- while ( $ p ->next_tag () ) {
127- $ indent = '' ;
128121
129- // Breadcrumbs include this tag, so skip 1 nesting level.
130- foreach ( $ p ->get_breadcrumbs () as $ index => $ _ ) {
131- if ( $ index ) {
132- $ indent .= ' ' ;
133- }
122+ // If we haven't set our bookmark, assume we're 2 levels deep:
123+ // html > body > [position]
124+ $ indent_level = 2 ;
125+ $ indent = ' ' ;
126+
127+ while ( $ p ->next_token () ) {
128+ if ( $ p ->get_last_error () !== null ) {
129+ return null ;
134130 }
135- $ t = strtolower ( $ p ->get_tag () );
136- $ output .= "{$ indent }< {$ t }> \n" ;
137-
138- $ attribute_names = $ p ->get_attribute_names_with_prefix ( '' );
139- sort ( $ attribute_names , SORT_STRING );
140-
141- foreach ( $ attribute_names as $ attribute_name ) {
142- $ val = $ p ->get_attribute ( $ attribute_name );
143- // Attributes with no value are `true` with the HTML API,
144- // We map use the empty string value in the tree structure.
145- if ( true === $ val ) {
146- $ val = '' ;
147- }
148- $ output .= "{$ indent } {$ attribute_name }= \"{$ val }\"\n" ;
131+
132+ switch ( $ p ->get_token_type () ) {
133+ case '#tag ' :
134+ if ( $ p ->is_tag_closer () ) {
135+ $ indent_level --;
136+ break ;
137+ }
138+
139+ $ indent_level = count ( $ p ->get_breadcrumbs () );
140+
141+ $ t = strtolower ( $ p ->get_tag () );
142+ $ output .= str_repeat ( $ indent , $ indent_level - 1 ) . "< {$ t }> \n" ;
143+
144+ $ attribute_names = $ p ->get_attribute_names_with_prefix ( '' );
145+ if ( $ attribute_names ) {
146+ sort ( $ attribute_names , SORT_STRING );
147+
148+ foreach ( $ attribute_names as $ attribute_name ) {
149+ $ val = $ p ->get_attribute ( $ attribute_name );
150+ // Attributes with no value are `true` in the HTML API;
151+ // we map them to the empty string in the tree structure.
152+ if ( true === $ val ) {
153+ $ val = '' ;
154+ }
155+ $ output .= str_repeat ( $ indent , $ indent_level ) . "{$ attribute_name }= \"{$ val }\"\n" ;
156+ }
157+ }
158+ break ;
159+
160+ case '#text ' :
161+ $ output .= str_repeat ( $ indent , $ indent_level ) . "\"{$ p ->get_modifiable_text ()}\"\n" ;
162+ break ;
163+
164+ case '#cdata-section ' :
165+ break ;
166+
167+ case '#processing-instruction ' :
168+ break ;
169+
170+ case '#comment ' :
171+ // Comments must be "<" then "!-- " then the data then " -->".
172+ $ output .= str_repeat ( $ indent , $ indent_level ) . "<!-- {$ p ->get_modifiable_text ()} --> \n" ;
173+ break ;
174+
175+ case '#doctype ' :
176+ break ;
177+
178+ case '#presumptuous-tag ' :
179+ break ;
180+
181+ case '#funky-comment ' :
182+ break ;
149183 }
150184 }
151185
@@ -157,7 +191,7 @@ public static function build_html5_treelike_string( $fragment_context, $html ) {
157191 return null ;
158192 }
159193
160- return $ output ;
194+ return $ output . "\n" ;
161195 }
162196
163197 /**
@@ -256,40 +290,9 @@ public static function parse_html5_dat_testfile( $filename ) {
256290 */
257291 case 'document ' :
258292 if ( '| ' === $ line [0 ] ) {
259- $ candidate = substr ( $ line , 2 );
260-
261- // Remove leading spaces and the trailing newline
262- $ trimmed = ltrim ( substr ( $ candidate , 0 , -1 ) );
263-
264- // Text: "…
265- if ( '" ' === $ trimmed [0 ] ) {
266- // Skip for now
267- break ;
268- }
269-
270- // Attribute: name="value"
271- if ( '" ' === $ trimmed [ strlen ( $ trimmed ) - 1 ] ) {
272- $ test_dom .= $ candidate ;
273- break ;
274- }
275-
276- // Tags: <tag-name>
277- // Comments: <!-- comment text -->
278- // Doctypes: <!DOCTYPE … >
279- // Processing instructions: <?target >
280- if ( '< ' === $ trimmed [0 ] && '> ' === $ trimmed [ strlen ( $ trimmed ) - 1 ] ) {
281- // Tags: <tag-name>
282- if ( ctype_alpha ( $ trimmed [1 ] ) ) {
283- $ test_dom .= $ candidate ;
284- break ;
285- }
286- // Skip everything else for now
287- break ;
288- }
293+ $ test_dom .= substr ( $ line , 2 );
289294 } else {
290- // This is a text node that includes unescaped newlines.
291- // Everything else should be singles lines starting with "| ".
292- // @todo Skip for now, add to $test_dom when we handle text nodes.
295+ $ test_dom .= $ line ;
293296 }
294297 break ;
295298 }
0 commit comments