@@ -94,12 +94,6 @@ public function data_external_html5lib_tests() {
9494 continue ;
9595 }
9696
97- // These tests contain no tags, which isn't yet
98- // supported by the HTML API.
99- if ( 'comments01.dat ' === $ entry ) {
100- continue ;
101- }
102-
10397 foreach ( self ::parse_html5_dat_testfile ( $ test_dir . $ entry ) as $ k => $ test ) {
10498 // strip .dat extension from filename
10599 $ test_suite = substr ( $ entry , 0 , -4 );
@@ -120,34 +114,74 @@ public function data_external_html5lib_tests() {
120114 */
121115 public static function build_html5_treelike_string ( $ fragment_context , $ html ) {
122116 $ p = WP_HTML_Processor::create_fragment ( $ html , "< {$ fragment_context }> " );
117+
123118 if ( null === $ p ) {
124119 return null ;
125120 }
126121
127122 $ output = "<html> \n <head> \n <body> \n" ;
128- while ( $ p ->next_tag () ) {
129- $ indent = '' ;
130123
131- // Breadcrumbs include this tag, so skip 1 nesting level.
132- foreach ( $ p ->get_breadcrumbs () as $ index => $ _ ) {
133- if ( $ index ) {
134- $ indent .= ' ' ;
135- }
124+ // If we haven't set our bookmark, assume we're 2 levels deep:
125+ // html > body > [position]
126+ $ indent_level = 2 ;
127+ $ indent = ' ' ;
128+
129+ while ( $ p ->next_token () ) {
130+ if ( $ p ->get_last_error () !== null ) {
131+ return null ;
136132 }
137- $ t = strtolower ( $ p ->get_tag () );
138- $ output .= "{$ indent }< {$ t }> \n" ;
139-
140- $ attribute_names = $ p ->get_attribute_names_with_prefix ( '' );
141- sort ( $ attribute_names , SORT_STRING );
142-
143- foreach ( $ attribute_names as $ attribute_name ) {
144- $ val = $ p ->get_attribute ( $ attribute_name );
145- // Attributes with no value are `true` with the HTML API,
146- // We map use the empty string value in the tree structure.
147- if ( true === $ val ) {
148- $ val = '' ;
149- }
150- $ output .= "{$ indent } {$ attribute_name }= \"{$ val }\"\n" ;
133+
134+ switch ( $ p ->get_token_type () ) {
135+ case '#tag ' :
136+ if ( $ p ->is_tag_closer () ) {
137+ $ indent_level --;
138+ break ;
139+ }
140+
141+ $ indent_level = count ( $ p ->get_breadcrumbs () );
142+
143+ $ t = strtolower ( $ p ->get_tag () );
144+ $ output .= str_repeat ( $ indent , $ indent_level - 1 ) . "< {$ t }> \n" ;
145+
146+ $ attribute_names = $ p ->get_attribute_names_with_prefix ( '' );
147+ if ( $ attribute_names ) {
148+ sort ( $ attribute_names , SORT_STRING );
149+
150+ foreach ( $ attribute_names as $ attribute_name ) {
151+ $ val = $ p ->get_attribute ( $ attribute_name );
152+ // Attributes with no value are `true` with the HTML API;
153+ // we map them to the empty string value in the tree structure.
154+ if ( true === $ val ) {
155+ $ val = '' ;
156+ }
157+ $ output .= str_repeat ( $ indent , $ indent_level ) . "{$ attribute_name }= \"{$ val }\"\n" ;
158+ }
159+ }
160+ break ;
161+
162+ case '#text ' :
163+ $ output .= str_repeat ( $ indent , $ indent_level ) . "\"{$ p ->get_modifiable_text ()}\"\n" ;
164+ break ;
165+
166+ case '#cdata-section ' :
167+ break ;
168+
169+ case '#processing-instruction ' :
170+ break ;
171+
172+ case '#comment ' :
173+ // Comments must be "<" then "!-- " then the data then " -->".
174+ $ output .= str_repeat ( $ indent , $ indent_level ) . "<!-- {$ p ->get_modifiable_text ()} --> \n" ;
175+ break ;
176+
177+ case '#doctype ' :
178+ break ;
179+
180+ case '#presumptuous-tag ' :
181+ break ;
182+
183+ case '#funky-comment ' :
184+ break ;
151185 }
152186 }
153187
@@ -159,7 +193,7 @@ public static function build_html5_treelike_string( $fragment_context, $html ) {
159193 return null ;
160194 }
161195
162- return $ output ;
196+ return $ output . "\n" ;
163197 }
164198
165199 /**
@@ -258,40 +292,9 @@ public static function parse_html5_dat_testfile( $filename ) {
258292 */
259293 case 'document ' :
260294 if ( '| ' === $ line [0 ] ) {
261- $ candidate = substr ( $ line , 2 );
262-
263- // Remove leading spaces and the trailing newline
264- $ trimmed = ltrim ( substr ( $ candidate , 0 , -1 ) );
265-
266- // Text: "…
267- if ( '" ' === $ trimmed [0 ] ) {
268- // Skip for now
269- break ;
270- }
271-
272- // Attribute: name="value"
273- if ( '" ' === $ trimmed [ strlen ( $ trimmed ) - 1 ] ) {
274- $ test_dom .= $ candidate ;
275- break ;
276- }
277-
278- // Tags: <tag-name>
279- // Comments: <!-- comment text -->
280- // Doctypes: <!DOCTYPE … >
281- // Processing instructions: <?target >
282- if ( '< ' === $ trimmed [0 ] && '> ' === $ trimmed [ strlen ( $ trimmed ) - 1 ] ) {
283- // Tags: <tag-name>
284- if ( ctype_alpha ( $ trimmed [1 ] ) ) {
285- $ test_dom .= $ candidate ;
286- break ;
287- }
288- // Skip everything else for now
289- break ;
290- }
295+ $ test_dom .= substr ( $ line , 2 );
291296 } else {
292- // This is a text node that includes unescaped newlines.
293- // Everything else should be singles lines starting with "| ".
294- // @todo Skip for now, add to $test_dom when we handle text nodes.
297+ $ test_dom .= $ line ;
295298 }
296299 break ;
297300 }
0 commit comments