2121 * @group html-api-html5lib-tests
2222 */
2323class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
24- /**
25- * The HTML Processor only accepts HTML in document <body>.
26- * Do not run tests that look for anything in document <head>.
27- */
28- const SKIP_HEAD_TESTS = true ;
29-
3024 /**
3125 * Skip specific tests that may not be supported or have known issues.
3226 */
3327 const SKIP_TESTS = array (
34- 'adoption01/line0046 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
35- 'adoption01/line0159 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
36- 'adoption01/line0318 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
37- 'template/line0885 ' => 'Unimplemented: no parsing of attributes on context node. ' ,
38- 'tests1/line0720 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
39- 'tests15/line0001 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
40- 'tests15/line0022 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
41- 'tests15/line0068 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
42- 'tests2/line0650 ' => 'Whitespace only test never enters "in body" parsing mode. ' ,
43- 'tests19/line0965 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
44- 'tests23/line0001 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
45- 'tests23/line0041 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
46- 'tests23/line0069 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
47- 'tests23/line0101 ' => 'Unimplemented: Reconstruction of active formatting elements. ' ,
48- 'tests26/line0263 ' => 'Bug: An active formatting element should be created for a trailing text node. ' ,
49- 'webkit01/line0231 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
50- 'webkit02/line0013 ' => "Asserting behavior with scripting flag enabled, which this parser doesn't support. " ,
51- 'webkit01/line0300 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
52- 'webkit01/line0310 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
53- 'webkit01/line0336 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
54- 'webkit01/line0349 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
55- 'webkit01/line0362 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
56- 'webkit01/line0375 ' => 'Unimplemented: no support outside of IN BODY yet. ' ,
28+ 'comments01/line0155 ' => 'Unimplemented: Need to access raw comment text on non-normative comments. ' ,
29+ 'comments01/line0169 ' => 'Unimplemented: Need to access raw comment text on non-normative comments. ' ,
30+ 'html5test-com/line0129 ' => 'Unimplemented: Need to access raw comment text on non-normative comments. ' ,
31+ 'noscript01/line0014 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
32+ 'tests1/line0692 ' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly ' ,
33+ 'tests14/line0022 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
34+ 'tests14/line0055 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
35+ 'tests19/line0965 ' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly. ' ,
36+ 'tests19/line1079 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
37+ 'tests2/line0207 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
38+ 'tests2/line0686 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
39+ 'tests2/line0709 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
40+ 'tests5/line0013 ' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly. ' ,
41+ 'tests5/line0077 ' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly. ' ,
42+ 'tests5/line0091 ' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly ' ,
43+ 'webkit01/line0231 ' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags. ' ,
5744 );
5845
5946 /**
@@ -68,14 +55,40 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
6855 * @param string $html Given test HTML.
6956 * @param string $expected_tree Tree structure of parsed HTML.
7057 */
71- public function test_parse ( $ fragment_context , $ html , $ expected_tree ) {
58+ public function test_parse ( ? string $ fragment_context , string $ html , string $ expected_tree ) {
7259 $ processed_tree = self ::build_tree_representation ( $ fragment_context , $ html );
7360
7461 if ( null === $ processed_tree ) {
7562 $ this ->markTestSkipped ( 'Test includes unsupported markup. ' );
7663 }
64+ $ fragment_detail = $ fragment_context ? " in context < {$ fragment_context }> " : '' ;
65+
66+ /*
67+ * The HTML Processor only produces html, head, and body tags that it actually reaches, whereas HTML tree construction always produces them (the HTML API does not at this time).
68+ * When the expected tree ends with these auto-generated tags and the processed tree does not, the final character of the processed tree is replaced with the missing tags before the two trees are compared.
69+ */
70+ $ auto_generated_html_head_body = "<html> \n <head> \n <body> \n\n" ;
71+ $ auto_generated_head_body = " <head> \n <body> \n\n" ;
72+ $ auto_generated_body = " <body> \n\n" ;
73+ if ( str_ends_with ( $ expected_tree , $ auto_generated_html_head_body ) && ! str_ends_with ( $ processed_tree , $ auto_generated_html_head_body ) ) {
74+ if ( str_ends_with ( $ processed_tree , "<html> \n <head> \n\n" ) ) {
75+ $ processed_tree = substr_replace ( $ processed_tree , " <body> \n\n" , -1 );
76+ } elseif ( str_ends_with ( $ processed_tree , "<html> \n\n" ) ) {
77+ $ processed_tree = substr_replace ( $ processed_tree , " <head> \n <body> \n\n" , -1 );
78+ } else {
79+ $ processed_tree = substr_replace ( $ processed_tree , $ auto_generated_html_head_body , -1 );
80+ }
81+ } elseif ( str_ends_with ( $ expected_tree , $ auto_generated_head_body ) && ! str_ends_with ( $ processed_tree , $ auto_generated_head_body ) ) {
82+ if ( str_ends_with ( $ processed_tree , "<head> \n\n" ) ) {
83+ $ processed_tree = substr_replace ( $ processed_tree , " <body> \n\n" , -1 );
84+ } else {
85+ $ processed_tree = substr_replace ( $ processed_tree , $ auto_generated_head_body , -1 );
86+ }
87+ } elseif ( str_ends_with ( $ expected_tree , $ auto_generated_body ) && ! str_ends_with ( $ processed_tree , $ auto_generated_body ) ) {
88+ $ processed_tree = substr_replace ( $ processed_tree , $ auto_generated_body , -1 );
89+ }
7790
78- $ this ->assertSame ( $ expected_tree , $ processed_tree , "HTML was not processed correctly: \n{$ html }" );
91+ $ this ->assertSame ( $ expected_tree , $ processed_tree , "HTML was not processed correctly { $ fragment_detail } : \n{$ html }" );
7992 }
8093
8194 /**
@@ -100,7 +113,9 @@ public function data_external_html5lib_tests() {
100113 $ line = str_pad ( strval ( $ test [0 ] ), 4 , '0 ' , STR_PAD_LEFT );
101114 $ test_name = "{$ test_suite }/line {$ line }" ;
102115
103- if ( self ::should_skip_test ( $ test_name , $ test [3 ] ) ) {
116+ $ test_context_element = $ test [1 ];
117+
118+ if ( self ::should_skip_test ( $ test_context_element , $ test_name , $ test [3 ] ) ) {
104119 continue ;
105120 }
106121
@@ -118,15 +133,9 @@ public function data_external_html5lib_tests() {
118133 *
119134 * @return bool True if the test case should be skipped. False otherwise.
120135 */
121- private static function should_skip_test ( $ test_name , $ expected_tree ): bool {
122- if ( self ::SKIP_HEAD_TESTS ) {
123- $ html_start = "<html> \n <head> \n <body> \n" ;
124- if (
125- strlen ( $ expected_tree ) < strlen ( $ html_start ) ||
126- substr ( $ expected_tree , 0 , strlen ( $ html_start ) ) !== $ html_start
127- ) {
128- return true ;
129- }
136+ private static function should_skip_test ( ?string $ test_context_element , string $ test_name , string $ expected_tree ): bool {
137+ if ( null !== $ test_context_element && 'body ' !== $ test_context_element ) {
138+ return true ;
130139 }
131140
132141 if ( array_key_exists ( $ test_name , self ::SKIP_TESTS ) ) {
@@ -146,15 +155,18 @@ private static function should_skip_test( $test_name, $expected_tree ): bool {
146155 private static function build_tree_representation ( ?string $ fragment_context , string $ html ) {
147156 $ processor = $ fragment_context
148157 ? WP_HTML_Processor::create_fragment ( $ html , "< {$ fragment_context }> " )
149- : WP_HTML_Processor::create_fragment ( $ html );
158+ : WP_HTML_Processor::create_full_parser ( $ html );
150159 if ( null === $ processor ) {
151160 return null ;
152161 }
153162
154- $ output = "<html> \n <head> \n <body> \n" ;
155-
156- // Initially, assume we're 2 levels deep at: html > body > [position]
157- $ indent_level = 2 ;
163+ /*
164+ * The fragment parser starts 2 levels deep at: html > body > [position]
165+ * and requires adjustment to the initial parameters.
166+ * The full parser does not.
167+ */
168+ $ output = $ fragment_context ? "<html> \n <head> \n <body> \n" : '' ;
169+ $ indent_level = $ fragment_context ? 2 : 0 ;
158170 $ indent = ' ' ;
159171 $ was_text = null ;
160172 $ text_node = '' ;
@@ -238,6 +250,11 @@ private static function build_tree_representation( ?string $fragment_context, st
238250 $ text_node .= $ processor ->get_modifiable_text ();
239251 break ;
240252
253+ case '#funky-comment ' :
254+ // Comments must be "<" then "!-- " then the data then " -->".
255+ $ output .= str_repeat ( $ indent , $ indent_level ) . "<!-- {$ processor ->get_modifiable_text ()} --> \n" ;
256+ break ;
257+
241258 case '#comment ' :
242259 switch ( $ processor ->get_comment_type () ) {
243260 case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT :
@@ -250,6 +267,10 @@ private static function build_tree_representation( ?string $fragment_context, st
250267 $ comment_text_content = "[CDATA[ {$ processor ->get_modifiable_text ()}]] " ;
251268 break ;
252269
270+ case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE :
271+ $ comment_text_content = "? {$ processor ->get_tag ()}{$ processor ->get_modifiable_text ()}? " ;
272+ break ;
273+
253274 default :
254275 throw new Error ( "Unhandled comment type for tree construction: {$ processor ->get_comment_type ()}" );
255276 }
@@ -301,6 +322,7 @@ public static function parse_html5_dat_testfile( $filename ) {
301322 $ test_html = '' ;
302323 $ test_dom = '' ;
303324 $ test_context_element = null ;
325+ $ test_script_flag = false ;
304326 $ test_line_number = 0 ;
305327
306328 while ( false !== ( $ line = fgets ( $ handle ) ) ) {
@@ -309,8 +331,12 @@ public static function parse_html5_dat_testfile( $filename ) {
309331 if ( '# ' === $ line [0 ] ) {
310332 // Finish section.
311333 if ( "#data \n" === $ line ) {
312- // Yield when switching from a previous state.
313- if ( $ state ) {
334+ /*
335+ * Yield when switching from a previous state.
336+ * Do not yield tests with the scripting flag enabled. The scripting flag
337+ * is always disabled in the HTML API.
338+ */
339+ if ( $ state && ! $ test_script_flag ) {
314340 yield array (
315341 $ test_line_number ,
316342 $ test_context_element ,
@@ -325,6 +351,10 @@ public static function parse_html5_dat_testfile( $filename ) {
325351 $ test_html = '' ;
326352 $ test_dom = '' ;
327353 $ test_context_element = null ;
354+ $ test_script_flag = false ;
355+ }
356+ if ( "#script-on \n" === $ line ) {
357+ $ test_script_flag = true ;
328358 }
329359
330360 $ state = trim ( substr ( $ line , 1 ) );
@@ -376,7 +406,15 @@ public static function parse_html5_dat_testfile( $filename ) {
376406 */
377407 case 'document ' :
378408 if ( '| ' === $ line [0 ] ) {
379- $ test_dom .= substr ( $ line , 2 );
409+ /*
410+ * The next_token() method these tests rely on does not stop
411+ * at doctype nodes. Strip doctypes from output.
412+ * @todo Restore this line if and when the processor
413+ * exposes doctypes.
414+ */
415+ if ( '| <!DOCTYPE ' !== substr ( $ line , 0 , 12 ) ) {
416+ $ test_dom .= substr ( $ line , 2 );
417+ }
380418 } else {
381419 // This is a text node that includes unescaped newlines.
382420 // Everything else should be single lines starting with "| ".
0 commit comments