Skip to content

Commit 9928cd6

Browse files
committed
HTML API: Use full parser in html5lib tests.
Previously the `html5lib` tests have only run in the fragment parser mode, assuming IN BODY context. This limited the number of tests which could run and was a result of the HTML Processor only supporting the IN BODY fragment parser. In [58836], however, a full parser was added to the HTML Processor. In this patch the full parser is utilized in order to run more of the previously-skipped tests, asserting more behaviors in the HTML parsing. Developed in #7117 Discussed in https://core.trac.wordpress.org/ticket/61646 Props: dmsnell, jonsurrell. See #61646. git-svn-id: https://develop.svn.wordpress.org/trunk@58859 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 534b830 commit 9928cd6

1 file changed

Lines changed: 87 additions & 49 deletions

File tree

tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

Lines changed: 87 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -21,39 +21,26 @@
2121
* @group html-api-html5lib-tests
2222
*/
2323
class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
24-
/**
25-
* The HTML Processor only accepts HTML in document <body>.
26-
* Do not run tests that look for anything in document <head>.
27-
*/
28-
const SKIP_HEAD_TESTS = true;
29-
3024
/**
3125
* Skip specific tests that may not be supported or have known issues.
3226
*/
3327
const SKIP_TESTS = array(
34-
'adoption01/line0046' => 'Unimplemented: Reconstruction of active formatting elements.',
35-
'adoption01/line0159' => 'Unimplemented: Reconstruction of active formatting elements.',
36-
'adoption01/line0318' => 'Unimplemented: Reconstruction of active formatting elements.',
37-
'template/line0885' => 'Unimplemented: no parsing of attributes on context node.',
38-
'tests1/line0720' => 'Unimplemented: Reconstruction of active formatting elements.',
39-
'tests15/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
40-
'tests15/line0022' => 'Unimplemented: Reconstruction of active formatting elements.',
41-
'tests15/line0068' => 'Unimplemented: no support outside of IN BODY yet.',
42-
'tests2/line0650' => 'Whitespace only test never enters "in body" parsing mode.',
43-
'tests19/line0965' => 'Unimplemented: no support outside of IN BODY yet.',
44-
'tests23/line0001' => 'Unimplemented: Reconstruction of active formatting elements.',
45-
'tests23/line0041' => 'Unimplemented: Reconstruction of active formatting elements.',
46-
'tests23/line0069' => 'Unimplemented: Reconstruction of active formatting elements.',
47-
'tests23/line0101' => 'Unimplemented: Reconstruction of active formatting elements.',
48-
'tests26/line0263' => 'Bug: An active formatting element should be created for a trailing text node.',
49-
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
50-
'webkit02/line0013' => "Asserting behavior with scripting flag enabled, which this parser doesn't support.",
51-
'webkit01/line0300' => 'Unimplemented: no support outside of IN BODY yet.',
52-
'webkit01/line0310' => 'Unimplemented: no support outside of IN BODY yet.',
53-
'webkit01/line0336' => 'Unimplemented: no support outside of IN BODY yet.',
54-
'webkit01/line0349' => 'Unimplemented: no support outside of IN BODY yet.',
55-
'webkit01/line0362' => 'Unimplemented: no support outside of IN BODY yet.',
56-
'webkit01/line0375' => 'Unimplemented: no support outside of IN BODY yet.',
28+
'comments01/line0155' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
29+
'comments01/line0169' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
30+
'html5test-com/line0129' => 'Unimplemented: Need to access raw comment text on non-normative comments.',
31+
'noscript01/line0014' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
32+
'tests1/line0692' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
33+
'tests14/line0022' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
34+
'tests14/line0055' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
35+
'tests19/line0965' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
36+
'tests19/line1079' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
37+
'tests2/line0207' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
38+
'tests2/line0686' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
39+
'tests2/line0709' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
40+
'tests5/line0013' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
41+
'tests5/line0077' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly.',
42+
'tests5/line0091' => 'Bug: Mixed whitespace, non-whitespace text in head not split correctly',
43+
'webkit01/line0231' => 'Unimplemented: This parser does not add missing attributes to existing HTML or BODY tags.',
5744
);
5845

5946
/**
@@ -68,14 +55,40 @@ class Tests_HtmlApi_Html5lib extends WP_UnitTestCase {
6855
* @param string $html Given test HTML.
6956
* @param string $expected_tree Tree structure of parsed HTML.
7057
*/
71-
public function test_parse( $fragment_context, $html, $expected_tree ) {
58+
public function test_parse( ?string $fragment_context, string $html, string $expected_tree ) {
7259
$processed_tree = self::build_tree_representation( $fragment_context, $html );
7360

7461
if ( null === $processed_tree ) {
7562
$this->markTestSkipped( 'Test includes unsupported markup.' );
7663
}
64+
$fragment_detail = $fragment_context ? " in context <{$fragment_context}>" : '';
65+
66+
/*
67+
* The HTML processor does not produce html, head, body tags if the processor does not reach them.
68+
* HTML tree construction will always produce these tags, the HTML API does not at this time.
69+
*/
70+
$auto_generated_html_head_body = "<html>\n <head>\n <body>\n\n";
71+
$auto_generated_head_body = " <head>\n <body>\n\n";
72+
$auto_generated_body = " <body>\n\n";
73+
if ( str_ends_with( $expected_tree, $auto_generated_html_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_html_head_body ) ) {
74+
if ( str_ends_with( $processed_tree, "<html>\n <head>\n\n" ) ) {
75+
$processed_tree = substr_replace( $processed_tree, " <body>\n\n", -1 );
76+
} elseif ( str_ends_with( $processed_tree, "<html>\n\n" ) ) {
77+
$processed_tree = substr_replace( $processed_tree, " <head>\n <body>\n\n", -1 );
78+
} else {
79+
$processed_tree = substr_replace( $processed_tree, $auto_generated_html_head_body, -1 );
80+
}
81+
} elseif ( str_ends_with( $expected_tree, $auto_generated_head_body ) && ! str_ends_with( $processed_tree, $auto_generated_head_body ) ) {
82+
if ( str_ends_with( $processed_tree, "<head>\n\n" ) ) {
83+
$processed_tree = substr_replace( $processed_tree, " <body>\n\n", -1 );
84+
} else {
85+
$processed_tree = substr_replace( $processed_tree, $auto_generated_head_body, -1 );
86+
}
87+
} elseif ( str_ends_with( $expected_tree, $auto_generated_body ) && ! str_ends_with( $processed_tree, $auto_generated_body ) ) {
88+
$processed_tree = substr_replace( $processed_tree, $auto_generated_body, -1 );
89+
}
7790

78-
$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly:\n{$html}" );
91+
$this->assertSame( $expected_tree, $processed_tree, "HTML was not processed correctly{$fragment_detail}:\n{$html}" );
7992
}
8093

8194
/**
@@ -100,7 +113,9 @@ public function data_external_html5lib_tests() {
100113
$line = str_pad( strval( $test[0] ), 4, '0', STR_PAD_LEFT );
101114
$test_name = "{$test_suite}/line{$line}";
102115

103-
if ( self::should_skip_test( $test_name, $test[3] ) ) {
116+
$test_context_element = $test[1];
117+
118+
if ( self::should_skip_test( $test_context_element, $test_name, $test[3] ) ) {
104119
continue;
105120
}
106121

@@ -118,15 +133,9 @@ public function data_external_html5lib_tests() {
118133
*
119134
* @return bool True if the test case should be skipped. False otherwise.
120135
*/
121-
private static function should_skip_test( $test_name, $expected_tree ): bool {
122-
if ( self::SKIP_HEAD_TESTS ) {
123-
$html_start = "<html>\n <head>\n <body>\n";
124-
if (
125-
strlen( $expected_tree ) < strlen( $html_start ) ||
126-
substr( $expected_tree, 0, strlen( $html_start ) ) !== $html_start
127-
) {
128-
return true;
129-
}
136+
private static function should_skip_test( ?string $test_context_element, string $test_name, string $expected_tree ): bool {
137+
if ( null !== $test_context_element && 'body' !== $test_context_element ) {
138+
return true;
130139
}
131140

132141
if ( array_key_exists( $test_name, self::SKIP_TESTS ) ) {
@@ -146,15 +155,18 @@ private static function should_skip_test( $test_name, $expected_tree ): bool {
146155
private static function build_tree_representation( ?string $fragment_context, string $html ) {
147156
$processor = $fragment_context
148157
? WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" )
149-
: WP_HTML_Processor::create_fragment( $html );
158+
: WP_HTML_Processor::create_full_parser( $html );
150159
if ( null === $processor ) {
151160
return null;
152161
}
153162

154-
$output = "<html>\n <head>\n <body>\n";
155-
156-
// Initially, assume we're 2 levels deep at: html > body > [position]
157-
$indent_level = 2;
163+
/*
164+
* The fragment parser will start 2 levels deep at: html > body > [position]
165+
* and requires adjustment to initial parameters.
166+
* The full parser will not.
167+
*/
168+
$output = $fragment_context ? "<html>\n <head>\n <body>\n" : '';
169+
$indent_level = $fragment_context ? 2 : 0;
158170
$indent = ' ';
159171
$was_text = null;
160172
$text_node = '';
@@ -238,6 +250,11 @@ private static function build_tree_representation( ?string $fragment_context, st
238250
$text_node .= $processor->get_modifiable_text();
239251
break;
240252

253+
case '#funky-comment':
254+
// Comments must be "<" then "!-- " then the data then " -->".
255+
$output .= str_repeat( $indent, $indent_level ) . "<!-- {$processor->get_modifiable_text()} -->\n";
256+
break;
257+
241258
case '#comment':
242259
switch ( $processor->get_comment_type() ) {
243260
case WP_HTML_Processor::COMMENT_AS_ABRUPTLY_CLOSED_COMMENT:
@@ -250,6 +267,10 @@ private static function build_tree_representation( ?string $fragment_context, st
250267
$comment_text_content = "[CDATA[{$processor->get_modifiable_text()}]]";
251268
break;
252269

270+
case WP_HTML_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
271+
$comment_text_content = "?{$processor->get_tag()}{$processor->get_modifiable_text()}?";
272+
break;
273+
253274
default:
254275
throw new Error( "Unhandled comment type for tree construction: {$processor->get_comment_type()}" );
255276
}
@@ -301,6 +322,7 @@ public static function parse_html5_dat_testfile( $filename ) {
301322
$test_html = '';
302323
$test_dom = '';
303324
$test_context_element = null;
325+
$test_script_flag = false;
304326
$test_line_number = 0;
305327

306328
while ( false !== ( $line = fgets( $handle ) ) ) {
@@ -309,8 +331,12 @@ public static function parse_html5_dat_testfile( $filename ) {
309331
if ( '#' === $line[0] ) {
310332
// Finish section.
311333
if ( "#data\n" === $line ) {
312-
// Yield when switching from a previous state.
313-
if ( $state ) {
334+
/*
335+
* Yield when switching from a previous state.
336+
* Do not yield tests with the scripting flag enabled. The scripting flag
337+
* is always disabled in the HTML API.
338+
*/
339+
if ( $state && ! $test_script_flag ) {
314340
yield array(
315341
$test_line_number,
316342
$test_context_element,
@@ -325,6 +351,10 @@ public static function parse_html5_dat_testfile( $filename ) {
325351
$test_html = '';
326352
$test_dom = '';
327353
$test_context_element = null;
354+
$test_script_flag = false;
355+
}
356+
if ( "#script-on\n" === $line ) {
357+
$test_script_flag = true;
328358
}
329359

330360
$state = trim( substr( $line, 1 ) );
@@ -376,7 +406,15 @@ public static function parse_html5_dat_testfile( $filename ) {
376406
*/
377407
case 'document':
378408
if ( '|' === $line[0] ) {
379-
$test_dom .= substr( $line, 2 );
409+
/*
410+
* The next_token() method these tests rely on does not stop
411+
* at doctype nodes. Strip doctypes from output.
412+
* @todo Restore this line if and when the processor
413+
* exposes doctypes.
414+
*/
415+
if ( '| <!DOCTYPE ' !== substr( $line, 0, 12 ) ) {
416+
$test_dom .= substr( $line, 2 );
417+
}
380418
} else {
381419
// This is a text node that includes unescaped newlines.
382420
// Everything else should be single lines starting with "| ".

0 commit comments

Comments
 (0)