Skip to content

Commit fbca73f

Browse files
committed
Implement scan all tokens handling
1 parent 1953a0f commit fbca73f

1 file changed

Lines changed: 64 additions & 61 deletions

File tree

tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

Lines changed: 64 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,6 @@ public function data_external_html5lib_tests() {
9292
continue;
9393
}
9494

95-
// These tests contain no tags, which isn't yet
96-
// supported by the HTML API.
97-
if ( 'comments01.dat' === $entry ) {
98-
continue;
99-
}
100-
10195
foreach ( self::parse_html5_dat_testfile( $test_dir . $entry ) as $k => $test ) {
10296
// strip .dat extension from filename
10397
$test_suite = substr( $entry, 0, -4 );
@@ -118,34 +112,74 @@ public function data_external_html5lib_tests() {
118112
*/
119113
public static function build_html5_treelike_string( $fragment_context, $html ) {
120114
$p = WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" );
115+
121116
if ( null === $p ) {
122117
return null;
123118
}
124119

125120
$output = "<html>\n <head>\n <body>\n";
126-
while ( $p->next_tag() ) {
127-
$indent = '';
128121

129-
// Breadcrumbs include this tag, so skip 1 nesting level.
130-
foreach ( $p->get_breadcrumbs() as $index => $_ ) {
131-
if ( $index ) {
132-
$indent .= ' ';
133-
}
122+
// If we haven't set our bookmark, assume we're 2 levels deep:
123+
// html > body > [position]
124+
$indent_level = 2;
125+
$indent = ' ';
126+
127+
while ( $p->next_token() ) {
128+
if ( $p->get_last_error() !== null ) {
129+
return null;
134130
}
135-
$t = strtolower( $p->get_tag() );
136-
$output .= "{$indent}<{$t}>\n";
137-
138-
$attribute_names = $p->get_attribute_names_with_prefix( '' );
139-
sort( $attribute_names, SORT_STRING );
140-
141-
foreach ( $attribute_names as $attribute_name ) {
142-
$val = $p->get_attribute( $attribute_name );
143-
// Attributes with no value are `true` with the HTML API,
144-
// so we map them to the empty string value in the tree structure.
145-
if ( true === $val ) {
146-
$val = '';
147-
}
148-
$output .= "{$indent} {$attribute_name}=\"{$val}\"\n";
131+
132+
switch ( $p->get_token_type() ) {
133+
case '#tag':
134+
if ( $p->is_tag_closer() ) {
135+
$indent_level--;
136+
break;
137+
}
138+
139+
$indent_level = count( $p->get_breadcrumbs() );
140+
141+
$t = strtolower( $p->get_tag() );
142+
$output .= str_repeat( $indent, $indent_level - 1 ) . "<{$t}>\n";
143+
144+
$attribute_names = $p->get_attribute_names_with_prefix( '' );
145+
if ( $attribute_names ) {
146+
sort( $attribute_names, SORT_STRING );
147+
148+
foreach ( $attribute_names as $attribute_name ) {
149+
$val = $p->get_attribute( $attribute_name );
150+
// Attributes with no value are `true` with the HTML API,
151+
// so we map them to the empty string value in the tree structure.
152+
if ( true === $val ) {
153+
$val = '';
154+
}
155+
$output .= str_repeat( $indent, $indent_level ) . "{$attribute_name}=\"{$val}\"\n";
156+
}
157+
}
158+
break;
159+
160+
case '#text':
161+
$output .= str_repeat( $indent, $indent_level ) . "\"{$p->get_modifiable_text()}\"\n";
162+
break;
163+
164+
case '#cdata-section':
165+
break;
166+
167+
case '#processing-instruction':
168+
break;
169+
170+
case '#comment':
171+
// Comments must be "<" then "!-- " then the data then " -->".
172+
$output .= str_repeat( $indent, $indent_level ) . "<!-- {$p->get_modifiable_text()} -->\n";
173+
break;
174+
175+
case '#doctype':
176+
break;
177+
178+
case '#presumptuous-tag':
179+
break;
180+
181+
case '#funky-comment':
182+
break;
149183
}
150184
}
151185

@@ -157,7 +191,7 @@ public static function build_html5_treelike_string( $fragment_context, $html ) {
157191
return null;
158192
}
159193

160-
return $output;
194+
return $output . "\n";
161195
}
162196

163197
/**
@@ -256,40 +290,9 @@ public static function parse_html5_dat_testfile( $filename ) {
256290
*/
257291
case 'document':
258292
if ( '|' === $line[0] ) {
259-
$candidate = substr( $line, 2 );
260-
261-
// Remove leading spaces and the trailing newline
262-
$trimmed = ltrim( substr( $candidate, 0, -1 ) );
263-
264-
// Text: "…
265-
if ( '"' === $trimmed[0] ) {
266-
// Skip for now
267-
break;
268-
}
269-
270-
// Attribute: name="value"
271-
if ( '"' === $trimmed[ strlen( $trimmed ) - 1 ] ) {
272-
$test_dom .= $candidate;
273-
break;
274-
}
275-
276-
// Tags: <tag-name>
277-
// Comments: <!-- comment text -->
278-
// Doctypes: <!DOCTYPE … >
279-
// Processing instructions: <?target >
280-
if ( '<' === $trimmed[0] && '>' === $trimmed[ strlen( $trimmed ) - 1 ] ) {
281-
// Tags: <tag-name>
282-
if ( ctype_alpha( $trimmed[1] ) ) {
283-
$test_dom .= $candidate;
284-
break;
285-
}
286-
// Skip everything else for now
287-
break;
288-
}
293+
$test_dom .= substr( $line, 2 );
289294
} else {
290-
// This is a text node that includes unescaped newlines.
291-
// Everything else should be single lines starting with "| ".
292-
// @todo Skip for now, add to $test_dom when we handle text nodes.
295+
$test_dom .= $line;
293296
}
294297
break;
295298
}

0 commit comments

Comments
 (0)