Skip to content

Commit ff1d816

Browse files
committed
Implement scan all tokens handling
1 parent c0e547f commit ff1d816

1 file changed

Lines changed: 64 additions & 61 deletions

File tree

tests/phpunit/tests/html-api/wpHtmlProcessorHtml5lib.php

Lines changed: 64 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,6 @@ public function data_external_html5lib_tests() {
9494
continue;
9595
}
9696

97-
// These tests contain no tags, which isn't yet
98-
// supported by the HTML API.
99-
if ( 'comments01.dat' === $entry ) {
100-
continue;
101-
}
102-
10397
foreach ( self::parse_html5_dat_testfile( $test_dir . $entry ) as $k => $test ) {
10498
// strip .dat extension from filename
10599
$test_suite = substr( $entry, 0, -4 );
@@ -120,34 +114,74 @@ public function data_external_html5lib_tests() {
120114
*/
121115
public static function build_html5_treelike_string( $fragment_context, $html ) {
122116
$p = WP_HTML_Processor::create_fragment( $html, "<{$fragment_context}>" );
117+
123118
if ( null === $p ) {
124119
return null;
125120
}
126121

127122
$output = "<html>\n <head>\n <body>\n";
128-
while ( $p->next_tag() ) {
129-
$indent = '';
130123

131-
// Breadcrumbs include this tag, so skip 1 nesting level.
132-
foreach ( $p->get_breadcrumbs() as $index => $_ ) {
133-
if ( $index ) {
134-
$indent .= ' ';
135-
}
124+
// If we haven't set our bookmark, assume we're 2 levels deep:
125+
// html > body > [position]
126+
$indent_level = 2;
127+
$indent = ' ';
128+
129+
while ( $p->next_token() ) {
130+
if ( $p->get_last_error() !== null ) {
131+
return null;
136132
}
137-
$t = strtolower( $p->get_tag() );
138-
$output .= "{$indent}<{$t}>\n";
139-
140-
$attribute_names = $p->get_attribute_names_with_prefix( '' );
141-
sort( $attribute_names, SORT_STRING );
142-
143-
foreach ( $attribute_names as $attribute_name ) {
144-
$val = $p->get_attribute( $attribute_name );
145-
// Attributes with no value are `true` with the HTML API,
146-
// so we use the empty string value in the tree structure.
147-
if ( true === $val ) {
148-
$val = '';
149-
}
150-
$output .= "{$indent} {$attribute_name}=\"{$val}\"\n";
133+
134+
switch ( $p->get_token_type() ) {
135+
case '#tag':
136+
if ( $p->is_tag_closer() ) {
137+
$indent_level--;
138+
break;
139+
}
140+
141+
$indent_level = count( $p->get_breadcrumbs() );
142+
143+
$t = strtolower( $p->get_tag() );
144+
$output .= str_repeat( $indent, $indent_level - 1 ) . "<{$t}>\n";
145+
146+
$attribute_names = $p->get_attribute_names_with_prefix( '' );
147+
if ( $attribute_names ) {
148+
sort( $attribute_names, SORT_STRING );
149+
150+
foreach ( $attribute_names as $attribute_name ) {
151+
$val = $p->get_attribute( $attribute_name );
152+
// Attributes with no value are `true` with the HTML API,
153+
// so we use the empty string value in the tree structure.
154+
if ( true === $val ) {
155+
$val = '';
156+
}
157+
$output .= str_repeat( $indent, $indent_level ) . "{$attribute_name}=\"{$val}\"\n";
158+
}
159+
}
160+
break;
161+
162+
case '#text':
163+
$output .= str_repeat( $indent, $indent_level ) . "\"{$p->get_modifiable_text()}\"\n";
164+
break;
165+
166+
case '#cdata-section':
167+
break;
168+
169+
case '#processing-instruction':
170+
break;
171+
172+
case '#comment':
173+
// Comments must be "<" then "!-- " then the data then " -->".
174+
$output .= str_repeat( $indent, $indent_level ) . "<!-- {$p->get_modifiable_text()} -->\n";
175+
break;
176+
177+
case '#doctype':
178+
break;
179+
180+
case '#presumptuous-tag':
181+
break;
182+
183+
case '#funky-comment':
184+
break;
151185
}
152186
}
153187

@@ -159,7 +193,7 @@ public static function build_html5_treelike_string( $fragment_context, $html ) {
159193
return null;
160194
}
161195

162-
return $output;
196+
return $output . "\n";
163197
}
164198

165199
/**
@@ -258,40 +292,9 @@ public static function parse_html5_dat_testfile( $filename ) {
258292
*/
259293
case 'document':
260294
if ( '|' === $line[0] ) {
261-
$candidate = substr( $line, 2 );
262-
263-
// Remove leading spaces and the trailing newline
264-
$trimmed = ltrim( substr( $candidate, 0, -1 ) );
265-
266-
// Text: "…
267-
if ( '"' === $trimmed[0] ) {
268-
// Skip for now
269-
break;
270-
}
271-
272-
// Attribute: name="value"
273-
if ( '"' === $trimmed[ strlen( $trimmed ) - 1 ] ) {
274-
$test_dom .= $candidate;
275-
break;
276-
}
277-
278-
// Tags: <tag-name>
279-
// Comments: <!-- comment text -->
280-
// Doctypes: <!DOCTYPE … >
281-
// Processing instructions: <?target >
282-
if ( '<' === $trimmed[0] && '>' === $trimmed[ strlen( $trimmed ) - 1 ] ) {
283-
// Tags: <tag-name>
284-
if ( ctype_alpha( $trimmed[1] ) ) {
285-
$test_dom .= $candidate;
286-
break;
287-
}
288-
// Skip everything else for now
289-
break;
290-
}
295+
$test_dom .= substr( $line, 2 );
291296
} else {
292-
// This is a text node that includes unescaped newlines.
293-
// Everything else should be single lines starting with "| ".
294-
// @todo Skip for now, add to $test_dom when we handle text nodes.
297+
$test_dom .= $line;
295298
}
296299
break;
297300
}

0 commit comments

Comments
 (0)