Skip to content

Commit 4516dcc

Browse files
committed
WIP: Move actual token serialization into a single-unit method.
1 parent 7d10ed6 commit 4516dcc

1 file changed

Lines changed: 92 additions & 80 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 92 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -1122,110 +1122,122 @@ public function serialize(): ?string {
11221122

11231123
$html = '';
11241124
while ( $this->next_token() ) {
1125-
$token_type = $this->get_token_type();
1125+
$html .= $this->serialize_token();
1126+
}
11261127

1127-
switch ( $token_type ) {
1128-
case '#text':
1129-
$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
1130-
break;
1128+
if ( null !== $this->get_last_error() ) {
1129+
wp_trigger_error(
1130+
__METHOD__,
1131+
"Cannot serialize HTML Processor with parsing error: {$this->get_last_error()}.",
1132+
E_USER_WARNING
1133+
);
1134+
return null;
1135+
}
11311136

1132-
// Unlike the `<>` which is interpreted as plaintext, this is ignored entirely.
1133-
case '#presumptuous-tag':
1134-
break;
1137+
return $html;
1138+
}
11351139

1136-
case '#funky-comment':
1137-
$html .= "<!--{$this->get_modifiable_text()}-->";
1138-
break;
1140+
/**
1141+
* Serializes a token.
1142+
*
1143+
* @return string Serialization of token, or empty string if no serialization exists.
1144+
*/
1145+
protected function serialize_token(): string {
1146+
$html = '';
1147+
$token_type = $this->get_token_type();
11391148

1140-
case '#comment':
1141-
switch ( $this->get_comment_type() ) {
1142-
case WP_HTML_Tag_Processor::COMMENT_AS_CDATA_LOOKALIKE:
1143-
$html .= "<!--[CDATA[{$this->get_modifiable_text()}]]-->";
1144-
break;
1149+
switch ( $token_type ) {
1150+
case '#text':
1151+
$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
1152+
break;
11451153

1146-
case WP_HTML_Tag_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
1147-
$html .= "<!--?{$this->get_tag()}{$this->get_modifiable_text()}?-->";
1148-
break;
1154+
// Unlike the `<>` which is interpreted as plaintext, this is ignored entirely.
1155+
case '#presumptuous-tag':
1156+
break;
11491157

1150-
default:
1151-
$html .= "<!--{$this->get_modifiable_text()}-->";
1152-
}
1153-
break;
1158+
case '#funky-comment':
1159+
$html .= "<!--{$this->get_modifiable_text()}-->";
1160+
break;
11541161

1155-
case '#cdata-section':
1156-
$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
1157-
break;
1162+
case '#comment':
1163+
switch ( $this->get_comment_type() ) {
1164+
case WP_HTML_Tag_Processor::COMMENT_AS_CDATA_LOOKALIKE:
1165+
$html .= "<!--[CDATA[{$this->get_modifiable_text()}]]-->";
1166+
break;
11581167

1159-
case 'html':
1160-
$html .= '<!DOCTYPE html>';
1161-
break;
1162-
}
1168+
case WP_HTML_Tag_Processor::COMMENT_AS_PI_NODE_LOOKALIKE:
1169+
$html .= "<!--?{$this->get_tag()}{$this->get_modifiable_text()}?-->";
1170+
break;
11631171

1164-
if ( '#tag' !== $token_type ) {
1165-
continue;
1166-
}
1172+
default:
1173+
$html .= "<!--{$this->get_modifiable_text()}-->";
1174+
}
1175+
break;
11671176

1168-
$tag_name = $this->get_tag();
1169-
$in_html = 'html' === $this->get_namespace();
1170-
$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
1177+
case '#cdata-section':
1178+
$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
1179+
break;
11711180

1172-
if ( $this->is_tag_closer() ) {
1173-
$html .= "</{$qualified_name}>";
1174-
continue;
1175-
}
1181+
case 'html':
1182+
$html .= '<!DOCTYPE html>';
1183+
break;
1184+
}
11761185

1177-
$attribute_names = $this->get_attribute_names_with_prefix( '' );
1178-
if ( ! isset( $attribute_names ) ) {
1179-
$html .= "<{$qualified_name}>";
1180-
continue;
1181-
}
1186+
if ( '#tag' !== $token_type ) {
1187+
return $html;
1188+
}
11821189

1183-
$html .= "<{$qualified_name}";
1184-
foreach ( $attribute_names as $attribute_name ) {
1185-
$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
1186-
$value = $this->get_attribute( $attribute_name );
1190+
$tag_name = $this->get_tag();
1191+
$in_html = 'html' === $this->get_namespace();
1192+
$qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name();
11871193

1188-
if ( is_string( $value ) ) {
1189-
$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
1190-
}
1191-
}
1194+
if ( $this->is_tag_closer() ) {
1195+
$html .= "</{$qualified_name}>";
1196+
return $html;
1197+
}
1198+
1199+
$attribute_names = $this->get_attribute_names_with_prefix( '' );
1200+
if ( ! isset( $attribute_names ) ) {
1201+
$html .= "<{$qualified_name}>";
1202+
return $html;
1203+
}
1204+
1205+
$html .= "<{$qualified_name}";
1206+
foreach ( $attribute_names as $attribute_name ) {
1207+
$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
1208+
$value = $this->get_attribute( $attribute_name );
11921209

1193-
if ( ! $in_html && $this->has_self_closing_flag() ) {
1194-
$html .= ' /';
1210+
if ( is_string( $value ) ) {
1211+
$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"';
11951212
}
1213+
}
11961214

1197-
$html .= '>';
1215+
if ( ! $in_html && $this->has_self_closing_flag() ) {
1216+
$html .= ' /';
1217+
}
11981218

1199-
// Flush out self-contained elements.
1200-
if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) {
1201-
$text = $this->get_modifiable_text();
1219+
$html .= '>';
12021220

1203-
switch ( $tag_name ) {
1204-
case 'IFRAME':
1205-
case 'NOEMBED':
1206-
case 'NOFRAMES':
1207-
$text = '';
1208-
break;
1221+
// Flush out self-contained elements.
1222+
if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) {
1223+
$text = $this->get_modifiable_text();
12091224

1210-
case 'SCRIPT':
1211-
case 'STYLE':
1212-
break;
1225+
switch ( $tag_name ) {
1226+
case 'IFRAME':
1227+
case 'NOEMBED':
1228+
case 'NOFRAMES':
1229+
$text = '';
1230+
break;
12131231

1214-
default:
1215-
$text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
1216-
}
1232+
case 'SCRIPT':
1233+
case 'STYLE':
1234+
break;
12171235

1218-
$html .= "{$text}</{$qualified_name}>";
1236+
default:
1237+
$text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
12191238
}
1220-
}
12211239

1222-
if ( null !== $this->get_last_error() ) {
1223-
wp_trigger_error(
1224-
__METHOD__,
1225-
"Cannot serialize HTML Processor with parsing error: {$this->get_last_error()}.",
1226-
E_USER_WARNING
1227-
);
1228-
return null;
1240+
$html .= "{$text}</{$qualified_name}>";
12291241
}
12301242

12311243
return $html;

0 commit comments

Comments
 (0)