Skip to content

Commit c5f1924

Browse files
committed
Add basic unit test suite.
1 parent 4516dcc commit c5f1924

1 file changed

Lines changed: 252 additions & 0 deletions

File tree

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
<?php
2+
/**
3+
* Unit tests covering WP_HTML_Processor serialization functionality.
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
*
8+
* @since 6.7.0
9+
*/
10+
11+
/**
 * Exercises WP_HTML_Processor::normalize() against a range of well-formed
 * and malformed HTML inputs and checks the exact serialized output.
 *
 * Every assertion passes the expected string first and the normalized
 * output second, matching PHPUnit's `assertSame( $expected, $actual )`
 * signature so that failure reports label the two values correctly.
 *
 * @group html-api
 *
 * @coversDefaultClass WP_HTML_Processor
 */
class Tests_HtmlApi_WpHtmlProcessor_Serialize extends WP_UnitTestCase {
	/**
	 * Ensures that basic text is properly encoded when serialized.
	 *
	 * The input contains a literal `>` and a NULL byte; the expected output
	 * has the `>` encoded and the NULL byte absent.
	 *
	 * @ticket 62036
	 */
	public function test_properly_encodes_text() {
		$this->assertSame(
			'apples &gt; oranges',
			WP_HTML_Processor::normalize( "apples > or\x00anges" ),
			'Should have returned an HTML string with applicable characters properly encoded.'
		);
	}

	/**
	 * Ensures that unclosed elements are explicitly closed to ensure proper HTML isolation.
	 *
	 * When thinking about embedding HTML fragments into others, it's important that unclosed
	 * elements aren't left dangling, otherwise a snippet of HTML may "swallow" parts of the
	 * document that follow it.
	 *
	 * @ticket 62036
	 */
	public function test_closes_unclosed_elements_at_end() {
		$this->assertSame(
			'<div></div>',
			WP_HTML_Processor::normalize( '<div>' ),
			'Should have provided the explicit closer to the un-closed DIV element.'
		);
	}

	/**
	 * Ensures that boolean attributes remain boolean and do not gain values.
	 *
	 * @ticket 62036
	 */
	public function test_boolean_attributes_remain_boolean() {
		$this->assertSame(
			'<input disabled>',
			WP_HTML_Processor::normalize( '<input disabled>' ),
			'Should have preserved the boolean attribute upon serialization.'
		);
	}

	/**
	 * Ensures that attributes with values result in double-quoted attribute values.
	 *
	 * @ticket 62036
	 */
	public function test_attributes_are_double_quoted() {
		$this->assertSame(
			'<p id="3"></p>',
			WP_HTML_Processor::normalize( '<p id=3></p>' ),
			'Should double-quote all attribute values.'
		);
	}

	/**
	 * Ensures that self-closing flags on HTML void elements are not serialized, to
	 * prevent risk of conflating the flag with unquoted attribute values.
	 *
	 * Example:
	 *
	 *     BR element with "class" attribute having value "clear"
	 *     <br class="clear"/>
	 *
	 *     BR element with "class" attribute having value "clear"
	 *     <br class=clear />
	 *
	 *     BR element with "class" attribute having value "clear/"
	 *     <br class=clear/>
	 *
	 * @ticket 62036
	 */
	public function test_void_elements_get_no_dangerous_self_closing_flag() {
		$this->assertSame(
			'<br class="clear">',
			WP_HTML_Processor::normalize( '<br class="clear"/>' ),
			'Should have removed dangerous self-closing flag on HTML void element.'
		);
	}

	/**
	 * Ensures that duplicate attributes are removed upon serialization.
	 *
	 * @ticket 62036
	 */
	public function test_duplicate_attributes_are_removed() {
		$this->assertSame(
			'<div one="1"></div>',
			WP_HTML_Processor::normalize( '<div one=1 one="one" one=\'won\' one>' ),
			'Should have removed all but the first copy of an attribute when duplicates exist.'
		);
	}

	/**
	 * Ensures that SCRIPT contents are not escaped, as they are not parsed like text nodes are.
	 *
	 * The only expected change is the NULL byte becoming U+FFFD.
	 *
	 * @ticket 62036
	 */
	public function test_script_contents_are_not_escaped() {
		$this->assertSame(
			"<script>apples > or\u{FFFD}anges</script>",
			WP_HTML_Processor::normalize( "<script>apples > or\x00anges</script>" ),
			'Should have preserved text inside a SCRIPT element, except for replacing NULL bytes.'
		);
	}

	/**
	 * Ensures that STYLE contents are not escaped, as they are not parsed like text nodes are.
	 *
	 * The only expected change is the NULL byte becoming U+FFFD.
	 *
	 * @ticket 62036
	 */
	public function test_style_contents_are_not_escaped() {
		$this->assertSame(
			"<style>apples > or\u{FFFD}anges</style>",
			WP_HTML_Processor::normalize( "<style>apples > or\x00anges</style>" ),
			'Should have preserved text inside a STYLE element, except for replacing NULL bytes.'
		);
	}

	/**
	 * Ensures that closing tags with no matching open element are dropped
	 * from the serialization while the surrounding text is kept.
	 *
	 * @ticket 62036
	 */
	public function test_unexpected_closing_tags_are_removed() {
		$this->assertSame(
			'onetwothree',
			WP_HTML_Processor::normalize( 'one</div>two</span>three' ),
			'Should have removed unexpected closing tags.'
		);
	}

	/**
	 * Ensures that self-closing elements in foreign content retain their self-closing flag.
	 *
	 * @ticket 62036
	 */
	public function test_self_closing_foreign_elements_retain_their_self_closing_flag() {
		$this->assertSame(
			'<svg><g><g /></g></svg>',
			WP_HTML_Processor::normalize( '<svg><g><g /></svg>' ),
			'Should have closed unclosed G element, but preserved the self-closing nature of the other G element.'
		);
	}

	/**
	 * Ensures that incomplete syntax elements at the end of an HTML string are removed from
	 * the serialization, since these are often vectors of exploits for the successive HTML.
	 *
	 * @ticket 62036
	 *
	 * @dataProvider data_incomplete_syntax_tokens
	 *
	 * @param string $incomplete_token An incomplete HTML syntax token.
	 */
	public function test_should_remove_incomplete_input_from_end( string $incomplete_token ) {
		$this->assertSame(
			'content',
			WP_HTML_Processor::normalize( "content{$incomplete_token}" ),
			'Should have removed the incomplete token from the end of the input.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Each dataset holds a single string: a syntax token that is cut off
	 * before it is complete.
	 *
	 * @return array[]
	 */
	public static function data_incomplete_syntax_tokens() {
		return array(
			'Comment opener'       => array( '<!--' ),
			'Bogus comment opener' => array( '<![sneaky[' ),
			'Incomplete tag'       => array( '<my-custom status="pending"' ),
			'SCRIPT opening tag'   => array( '<script>' ),
		);
	}

	/**
	 * Ensures that presumptuous tag openers are treated as plaintext.
	 *
	 * @ticket 62036
	 */
	public function test_encodes_presumptuous_opening_tags() {
		$this->assertSame(
			'&lt;&gt;',
			WP_HTML_Processor::normalize( '<>' ),
			'Should have encoded the invalid presumptuous opening tag as plaintext.'
		);
	}

	/**
	 * Ensures that presumptuous tag closers are skipped in serialization.
	 *
	 * @ticket 62036
	 */
	public function test_skips_presumptuous_closing_tags() {
		$this->assertSame(
			'',
			WP_HTML_Processor::normalize( '</>' ),
			'Should have completely ignored the presumptuous tag closer.'
		);
	}

	/**
	 * Ensures that invalid or "bogus" comments in HTML are normalized to their proper normative form.
	 *
	 * The input is the concatenation of the three parameters; the expected
	 * output wraps only the comment text in `<!--` and `-->`.
	 *
	 * @ticket 62036
	 *
	 * @dataProvider data_bogus_comments
	 *
	 * @param string $opening      Start of bogus comment, e.g. "<!".
	 * @param string $comment_text Comment content, as reported in a browser.
	 * @param string $closing      End of bogus comment, e.g. ">".
	 */
	public function test_normalizes_bogus_comment_forms( string $opening, string $comment_text, string $closing ) {
		$this->assertSame(
			"<!--{$comment_text}-->",
			WP_HTML_Processor::normalize( "{$opening}{$comment_text}{$closing}" ),
			'Should have replaced the invalid comment syntax with normative syntax.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Each dataset is `array( $opening, $comment_text, $closing )`.
	 *
	 * For inputs starting with `<?`, the HTML tokenizer's tag-open state
	 * reconsumes the `?` as the first character of the bogus comment, so
	 * the `?` belongs to `$comment_text` and not to `$opening`.
	 *
	 * @return array[]
	 */
	public static function data_bogus_comments() {
		return array(
			'False DOCTYPE'                         => array( '<!', 'html', '>' ),
			'CDATA look-alike'                      => array( '<!', '[CDATA[inside]]', '>' ),
			'Immediately-closed markup instruction' => array( '<!', '?', '>' ),
			'Warning Symbol'                        => array( '<!', '', '>' ),
			'PHP block look-alike'                  => array( '<', '?php foo(); ?', '>' ),
			'Funky comment'                         => array( '</', '%display-name', '>' ),
			'XML Processing Instruction look-alike' => array( '<', '?xml foo ', '>' ),
		);
	}
}
252+

0 commit comments

Comments
 (0)