|
| 1 | +<?php |
| 2 | +/** |
| 3 | + * Unit tests covering WP_HTML_Processor serialization functionality. |
| 4 | + * |
| 5 | + * @package WordPress |
| 6 | + * @subpackage HTML-API |
| 7 | + * |
| 8 | + * @since 6.7.0 |
| 9 | + */ |
| 10 | + |
| 11 | +/** |
| 12 | + * @group html-api |
| 13 | + * |
| 14 | + * @coversDefaultClass WP_HTML_Processor |
| 15 | + */ |
class Tests_HtmlApi_WpHtmlProcessor_Serialize extends WP_UnitTestCase {
	/**
	 * Ensures that basic text is properly encoded when serialized.
	 *
	 * The input contains a raw ">" and a NULL byte; the expected output has the
	 * ">" written as a character reference and no NULL byte.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_properly_encodes_text() {
		$this->assertSame(
			'apples &gt; oranges',
			WP_HTML_Processor::normalize( "apples > or\x00anges" ),
			'Should have returned an HTML string with applicable characters properly encoded.'
		);
	}

	/**
	 * Ensures that unclosed elements are explicitly closed to ensure proper HTML isolation.
	 *
	 * When thinking about embedding HTML fragments into others, it's important that unclosed
	 * elements aren't left dangling, otherwise a snippet of HTML may "swallow" parts of the
	 * document that follow it.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_closes_unclosed_elements_at_end() {
		$this->assertSame(
			'<div></div>',
			WP_HTML_Processor::normalize( '<div>' ),
			'Should have provided the explicit closer to the un-closed DIV element.'
		);
	}

	/**
	 * Ensures that boolean attributes remain boolean and do not gain values.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_boolean_attributes_remain_boolean() {
		$this->assertSame(
			'<input disabled>',
			WP_HTML_Processor::normalize( '<input disabled>' ),
			'Should have preserved the boolean attribute upon serialization.'
		);
	}

	/**
	 * Ensures that attributes with values result in double-quoted attribute values.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_attributes_are_double_quoted() {
		$this->assertSame(
			'<p id="3"></p>',
			WP_HTML_Processor::normalize( '<p id=3></p>' ),
			'Should double-quote all attribute values.'
		);
	}

	/**
	 * Ensures that self-closing flags on HTML void elements are not serialized, to
	 * prevent risk of conflating the flag with unquoted attribute values.
	 *
	 * Example:
	 *
	 *     BR element with "class" attribute having value "clear"
	 *     <br class="clear"/>
	 *
	 *     BR element with "class" attribute having value "clear"
	 *     <br class=clear />
	 *
	 *     BR element with "class" attribute having value "clear/"
	 *     <br class=clear/>
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_void_elements_get_no_dangerous_self_closing_flag() {
		$this->assertSame(
			'<br class="clear">',
			WP_HTML_Processor::normalize( '<br class="clear"/>' ),
			'Should have removed dangerous self-closing flag on HTML void element.'
		);
	}

	/**
	 * Ensures that duplicate attributes are removed upon serialization.
	 *
	 * The input repeats the "one" attribute in unquoted, double-quoted,
	 * single-quoted, and boolean forms; only the first copy is expected to remain.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_duplicate_attributes_are_removed() {
		$this->assertSame(
			'<div one="1"></div>',
			WP_HTML_Processor::normalize( '<div one=1 one="one" one=\'won\' one>' ),
			'Should have removed all but the first copy of an attribute when duplicates exist.'
		);
	}

	/**
	 * Ensures that SCRIPT contents are not escaped, as they are not parsed like text nodes are.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_script_contents_are_not_escaped() {
		$this->assertSame(
			"<script>apples > or\u{FFFD}anges</script>",
			WP_HTML_Processor::normalize( "<script>apples > or\x00anges</script>" ),
			'Should have preserved text inside a SCRIPT element, except for replacing NULL bytes.'
		);
	}

	/**
	 * Ensures that STYLE contents are not escaped, as they are not parsed like text nodes are.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_style_contents_are_not_escaped() {
		$this->assertSame(
			"<style>apples > or\u{FFFD}anges</style>",
			WP_HTML_Processor::normalize( "<style>apples > or\x00anges</style>" ),
			'Should have preserved text inside a STYLE element, except for replacing NULL bytes.'
		);
	}

	/**
	 * Ensures that closing tags with no matching open element are dropped from the serialization.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_unexpected_closing_tags_are_removed() {
		$this->assertSame(
			'onetwothree',
			WP_HTML_Processor::normalize( 'one</div>two</span>three' ),
			'Should have removed unexpected closing tags.'
		);
	}

	/**
	 * Ensures that self-closing elements in foreign content retain their self-closing flag.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_self_closing_foreign_elements_retain_their_self_closing_flag() {
		$this->assertSame(
			'<svg><g><g /></g></svg>',
			WP_HTML_Processor::normalize( '<svg><g><g /></svg>' ),
			'Should have closed unclosed G element, but preserved the self-closing nature of the other G element.'
		);
	}

	/**
	 * Ensures that incomplete syntax elements at the end of an HTML string are removed from
	 * the serialization, since these are often vectors of exploits for the successive HTML.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 *
	 * @dataProvider data_incomplete_syntax_tokens
	 *
	 * @param string $incomplete_token An incomplete HTML syntax token.
	 */
	public function test_should_remove_incomplete_input_from_end( string $incomplete_token ) {
		$this->assertSame(
			'content',
			WP_HTML_Processor::normalize( "content{$incomplete_token}" ),
			'Should have removed the incomplete token from the end of the input.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Each dataset holds a single string: a syntax token that is opened but never completed.
	 *
	 * @return array[]
	 */
	public static function data_incomplete_syntax_tokens() {
		return array(
			'Comment opener'       => array( '<!--' ),
			'Bogus comment opener' => array( '<![sneaky[' ),
			'Incomplete tag'       => array( '<my-custom status="pending"' ),
			'SCRIPT opening tag'   => array( '<script>' ),
		);
	}

	/**
	 * Ensures that presumptuous tag openers are treated as plaintext.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_encodes_presumptuous_opening_tags() {
		$this->assertSame(
			'&lt;&gt;',
			WP_HTML_Processor::normalize( '<>' ),
			'Should have encoded the invalid presumptuous opening tag as plaintext.'
		);
	}

	/**
	 * Ensures that presumptuous tag closers are skipped in serialization.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 */
	public function test_skips_presumptuous_closing_tags() {
		$this->assertSame(
			'',
			WP_HTML_Processor::normalize( '</>' ),
			'Should have completely ignored the presumptuous tag closer.'
		);
	}

	/**
	 * Ensures that invalid or "bogus" comments in HTML are normalized to their proper normative form.
	 *
	 * The input is the concatenation of the three parameters; the expected output
	 * is the comment text wrapped in the normative `<!--` and `-->` delimiters.
	 *
	 * @ticket 62036
	 *
	 * @covers ::normalize
	 *
	 * @dataProvider data_bogus_comments
	 *
	 * @param string $opening      Start of bogus comment, e.g. "<!".
	 * @param string $comment_text Comment content, as reported in a browser.
	 * @param string $closing      End of bogus comment, e.g. ">".
	 */
	public function test_normalizes_bogus_comment_forms( string $opening, string $comment_text, string $closing ) {
		$this->assertSame(
			"<!--{$comment_text}-->",
			WP_HTML_Processor::normalize( "{$opening}{$comment_text}{$closing}" ),
			'Should have replaced the invalid comment syntax with normative syntax.'
		);
	}

	/**
	 * Data provider.
	 *
	 * Each dataset is an array of ( opening, comment text, closing ).
	 *
	 * @return array[]
	 */
	public static function data_bogus_comments() {
		return array(
			'False DOCTYPE'                         => array( '<!', 'html', '>' ),
			'CDATA look-alike'                      => array( '<!', '[CDATA[inside]]', '>' ),
			'Immediately-closed markup instruction' => array( '<!', '?', '>' ),
			'Warning Symbol'                        => array( '<!', '', '>' ),
			/*
			 * For "<?" openers the question mark is part of the comment text, not
			 * part of the opener: the HTML tokenizer reconsumes the "?" inside the
			 * bogus comment. This matches the XML look-alike dataset below.
			 */
			'PHP block look-alike'                  => array( '<', '?php foo(); ?', '>' ),
			'Funky comment'                         => array( '</', '%display-name', '>' ),
			'XML Processing Instruction look-alike' => array( '<', '?xml foo ', '>' ),
		);
	}
}
| 252 | + |
0 commit comments