@@ -260,16 +260,115 @@ public function test_style_contents_are_not_escaped() {
/**
 * Ensures that XMP contents are not escaped, as they are not parsed like text nodes are.
 *
 * XMP contents are parsed as raw text: character references are never decoded.
 * Escaping the contents would change the document, e.g. a "<" would be replaced
 * by the literal text "&lt;" after serializing and re-parsing.
 *
 * @ticket 65372
 */
public function test_xmp_contents_are_not_escaped() {
	/*
	 * NOTE(review): the spacing inside both fixtures is reproduced as it appears
	 * in the reviewed diff; confirm it against the upstream file. The input and
	 * the expectation differ only in the NULL byte and its U+FFFD replacement.
	 */
	$this->assertSame(
		"<xmp>1 < 2 & apples > or \u{FFFD} anges </xmp>",
		WP_HTML_Processor::normalize( "<xmp>1 < 2 & apples > or \x00 anges </xmp>" ),
		'Should have preserved text inside an XMP element, except for replacing NULL bytes.'
	);
}
272276
/**
 * Verifies that normalization leaves the raw text inside IFRAME, NOEMBED,
 * and NOFRAMES elements intact.
 *
 * The text inside these elements belongs to the parsed document, so a
 * serializer that discards it would yield a different document once the
 * output is parsed again.
 *
 * @ticket 65372
 *
 * @dataProvider data_rawtext_elements_with_contents
 *
 * @param string $html Already-normalized HTML holding a rawtext element with contents.
 */
public function test_rawtext_element_contents_are_preserved_when_normalizing( string $html ) {
	// The input is already in normal form, so normalizing it must be a no-op.
	$normalized = WP_HTML_Processor::normalize( $html );

	$this->assertSame( $html, $normalized, 'Should have preserved the rawtext element contents. ' );
}
298+
/**
 * Data provider.
 *
 * Each dataset holds one HTML string that the consuming test expects to get
 * back unchanged from WP_HTML_Processor::normalize().
 *
 * @return array[] Datasets of the form `array( $html )`, keyed by description.
 */
public static function data_rawtext_elements_with_contents() {
	return array(
		'IFRAME with following text'       => array( '<iframe>x</iframe>y' ),
		'NOEMBED with following text'      => array( '<noembed>x</noembed>y' ),
		'NOFRAMES with following text'     => array( '<section><noframes>x</noframes>y</section>' ),
		'NOFRAMES before comment'          => array( '<section><noframes>x</noframes><!----></section>' ),
		'IFRAME with markup-like contents' => array( '<iframe><div>inert</div></iframe>' ),

		// The reference is spelled out so the dataset really contains one.
		'NOEMBED with character reference' => array( '<noembed>&amp;</noembed>' ),

		/*
		 * NOTE(review): presumably an "iframe" inside SVG holds ordinary text
		 * rather than raw text, so a "<" must appear as "&lt;" for this string
		 * to be unchanged by normalization; confirm against the upstream fixture.
		 */
		'IFRAME in foreign content'        => array( '<svg><iframe>1 &lt; 2</iframe></svg>' ),
	);
}
315+
/**
 * Verifies that serializing a full document keeps the contents of IFRAME,
 * NOEMBED, and NOFRAMES elements, covering NOFRAMES elements found in the
 * HEAD or following a FRAMESET as well.
 *
 * @ticket 65372
 *
 * @dataProvider data_full_documents_with_rawtext_elements
 *
 * @param string $html     HTML document given to the full parser.
 * @param string $expected Serialization the full document should produce.
 */
public function test_rawtext_element_contents_are_preserved_in_full_documents( string $html, string $expected ) {
	// Parse as a complete document rather than as a fragment, then serialize it.
	$serialized = WP_HTML_Processor::create_full_parser( $html )->serialize();

	$this->assertSame( $expected, $serialized, 'Should have preserved the rawtext element contents. ' );
}
337+
/**
 * Data provider.
 *
 * Each dataset pairs an input document with the serialization that the
 * consuming test expects from a full parser.
 *
 * @return array[] Datasets of the form `array( $html, $expected )`, keyed by description.
 */
public static function data_full_documents_with_rawtext_elements() {
	/*
	 * No string here may carry padding before its closing quote: trailing
	 * whitespace on the input is document content, so it would not line up
	 * with whitespace placed after the closing HTML tag of the expectation.
	 */
	return array(
		'IFRAME in BODY'          => array(
			'<iframe>x</iframe>y',
			'<html><head></head><body><iframe>x</iframe>y</body></html>',
		),
		'NOEMBED in BODY'         => array(
			'a<noembed>x</noembed>',
			'<html><head></head><body>a<noembed>x</noembed></body></html>',
		),
		'NOFRAMES in BODY'        => array(
			'a<noframes>x</noframes>',
			'<html><head></head><body>a<noframes>x</noframes></body></html>',
		),
		'NOFRAMES in HEAD'        => array(
			'<head><noframes>x</noframes></head>z',
			'<html><head><noframes>x</noframes></head><body>z</body></html>',
		),
		'NOFRAMES in FRAMESET'    => array(
			'<html><frameset><noframes>x</noframes>',
			'<html><head></head><frameset><noframes>x</noframes></frameset></html>',
		),
		'IFRAME before a comment' => array(
			'<h3><div><small><dd><iframe>x</iframe><!---->',
			'<html><head></head><body><h3><div><small><dd><iframe>x</iframe><!----></dd></small></div></h3></body></html>',
		),
	);
}
371+
273372 public function test_unexpected_closing_tags_are_removed () {
274373 $ this ->assertSame (
275374 WP_HTML_Processor::normalize ( 'one</div>two</span>three ' ),
@@ -447,6 +546,10 @@ public static function data_tokens_with_null_bytes() {
447546 'Foreign content text ' => array ( "<svg>one \x00two</svg> " , "<svg>one \u{FFFD}two</svg> " ),
448547 'SCRIPT content ' => array ( "<script>alert( \x00)</script> " , "<script>alert( \u{FFFD})</script> " ),
449548 'STYLE content ' => array ( "<style> \x00 {}</style> " , "<style> \u{FFFD} {}</style> " ),
549+ 'IFRAME content ' => array ( "<iframe>a \x00b</iframe> " , "<iframe>a \u{FFFD}b</iframe> " ),
550+ 'NOEMBED content ' => array ( "<noembed>a \x00b</noembed> " , "<noembed>a \u{FFFD}b</noembed> " ),
551+ 'NOFRAMES content ' => array ( "<noframes>a \x00b</noframes> " , "<noframes>a \u{FFFD}b</noframes> " ),
552+ 'XMP content ' => array ( "<xmp>a \x00b</xmp> " , "<xmp>a \u{FFFD}b</xmp> " ),
450553 'Comment text ' => array ( "<!-- \x00 --> " , "<!-- \u{FFFD} --> " ),
451554 );
452555 }
0 commit comments