@@ -322,7 +322,7 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
322322 * be found or if content in the document caused the processor
323323 * to give up and abort processing.
324324 *
325- * Example
325+ * Example:
326326 *
327327 * $processor = WP_HTML_Processor::create_fragment( '<template><strong><button><em><p><em>' );
328328 * false === $processor->next_tag();
@@ -473,6 +473,93 @@ public function matches_breadcrumbs( $breadcrumbs ) {
473473 return false ;
474474 }
475475
/**
 * Returns the raw HTML content inside a matched tag.
 *
 * "Markup" differs from inner HTML in that it returns the raw HTML inside the matched tag.
 * This means that it's possible this returns HTML without matching tags, or with HTML attributes
 * serialized differently than a DOM API would return.
 *
 * After reading the markup the processor seeks back to the tag it was matched on.
 *
 * Example:
 *
 *     $processor = WP_HTML_Processor::create_fragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     'Inside <em>P</em> <i>tags' === $processor->get_raw_inner_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The inner markup if available, else NULL.
 */
public function get_raw_inner_markup() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	/*
	 * The opening bookmark is both the left edge of the clipped region and the place
	 * to return to afterwards; without it there is nothing meaningful to compute.
	 *
	 * NOTE(review): the fixed names 'start' and 'end' may overwrite bookmarks of the
	 * same name set by calling code - confirm whether reserved names are needed.
	 */
	if ( ! $this->set_bookmark( 'start' ) ) {
		return null;
	}

	try {
		$found_tag = $this->step_until_tag_is_closed();

		if ( ! $found_tag ) {
			// If there's no closing tag then the inner markup continues to the end of the document.
			$inner_markup = $this->substr_bookmark( 'after', 'start' );
		} elseif ( $this->set_bookmark( 'end' ) ) {
			$inner_markup = $this->substr_bookmarks( 'after', 'start', 'before', 'end' );
		} else {
			// The closing point could not be bookmarked, so the region cannot be clipped.
			$inner_markup = null;
		}

		$this->seek( 'start' );

		return $inner_markup;
	} finally {
		// Release the temporary bookmarks even when stepping throws.
		$this->release_bookmark( 'start' );
		$this->release_bookmark( 'end' );
	}
}
517+
/**
 * Returns the raw HTML content around a matched tag, including the tag itself.
 *
 * "Markup" differs from outer HTML in that it returns the raw HTML of the matched tag
 * and its contents exactly as written in the input document. This means that it's
 * possible this returns HTML without matching tags, or with HTML attributes
 * serialized differently than a DOM API would return.
 *
 * After reading the markup the processor seeks back to the tag it was matched on.
 *
 * Example:
 *
 *     $processor = WP_HTML_Processor::create_fragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     '<p>Inside <em>P</em> <i>tags' === $processor->get_raw_outer_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The outer markup if available, else NULL.
 */
public function get_raw_outer_markup() {
	/*
	 * Capture the tag name before stepping away from the opening tag. It is compared
	 * below against `get_tag()` on the token where stepping stopped.
	 */
	$start_tag = $this->get_tag();
	if ( null === $start_tag ) {
		return null;
	}

	// Without the opening bookmark there is no left edge to clip from and no place to return to.
	if ( ! $this->set_bookmark( 'start' ) ) {
		return null;
	}

	try {
		$found_tag = $this->step_until_tag_is_closed();

		if ( ! $found_tag ) {
			// If there's no closing tag then the outer markup continues to the end of the document.
			$outer_markup = $this->substr_bookmark( 'before', 'start' );
		} elseif ( $this->set_bookmark( 'end' ) ) {
			/*
			 * A closing tag of the same name is part of the outer markup. When stepping
			 * stopped on any other token, the markup ends just before that token.
			 */
			$did_close    = $this->get_tag() === $start_tag && $this->is_tag_closer();
			$end_position = $did_close ? 'after' : 'before';
			$outer_markup = $this->substr_bookmarks( 'before', 'start', $end_position, 'end' );
		} else {
			// The closing point could not be bookmarked, so the region cannot be clipped.
			$outer_markup = null;
		}

		$this->seek( 'start' );

		return $outer_markup;
	} finally {
		// Release the temporary bookmarks even when stepping throws.
		$this->release_bookmark( 'start' );
		$this->release_bookmark( 'end' );
	}
}
562+
476563 /**
477564 * Steps through the HTML document and stop at the next tag, if any.
478565 *
@@ -512,12 +599,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
512599 $ this ->state ->stack_of_open_elements ->pop ();
513600 }
514601
515- parent ::next_tag ( self ::VISIT_EVERYTHING );
516- }
517-
518- // Finish stepping when there are no more tokens in the document.
519- if ( null === $ this ->get_tag () ) {
520- return false ;
602+ if ( ! parent ::next_tag ( self ::VISIT_EVERYTHING ) ) {
603+ return false ;
604+ }
521605 }
522606
523607 $ this ->state ->current_token = new WP_HTML_Token (
@@ -555,7 +639,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
555639 * to avoid creating the array copy on tag iteration. If this is done, it would likely
556640 * be more useful to walk up the stack when yielding instead of starting at the top.
557641 *
558- * Example
642+ * Example:
559643 *
560644 * $processor = WP_HTML_Processor::create_fragment( '<p><strong><em><img></em></strong></p>' );
561645 * $processor->next_tag( 'IMG' );
@@ -800,6 +884,98 @@ private function bookmark_tag() {
800884 return "{$ this ->bookmark_counter }" ;
801885 }
802886
/**
 * Steps through the HTML document until the current open tag is closed.
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate bookmark for internal tracking.
 *
 * @return bool|null true if a closing tag was found, false if not, and null if not starting at a matched tag.
 */
private function step_until_tag_is_closed() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	/**
	 * Reference to the opening tag when calling this function. Read from the
	 * processor state, which is where `step()` stores the current token.
	 *
	 * @var WP_HTML_Token $start
	 */
	$start = $this->state->current_token;

	/** @var bool $keep_searching Whether to continue scanning for a point where the opening tag is closed. */
	$keep_searching = true;

	/** @var bool $found_tag Result of the most recent step; stays false if no step is ever taken. */
	$found_tag = false;

	/**
	 * Sets a flag indicating that the starting tag has been closed once
	 * it's popped from the stack of open elements. This is a listener function.
	 *
	 * @since 6.4.0
	 *
	 * @see WP_HTML_Open_Elements::with_pop_listener()
	 *
	 * @param WP_HTML_Token $node Node that was popped.
	 */
	$tag_is_closed = static function ( $node ) use ( &$keep_searching, $start ) {
		if ( $node === $start ) {
			$keep_searching = false;
		}
	};

	/*
	 * Normally, when stepping into each new element, it would be required to walk up the
	 * stack of open elements and look to see if the starting tag is still open, if it's
	 * on the stack. By listening for elements that are popped from the stack, however, it's
	 * possible to know if the starting tag has been closed without anything more than a
	 * constant boolean access, as the listener is called for each tag that's closed.
	 *
	 * The use of the `foreach` here creates a context which ensures that the listener is
	 * properly removed and cleaned up without having to manually remove it.
	 */
	foreach ( $this->state->stack_of_open_elements->with_pop_listener( $tag_is_closed ) as $_ ) {
		// Find where the tag is closed by stepping forward until it's no longer on the stack of open elements.
		do {
			$found_tag = $this->step();
		} while ( $found_tag && $keep_searching );
	}

	return $found_tag;
}
942+
/**
 * Clips the input HTML document from a bookmark through to the end of the document.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to start clipping at the bookmark's start offset,
 *                               anything else to start one byte past its end offset.
 * @param string $start          Bookmark name at which to start clipping.
 * @return string Clipped substring of input HTML document.
 */
private function substr_bookmark( $start_position, $start ) {
	// Bookmarks are looked up under the given name with a leading underscore.
	$span = $this->bookmarks[ "_{$start}" ];

	if ( 'before' === $start_position ) {
		return substr( $this->html, $span->start );
	}

	return substr( $this->html, $span->end + 1 );
}
958+
/**
 * Clips the region of the input HTML document that lies between two bookmarks.
 *
 * For each edge, "before" selects the bookmark's start offset and any other
 * value selects the offset one byte past the bookmark's end.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to clip before bookmark, "after" to clip after.
 * @param string $start          Bookmark name at which to start clipping.
 * @param string $end_position   "before" to clip before bookmark, "after" to clip after.
 * @param string $end            Bookmark name at which to end clipping.
 * @return string Clipped substring of input HTML document.
 */
private function substr_bookmarks( $start_position, $start, $end_position, $end ) {
	// Bookmarks are looked up under the given names with a leading underscore.
	$opening_span = $this->bookmarks[ "_{$start}" ];
	$closing_span = $this->bookmarks[ "_{$end}" ];

	$clip_from = $opening_span->start;
	if ( 'before' !== $start_position ) {
		$clip_from = $opening_span->end + 1;
	}

	$clip_to = $closing_span->start;
	if ( 'before' !== $end_position ) {
		$clip_to = $closing_span->end + 1;
	}

	return substr( $this->html, $clip_from, $clip_to - $clip_from );
}
978+
803979 /*
804980 * HTML semantic overrides for Tag Processor
805981 */
0 commit comments