@@ -309,7 +309,7 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
309309 * failed it's possible to request the last error. This can be
310310 * helpful to know if it's possible to fix something or to give up.
311311 *
312- * Example
312+ * Example:
313313 *
314314 * $p = WP_HTML_Processor::createFragment( '<template><strong><button><em><p><em>' );
315315 * false === $p->next_tag();
@@ -418,6 +418,93 @@ public function next_tag( $query = null ) {
418418 return false ;
419419 }
420420
/**
 * Returns the raw HTML content inside a matched tag.
 *
 * "Markup" differs from inner HTML in that it returns the raw HTML inside the matched tag.
 * This means that it's possible this returns HTML without matching tags, or with HTML attributes
 * serialized differently than a DOM API would return.
 *
 * After the markup has been extracted the processor seeks back to the tag
 * that was matched when this method was called.
 *
 * Example:
 *
 *     $processor = WP_HTML_Processor::createFragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     'Inside <em>P</em> <i>tags' === $processor->get_inner_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The inner markup if available, else NULL.
 */
public function get_inner_markup() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	// Without the starting bookmark there is nothing to clip from and nowhere to seek back to.
	if ( ! $this->set_bookmark( 'start' ) ) {
		throw new Exception( 'could not allocate bookmark' );
	}

	$found_tag = $this->step_until_tag_is_closed();

	if ( $found_tag ) {
		// The ending bookmark is only needed when a closing point was found.
		if ( ! $this->set_bookmark( 'end' ) ) {
			$this->seek( 'start' );
			$this->release_bookmark( 'start' );
			throw new Exception( 'could not allocate bookmark' );
		}

		$inner_markup = $this->substr_bookmarks( 'after', 'start', 'before', 'end' );
		$this->release_bookmark( 'end' );
	} else {
		// If there's no closing tag then the inner markup continues to the end of the document.
		$inner_markup = $this->substr_bookmark( 'after', 'start' );
	}

	// Restore the cursor to the originally-matched tag before cleaning up.
	$this->seek( 'start' );
	$this->release_bookmark( 'start' );

	return $inner_markup;
}
462+
/**
 * Returns the raw HTML content around a matched tag, including the tag itself.
 *
 * "Markup" differs from outer HTML in that it returns the raw HTML of the matched tag
 * and its contents exactly as they appear in the input document. This means that it's
 * possible this returns HTML without matching tags, or with HTML attributes serialized
 * differently than a DOM API would return.
 *
 * After the markup has been extracted the processor seeks back to the tag
 * that was matched when this method was called.
 *
 * Example:
 *
 *     $processor = WP_HTML_Processor::createFragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     '<p>Inside <em>P</em> <i>tags' === $processor->get_outer_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The outer markup if available, else NULL.
 */
public function get_outer_markup() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	// Without the starting bookmark there is nothing to clip from and nowhere to seek back to.
	if ( ! $this->set_bookmark( 'start' ) ) {
		throw new Exception( 'could not allocate bookmark' );
	}

	// Remember the starting tag name before stepping moves the cursor away from it.
	$start_tag = $this->current_token->node_name;
	$found_tag = $this->step_until_tag_is_closed();

	if ( $found_tag ) {
		// The ending bookmark is only needed when a closing point was found.
		if ( ! $this->set_bookmark( 'end' ) ) {
			$this->seek( 'start' );
			$this->release_bookmark( 'start' );
			throw new Exception( 'could not allocate bookmark' );
		}

		/*
		 * Only include the token at the stopping point when it is a closing
		 * tag whose name matches the starting tag; otherwise some other token
		 * closed the element and the clip ends just before that token.
		 */
		$did_close    = $this->get_tag() === $start_tag && $this->is_tag_closer();
		$end_position = $did_close ? 'after' : 'before';
		$outer_markup = $this->substr_bookmarks( 'before', 'start', $end_position, 'end' );
		$this->release_bookmark( 'end' );
	} else {
		// If there's no closing tag then the outer markup continues to the end of the document.
		$outer_markup = $this->substr_bookmark( 'before', 'start' );
	}

	// Restore the cursor to the originally-matched tag before cleaning up.
	$this->seek( 'start' );
	$this->release_bookmark( 'start' );

	return $outer_markup;
}
507+
421508 /**
422509 * Steps through the HTML document and stop at the next tag, if any.
423510 *
@@ -438,12 +525,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
438525 $ this ->state ->stack_of_open_elements ->pop ();
439526 }
440527
441- parent ::next_tag ( self ::VISIT_EVERYTHING );
442- }
443-
444- // Finish stepping when there are no more tokens in the document.
445- if ( null === $ this ->get_tag () ) {
446- return false ;
528+ if ( ! parent ::next_tag ( self ::VISIT_EVERYTHING ) ) {
529+ return false ;
530+ }
447531 }
448532
449533 $ this ->current_token = new WP_HTML_Token (
@@ -474,9 +558,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
474558 /**
475559 * Computes the HTML breadcrumbs for the currently-matched node, if matched.
476560 *
477- * Breadcrumbs start at the outer-most parent and descend toward the matched element.
561+ * Breadcrumbs start at the outermost parent and descend toward the matched element.
478562 *
479- * Example
563+ * Example:
480564 *
481565 * $p = WP_HTML_Processor::createFragment( '<p><strong><em><img></em></strong></p>' );
482566 * $p->next_tag( 'IMG' );
@@ -723,6 +807,98 @@ private function bookmark_tag() {
723807 return "{$ this ->bookmark_counter }" ;
724808 }
725809
/**
 * Steps through the HTML document until the current open tag is closed.
 *
 * Stepping stops as soon as the token that was current when this method was
 * called is popped from the stack of open elements, or when no further
 * step can be taken.
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate bookmark for internal tracking.
 *
 * @return bool|null true if a closing tag was found, false if not, and null if not starting at a matched tag.
 */
private function step_until_tag_is_closed() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	/** @var WP_HTML_Token $start Reference to the opening tag when calling this function. */
	$start = $this->current_token;

	/** @var bool $keep_searching Whether to continue scanning for a point where the opening tag is closed. */
	$keep_searching = true;

	/**
	 * Sets a flag indicating that the starting tag has been closed once
	 * it's popped from the stack of open elements. This is a listener function.
	 *
	 * @since 6.4.0
	 *
	 * @see WP_HTML_Open_Elements::with_pop_listener()
	 *
	 * @param WP_HTML_Token $node Node that was popped.
	 */
	$tag_is_closed = function ( $node ) use ( &$keep_searching, $start ) {
		if ( $node === $start ) {
			$keep_searching = false;
		}
	};

	/*
	 * Start from "not found" so that the return value is defined even if
	 * the listener context below never runs its body.
	 */
	$found_tag = false;

	/*
	 * Normally, when stepping into each new element, it would be required to walk up the
	 * stack of open elements and look to see if the starting tag is still open, if it's
	 * on the stack. By listening for elements that are popped from the stack, however, it's
	 * possible to know if the starting tag has been closed without anything more than a
	 * constant boolean access, as the listener is called for each tag that's closed.
	 *
	 * The use of the `foreach` here creates a context which ensures that the listener is
	 * properly removed and cleaned up without having to manually remove it.
	 */
	foreach ( $this->state->stack_of_open_elements->with_pop_listener( $tag_is_closed ) as $_ ) {
		// Find where the tag is closed by stepping forward until it's no longer on the stack of open elements.
		do {
			$found_tag = $this->step();
		} while ( $found_tag && $keep_searching );
	}

	return $found_tag;
}
865+
/**
 * Clips the input HTML document from a bookmarked location through to its end.
 *
 * The bookmark is looked up under its internal name, which is the given
 * name prefixed with an underscore.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to begin at the bookmark's `start` offset; any other
 *                               value begins one byte past the bookmark's `end` offset.
 * @param string $start          Bookmark name at which to start clipping.
 * @return string Clipped substring of the input HTML document.
 */
private function substr_bookmark( $start_position, $start ) {
	$bookmark = $this->bookmarks[ "_{$start}" ];

	if ( 'before' === $start_position ) {
		$offset = $bookmark->start;
	} else {
		$offset = $bookmark->end + 1;
	}

	return substr( $this->html, $offset );
}
881+
/**
 * Clips the span of the input HTML document that lies between two bookmarks.
 *
 * Both bookmarks are looked up under their internal names, which are the
 * given names prefixed with an underscore.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to begin at the starting bookmark's `start` offset; any
 *                               other value begins one byte past its `end` offset.
 * @param string $start          Bookmark name at which to start clipping.
 * @param string $end_position   "before" to stop at the ending bookmark's `start` offset; any
 *                               other value stops one byte past its `end` offset.
 * @param string $end            Bookmark name at which to end clipping.
 * @return string Clipped substring of the input HTML document.
 */
private function substr_bookmarks( $start_position, $start, $end_position, $end ) {
	$opening = $this->bookmarks[ "_{$start}" ];
	$closing = $this->bookmarks[ "_{$end}" ];

	if ( 'before' === $start_position ) {
		$from = $opening->start;
	} else {
		$from = $opening->end + 1;
	}

	if ( 'before' === $end_position ) {
		$to = $closing->start;
	} else {
		$to = $closing->end + 1;
	}

	$length = $to - $from;

	return substr( $this->html, $from, $length );
}
901+
726902 /*
727903 * HTML semantic overrides for Tag Processor
728904 */
0 commit comments