@@ -417,6 +417,89 @@ public function next_tag( $query = null ) {
417417 return false ;
418418 }
419419
/**
 * Returns the raw HTML content inside a matched tag.
 *
 * "Markup" differs from inner HTML in that it returns the raw HTML inside the matched tag.
 * This means that it's possible this returns HTML without matching tags, or with HTML attributes
 * serialized differently than a DOM API would return.
 *
 * Calling this method advances the processor: it keeps stepping until the matched
 * element is closed or the document ends, and it does not move back afterwards.
 *
 * Example
 *
 *     $processor = WP_HTML_Processor::createFragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     'Inside <em>P</em> <i>tags' === $processor->get_inner_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The inner markup if available, else NULL.
 */
public function get_inner_markup() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	// Without a bookmark on the opening tag there is no position to clip from.
	if ( ! parent::set_bookmark( 'start' ) ) {
		return null;
	}

	try {
		$found_tag = $this->step_until_tag_is_closed();

		if ( ! $found_tag ) {
			// If there's no closing tag then the inner markup continues to the end of the document.
			return $this->substr_bookmark( 'after', 'start' );
		}

		// The token which closed the element marks where the inner markup stops.
		if ( ! parent::set_bookmark( 'end' ) ) {
			return null;
		}

		return $this->substr_bookmarks( 'after', 'start', 'before', 'end' );
	} finally {
		// Release the internal bookmarks on every exit path, including when stepping throws.
		parent::release_bookmark( 'start' );
		parent::release_bookmark( 'end' );
	}
}
459+
/**
 * Returns the raw HTML content around a matched tag, including the tag itself.
 *
 * "Markup" differs from outer HTML in that it returns the raw HTML around the matched tag.
 * This means that it's possible this returns HTML without matching tags, or with HTML attributes
 * serialized differently than a DOM API would return.
 *
 * Calling this method advances the processor: it keeps stepping until the matched
 * element is closed or the document ends, and it does not move back afterwards.
 *
 * Example
 *
 *     $processor = WP_HTML_Processor::createFragment( '<div><p>Inside <em>P</em> <i>tags</div>' );
 *     $processor->next_tag( 'P' );
 *     '<p>Inside <em>P</em> <i>tags' === $processor->get_outer_markup();
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate a bookmark for internal tracking of the open tag.
 *
 * @return string|null The outer markup if available, else NULL.
 */
public function get_outer_markup() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	// Without a bookmark on the opening tag there is no position to clip from.
	if ( ! parent::set_bookmark( 'start' ) ) {
		return null;
	}

	try {
		// Remember the opening tag's name before stepping replaces the current token.
		$start_tag = $this->current_token->node_name;
		$found_tag = $this->step_until_tag_is_closed();

		if ( ! $found_tag ) {
			// If there's no closing tag then the outer markup continues to the end of the document.
			return $this->substr_bookmark( 'before', 'start' );
		}

		if ( ! parent::set_bookmark( 'end' ) ) {
			return null;
		}

		/*
		 * Only a closer carrying the same tag name is part of the outer markup.
		 * Any other token is excluded, so the clip stops just before it.
		 */
		$did_close    = $this->get_tag() === $start_tag && $this->is_tag_closer();
		$end_position = $did_close ? 'after' : 'before';

		return $this->substr_bookmarks( 'before', 'start', $end_position, 'end' );
	} finally {
		// Release the internal bookmarks on every exit path, including when stepping throws.
		parent::release_bookmark( 'start' );
		parent::release_bookmark( 'end' );
	}
}
502+
420503 /**
421504 * Steps through the HTML document and stop at the next tag, if any.
422505 *
@@ -437,12 +520,9 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
437520 $ this ->state ->stack_of_open_elements ->pop ();
438521 }
439522
440- parent ::next_tag ( self ::VISIT_EVERYTHING );
441- }
442-
443- // Finish stepping when there are no more tokens in the document.
444- if ( null === $ this ->get_tag () ) {
445- return false ;
523+ if ( ! parent ::next_tag ( self ::VISIT_EVERYTHING ) ) {
524+ return false ;
525+ }
446526 }
447527
448528 $ this ->current_token = new WP_HTML_Token (
@@ -722,6 +802,65 @@ private function bookmark_tag() {
722802 return "{$ this ->bookmark_counter }" ;
723803 }
724804
/**
 * Advances through the HTML document until the element that is currently
 * matched no longer appears in the stack of open elements.
 *
 * @since 6.4.0
 *
 * @throws Exception When unable to allocate bookmark for internal tracking.
 *
 * @return bool|null The result of the last step: true if stepping stopped on a token after
 *                   the starting element left the stack of open elements, false if stepping
 *                   ran out of tokens first, and null if not starting at a matched tag.
 */
private function step_until_tag_is_closed() {
	if ( null === $this->get_tag() ) {
		return null;
	}

	$opening_token = $this->current_token;

	// @TODO: add after-pop hook to turn this into a constant boolean check.
	for ( ;; ) {
		$stepped = $this->step();

		// Stop once stepping fails or the starting element has left the stack.
		if ( ! $stepped || ! $this->state->stack_of_open_elements->contains_node( $opening_token ) ) {
			return $stepped;
		}
	}
}
827+
/**
 * Clips the input HTML document from a bookmark through to its end.
 *
 * With "before" the clip starts at the bookmark's start offset; with any
 * other value it starts one byte past the bookmark's end offset.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to clip before bookmark, "after" to clip after.
 * @param string $start          Bookmark name at which to start clipping.
 * @return string Clipped substring of input HTML document.
 */
private function substr_bookmark( $start_position, $start ) {
	$span = $this->bookmarks[ $start ];

	if ( 'before' === $start_position ) {
		$clip_from = $span->start;
	} else {
		$clip_from = $span->end + 1;
	}

	return substr( $this->html, $clip_from );
}
843+
/**
 * Clips the span of the input HTML document lying between two bookmarks.
 *
 * For each bookmark, "before" selects its start offset and any other value
 * selects one byte past its end offset.
 *
 * @since 6.4.0
 *
 * @param string $start_position "before" to clip before bookmark, "after" to clip after.
 * @param string $start          Bookmark name at which to start clipping.
 * @param string $end_position   "before" to clip before bookmark, "after" to clip after.
 * @param string $end            Bookmark name at which to end clipping.
 * @return string Clipped substring of input HTML document.
 */
private function substr_bookmarks( $start_position, $start, $end_position, $end ) {
	$opening_span = $this->bookmarks[ $start ];
	$closing_span = $this->bookmarks[ $end ];

	if ( 'before' === $start_position ) {
		$clip_from = $opening_span->start;
	} else {
		$clip_from = $opening_span->end + 1;
	}

	if ( 'before' === $end_position ) {
		$clip_to = $closing_span->start;
	} else {
		$clip_to = $closing_span->end + 1;
	}

	$clip_length = $clip_to - $clip_from;

	return substr( $this->html, $clip_from, $clip_length );
}
863+
725864 /*
726865 * HTML semantic overrides for Tag Processor
727866 */
0 commit comments