Skip to content

Commit 11e91d5

Browse files
committed
HTML API: Update documentation and rename internal variable on HTML Processor
This patch updates documentation and an internal variable name within the HTML Processor class so that they are more helpful and complete to a reader. There should be no functional or visual changes in this patch. Props dmsnell, mukesh27. Fixes #59267. git-svn-id: https://develop.svn.wordpress.org/trunk@56565 602fd350-edb4-49c9-b593-d223f7449a82
1 parent 7dbf409 commit 11e91d5

1 file changed

Lines changed: 76 additions & 28 deletions

File tree

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 76 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@
2727
* - Unwrapping a tag by removing its parent.
2828
* - Inserting and removing nodes.
2929
* - Reading and changing inner content.
30+
* - Navigating up or around HTML structure.
3031
*
3132
* ## Usage
3233
*
@@ -66,7 +67,7 @@
6667
* `array( 'IMG' )` matches all IMG elements and `array( 'P', 'IMG' )`
6768
* matches all IMG elements directly inside a P element. To ensure that no
6869
* partial matches erroneously match, it's possible to specify in a query
69-
* the full breadcrumb match.
70+
* the full breadcrumb match all the way down from the root HTML element.
7071
*
7172
* Example:
7273
*
@@ -76,7 +77,7 @@
7677
*
7778
* $html = '<figure><img><figcaption>A <em>lovely</em> day outside</figcaption></figure>';
7879
* // ---- Matches here.
79-
* $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'IMG', 'FIGCAPTION', 'EM' ) ) );
80+
* $processor->next_tag( array( 'breadcrumbs' => array( 'FIGURE', 'FIGCAPTION', 'EM' ) ) );
8081
*
8182
* $html = '<div><img></div><img>';
8283
* // ----- Matches here, because IMG must be a direct child of the implicit BODY.
@@ -100,7 +101,8 @@
100101
*
101102
* - Links: A.
102103
* - The formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
103-
* - Containers: DIV, FIGCAPTION, FIGURE.
104+
* - Containers: DIV, FIGCAPTION, FIGURE, SPAN.
105+
* - Form elements: BUTTON.
104106
* - Paragraph: P.
105107
* - Void elements: IMG.
106108
*
@@ -116,7 +118,9 @@
116118
* - Markup involving only those tags listed above.
117119
* - Fully-balanced and non-overlapping tags.
118120
* - HTML with unexpected tag closers.
121+
* - Some unbalanced or overlapping tags.
119122
* - P tags after unclosed P tags.
123+
* - BUTTON tags after unclosed BUTTON tags.
120124
* - A tags after unclosed A tags that don't involve any active formatting elements.
121125
*
122126
* @since 6.4.0
@@ -126,7 +130,10 @@
126130
*/
127131
class WP_HTML_Processor extends WP_HTML_Tag_Processor {
128132
/**
129-
* HTML processing requires more bookmarks than basic tag processing.
133+
* The maximum number of bookmarks allowed to exist at any given time.
134+
*
135+
* HTML processing requires more bookmarks than basic tag processing,
136+
* so this class constant from the Tag Processor is overridden.
130137
*
131138
* @since 6.4.0
132139
*
@@ -137,6 +144,8 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
137144
/**
138145
* Static query for instructing the Tag Processor to visit every token.
139146
*
147+
* @access private
148+
*
140149
* @since 6.4.0
141150
*
142151
* @var array
@@ -158,8 +167,15 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
158167
/**
159168
* Used to create unique bookmark names.
160169
*
170+
* This class sets a bookmark for every tag in the HTML document that it encounters.
171+
* The bookmark name is auto-generated and increments, starting with `1`. These are
172+
* internal bookmarks and are automatically released when the referring WP_HTML_Token
173+
* goes out of scope and is garbage-collected.
174+
*
161175
* @since 6.4.0
162176
*
177+
* @see WP_HTML_Processor::$release_internal_bookmark_on_destruct
178+
*
163179
* @var int
164180
*/
165181
private $bookmark_counter = 0;
@@ -186,7 +202,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
186202
*
187203
* @var Closure
188204
*/
189-
private $release_internal_bookmark = null;
205+
private $release_internal_bookmark_on_destruct = null;
190206

191207
/*
192208
* Public Interface Functions
@@ -258,10 +274,14 @@ public static function createFragment( $html, $context = '<body>', $encoding = '
258274
/**
259275
* Constructor.
260276
*
277+
* Do not use this method. Use the static creator methods instead.
278+
*
261279
* @access private
262280
*
263281
* @since 6.4.0
264282
*
283+
* @see WP_HTML_Processor::createFragment()
284+
*
265285
* @param string $html HTML to process.
266286
* @param string|null $use_the_static_create_methods_instead This constructor should not be called manually.
267287
*/
@@ -287,7 +307,7 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
287307
* a private method into WP_HTML_Token classes without
288308
* exposing it to any public API.
289309
*/
290-
$this->release_internal_bookmark = function ( $name ) {
310+
$this->release_internal_bookmark_on_destruct = function ( $name ) {
291311
parent::release_bookmark( $name );
292312
};
293313
}
@@ -298,13 +318,15 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
298318
* Various situations lead to parsing failure but this class will
299319
* return `false` in all those cases. To determine why something
300320
* failed it's possible to request the last error. This can be
301-
* helpful to know if it's possible to fix something or to give up.
321+
* helpful to distinguish whether a given tag couldn't
322+
* be found or if content in the document caused the processor
323+
* to give up and abort processing.
302324
*
303325
* Example
304326
*
305-
* $p = WP_HTML_Processor::createFragment( '<template><strong><button><em><p><em>' );
306-
* false === $p->next_tag();
307-
* WP_HTML_Processor::ERROR_UNSUPPORTED === $p->get_last_error();
327+
* $processor = WP_HTML_Processor::createFragment( '<template><strong><button><em><p><em>' );
328+
* false === $processor->next_tag();
329+
* WP_HTML_Processor::ERROR_UNSUPPORTED === $processor->get_last_error();
308330
*
309331
* @since 6.4.0
310332
*
@@ -429,6 +451,20 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
429451
}
430452

431453
if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
454+
/*
455+
* Void elements still hop onto the stack of open elements even though
456+
* there's no corresponding closing tag. This is important for managing
457+
* stack-based operations such as "navigate to parent node" or checking
458+
* on an element's breadcrumbs.
459+
*
460+
* When moving on to the next node, therefore, if the bottom-most element
461+
* on the stack is a void element, it must be closed.
462+
*
463+
* @TODO: Once self-closing foreign elements and BGSOUND are supported,
464+
* they must also be implicitly closed here too. BGSOUND is
465+
* special since it's only self-closing if the self-closing flag
466+
* is provided in the opening tag, otherwise it expects a tag closer.
467+
*/
432468
$top_node = $this->state->stack_of_open_elements->current_node();
433469
if ( $top_node && self::is_void( $top_node->node_name ) ) {
434470
$this->state->stack_of_open_elements->pop();
@@ -446,7 +482,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
446482
$this->bookmark_tag(),
447483
$this->get_tag(),
448484
$this->is_tag_closer(),
449-
$this->release_internal_bookmark
485+
$this->release_internal_bookmark_on_destruct
450486
);
451487

452488
try {
@@ -456,7 +492,7 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
456492

457493
default:
458494
$this->last_error = self::ERROR_UNSUPPORTED;
459-
throw new WP_HTML_Unsupported_Exception( 'Cannot parse outside of the IN BODY insertion mode.' );
495+
throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." );
460496
}
461497
} catch ( WP_HTML_Unsupported_Exception $e ) {
462498
/*
@@ -470,17 +506,22 @@ public function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
470506
/**
471507
* Computes the HTML breadcrumbs for the currently-matched node, if matched.
472508
*
473-
* Breadcrumbs start at the outer-most parent and descend toward the matched element.
509+
* Breadcrumbs start at the outermost parent and descend toward the matched element.
510+
* They always include the entire path from the root HTML node to the matched element.
511+
*
512+
* @TODO: It could be more efficient to expose a generator-based version of this function
513+
* to avoid creating the array copy on tag iteration. If this is done, it would likely
514+
* be more useful to walk up the stack when yielding instead of starting at the top.
474515
*
475516
* Example
476517
*
477-
* $p = WP_HTML_Processor::createFragment( '<p><strong><em><img></em></strong></p>' );
478-
* $p->next_tag( 'IMG' );
479-
* $p->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' );
518+
* $processor = WP_HTML_Processor::createFragment( '<p><strong><em><img></em></strong></p>' );
519+
* $processor->next_tag( 'IMG' );
520+
* $processor->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' );
480521
*
481522
* @since 6.4.0
482523
*
483-
* @return string[]|null Array of tag-names representing path to matched node, if matched, otherwise null.
524+
* @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL.
484525
*/
485526
public function get_breadcrumbs() {
486527
if ( ! $this->get_tag() ) {
@@ -499,16 +540,14 @@ public function get_breadcrumbs() {
499540
* Parses next element in the 'in body' insertion mode.
500541
*
501542
* This internal function performs the 'in body' insertion mode
502-
* logic for the generalized `self::step()` function.
503-
*
504-
* @access private
543+
* logic for the generalized WP_HTML_Processor::step() function.
505544
*
506545
* @since 6.4.0
507546
*
508547
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
509548
*
510549
* @see https://html.spec.whatwg.org/#parsing-main-inbody
511-
* @see self::step
550+
* @see WP_HTML_Processor::step
512551
*
513552
* @return bool Whether an element was found.
514553
*/
@@ -698,7 +737,7 @@ private function step_in_body() {
698737
*/
699738

700739
/**
701-
* Creates a new bookmark for the currently-matched tag and returns generated name.
740+
* Creates a new bookmark for the currently-matched tag and returns the generated name.
702741
*
703742
* @since 6.4.0
704743
*
@@ -726,14 +765,19 @@ private function bookmark_tag() {
726765
/**
727766
* Returns the uppercase name of the matched tag.
728767
*
768+
* The semantic rules for HTML specify that certain tags be reprocessed
769+
* with a different tag name. Because of this, the tag name presented
770+
* by the HTML Processor may differ from the one reported by the HTML
771+
* Tag Processor, which doesn't apply these semantic rules.
772+
*
729773
* Example:
730774
*
731-
* $p = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
732-
* $p->next_tag() === true;
733-
* $p->get_tag() === 'DIV';
775+
* $processor = new WP_HTML_Tag_Processor( '<div class="test">Test</div>' );
776+
* $processor->next_tag() === true;
777+
* $processor->get_tag() === 'DIV';
734778
*
735-
* $p->next_tag() === false;
736-
* $p->get_tag() === null;
779+
* $processor->next_tag() === false;
780+
* $processor->get_tag() === null;
737781
*
738782
* @since 6.4.0
739783
*
@@ -775,11 +819,13 @@ public function release_bookmark( $bookmark_name ) {
775819
}
776820

777821
/**
778-
* Moves the internal cursor in the Tag Processor to a given bookmark's location.
822+
* Moves the internal cursor in the HTML Processor to a given bookmark's location.
779823
*
780824
* In order to prevent accidental infinite loops, there's a
781825
* maximum limit on the number of times seek() can be called.
782826
*
827+
* @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
828+
*
783829
* @since 6.4.0
784830
*
785831
* @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
@@ -1348,6 +1394,8 @@ public static function is_void( $tag_name ) {
13481394
*
13491395
* This unlock code is used to ensure that anyone calling the constructor is
13501396
* doing so with a full understanding that it's intended to be a private API.
1397+
*
1398+
* @access private
13511399
*/
13521400
const CONSTRUCTOR_UNLOCK_CODE = 'Use WP_HTML_Processor::createFragment instead of calling the class constructor directly.';
13531401
}

0 commit comments

Comments
 (0)