Skip to content

Commit 9b56c8b

Browse files
committed
Merge branch 'html-api/improve-active-element-reconstruction' into html-api/try-noahs-ark-of-3
2 parents 6045c24 + 3e8e06b commit 9b56c8b

6 files changed

Lines changed: 267 additions & 64 deletions

File tree

src/wp-includes/html-api/class-wp-html-active-formatting-elements.php

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,18 @@ class WP_HTML_Active_Formatting_Elements {
4343
*/
4444
private $stack = array();
4545

46+
/**
 * Returns the node at the given 1-offset index in the list of active formatting elements.
 *
 * Index 1 refers to the earliest entry still in the list, and an index equal
 * to the number of entries refers to the most recently added one.
 *
 * @since 7.0.0
 *
 * @param int $nth 1-offset position of the node to return, counted from the earliest entry.
 * @return WP_HTML_Token|null Node at the given index in the stack, if one exists, otherwise null.
 */
public function at( $nth ) {
	// Out-of-range positions yield null, as documented, instead of raising an undefined-offset warning.
	return $this->stack[ $nth - 1 ] ?? null;
}
57+
4658
/**
4759
* Reports if a specific node is in the stack of active formatting elements.
4860
*
@@ -110,8 +122,9 @@ public function insert_marker(): void {
110122
* @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
111123
*
112124
* @param WP_HTML_Token $token Push this node onto the stack.
125+
* @return bool Whether a node was pushed onto the stack of active formatting elements.
113126
*/
114-
public function push( WP_HTML_Token $token ) {
127+
public function push( WP_HTML_Token $token ): bool {
115128
/*
116129
* > If there are already three elements in the list of active formatting elements after the last marker,
117130
* > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
@@ -120,11 +133,32 @@ public function push( WP_HTML_Token $token ) {
120133
* > created by the parser; two elements have the same attributes if all their parsed attributes can be
121134
* > paired such that the two attributes in each pair have identical names, namespaces, and values
122135
* > (the order of the attributes does not matter).
123-
*
124-
* @todo Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack.
125136
*/
137+
138+
if ( 'marker' !== $token->node_name ) {
139+
$existing_count = 0;
140+
foreach ( $this->walk_up() as $item ) {
141+
if ( 'marker' === $item->node_name ) {
142+
break;
143+
}
144+
145+
if (
146+
$item->node_name === $token->node_name &&
147+
$item->namespace === $token->namespace
148+
// @todo Compare attributes. For now, bail if there are three matching tag names + namespaces.
149+
) {
150+
++$existing_count;
151+
if ( $existing_count >= 3 ) {
152+
// @todo Implement removing the earliest element and moving forward.
153+
return false;
154+
}
155+
}
156+
}
157+
}
158+
126159
// > Add element to the list of active formatting elements.
127160
$this->stack[] = $token;
161+
return true;
128162
}
129163

130164
/**

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 154 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,18 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
256256
*/
257257
private $context_node = null;
258258

259+
/**
260+
* If a formatting element has been reconstructed, this will hold
261+
* the parsed attributes from the original format, once requested.
262+
*
263+
* These attributes are not modifiable.
264+
*
265+
* @since 7.0.0
266+
*
267+
* @var array
268+
*/
269+
protected $actively_reconstructed_formatting_attributes = array();
270+
259271
/*
260272
* Public Interface Functions
261273
*/
@@ -2766,7 +2778,10 @@ private function step_in_body(): bool {
27662778

27672779
$this->reconstruct_active_formatting_elements();
27682780
$this->insert_html_element( $this->state->current_token );
2769-
$this->state->active_formatting_elements->push( $this->state->current_token );
2781+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2782+
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
2783+
}
2784+
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
27702785
return true;
27712786

27722787
/*
@@ -2787,7 +2802,10 @@ private function step_in_body(): bool {
27872802
case '+U':
27882803
$this->reconstruct_active_formatting_elements();
27892804
$this->insert_html_element( $this->state->current_token );
2790-
$this->state->active_formatting_elements->push( $this->state->current_token );
2805+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2806+
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
2807+
}
2808+
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
27912809
return true;
27922810

27932811
/*
@@ -2803,7 +2821,10 @@ private function step_in_body(): bool {
28032821
}
28042822

28052823
$this->insert_html_element( $this->state->current_token );
2806-
$this->state->active_formatting_elements->push( $this->state->current_token );
2824+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2825+
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
2826+
}
2827+
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
28072828
return true;
28082829

28092830
/*
@@ -5284,7 +5305,46 @@ public function get_token_type(): ?string {
52845305
* @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
52855306
*/
52865307
public function get_attribute( $name ) {
	if ( ! $this->is_virtual() ) {
		return parent::get_attribute( $name );
	}

	// Virtual nodes have no source tag; look for attributes recorded under the token's bookmark name.
	$bookmark_name      = $this->current_element->token->bookmark_name ?? '';
	$virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $bookmark_name ] ?? null;
	if ( null === $virtual_attributes ) {
		return null;
	}

	// Remember the real parser state so it can be put back after the lookup.
	$saved_attributes = $this->attributes;
	$saved_updates    = $this->lexical_updates;
	$saved_state      = $this->parser_state;

	/*
	 * Temporarily present the recorded attributes as though their tag were matched.
	 *
	 * NOTE(review): pending lexical updates are cleared for the lookup, presumably
	 * so that enqueued changes to the matched source tag are not reported for the
	 * virtual node; confirm against the parent implementation.
	 */
	$this->lexical_updates = array();
	$this->attributes      = $virtual_attributes;
	$this->parser_state    = WP_HTML_Tag_Processor::STATE_MATCHED_TAG;

	try {
		return parent::get_attribute( $name );
	} finally {
		// Always restore the real state, even if the parent lookup throws.
		$this->attributes      = $saved_attributes;
		$this->parser_state    = $saved_state;
		$this->lexical_updates = $saved_updates;
	}
}
5330+
5331+
/**
5332+
* Returns the adjusted attribute name for a given attribute, taking into
5333+
* account the current parsing context, whether HTML, SVG, or MathML.
5334+
*
5335+
* @since 7.0.0 Subclassed for the HTML Processor.
5336+
*
5337+
* @param string $attribute_name Which attribute name to adjust.
5338+
*
5339+
* @return string|null The qualified attribute name or null if not on matched tag.
5340+
*/
5341+
public function get_qualified_attribute_name( $attribute_name ): ?string {
	if ( ! $this->is_virtual() ) {
		return parent::get_qualified_attribute_name( $attribute_name );
	}

	// For a virtual node, adjust the name using the namespace stored on the current element's token.
	return self::lookup_qualified_attribute_name(
		$this->current_element->token->namespace,
		$attribute_name
	);
}
52895349

52905350
/**
@@ -5362,7 +5422,24 @@ public function remove_attribute( $name ): bool {
53625422
* @return array|null List of attribute names, or `null` when no tag opener is matched.
53635423
*/
53645424
public function get_attribute_names_with_prefix( $prefix ): ?array {
	if ( ! $this->is_virtual() ) {
		return parent::get_attribute_names_with_prefix( $prefix );
	}

	// Virtual nodes have no source tag; look for attributes recorded under the token's bookmark name.
	$bookmark_name      = $this->current_element->token->bookmark_name ?? '';
	$virtual_attributes = $this->actively_reconstructed_formatting_attributes[ $bookmark_name ] ?? null;
	if ( null === $virtual_attributes ) {
		return null;
	}

	// Remember the real parser state so it can be put back after the lookup.
	$saved_attributes = $this->attributes;
	$saved_state      = $this->parser_state;

	// Temporarily present the recorded attributes as though their tag were matched.
	$this->attributes   = $virtual_attributes;
	$this->parser_state = WP_HTML_Tag_Processor::STATE_MATCHED_TAG;

	try {
		return parent::get_attribute_names_with_prefix( $prefix );
	} finally {
		// Always restore the real state, even if the parent lookup throws.
		$this->attributes   = $saved_attributes;
		$this->parser_state = $saved_state;
	}
}
53675444

53685445
/**
@@ -5865,6 +5942,7 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
58655942
* > been explicitly closed.
58665943
*
58675944
* @since 6.4.0
5945+
* @since 7.0.0 Reconstructs the formatting elements by rewinding and advancing through the list instead of bailing.
58685946
*
58695947
* @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
58705948
*
@@ -5873,34 +5951,89 @@ private function get_adjusted_current_node(): ?WP_HTML_Token {
58735951
* @return bool Whether any formatting elements needed to be reconstructed.
58745952
*/
58755953
private function reconstruct_active_formatting_elements(): bool {
	$total = $this->state->active_formatting_elements->count();

	/*
	 * > 1. If there are no entries in the list of active formatting elements,
	 * >    then there is nothing to reconstruct; stop this algorithm.
	 */
	if ( 0 === $total ) {
		return false;
	}

	/*
	 * > 2. If the last (most recently added) entry in the list of active formatting
	 * >    elements is a marker, or if it is an element that is in the stack of open
	 * >    elements, then there is nothing to reconstruct; stop this algorithm.
	 */
	$newest = $this->state->active_formatting_elements->at( $total );
	if ( 'marker' === $newest->node_name ) {
		return false;
	}

	if ( $this->state->stack_of_open_elements->contains_node( $newest ) ) {
		return false;
	}

	/*
	 * Steps 3–6 (rewind): starting from the last entry, walk toward the start of
	 * the list until the entry just before the current position is a marker or
	 * is in the stack of open elements, or until the first entry is reached.
	 * The position left over is the earliest entry that must be re-created.
	 */
	$first_to_create = $total;
	while ( $first_to_create > 1 ) {
		$earlier = $this->state->active_formatting_elements->at( $first_to_create - 1 );

		if ( 'marker' === $earlier->node_name ) {
			break;
		}

		if ( $this->state->stack_of_open_elements->contains_node( $earlier ) ) {
			break;
		}

		--$first_to_create;
	}

	/*
	 * Steps 7–10 (advance and create): insert an HTML element for every entry
	 * from that position through the last entry in the list.
	 *
	 * > 9. Replace the entry for _entry_ in the list with an entry for new element.
	 * >    This doesn't need to happen here since no DOM is being created.
	 */
	for ( $position = $first_to_create; $position <= $total; $position++ ) {
		$this->insert_html_element( $this->state->active_formatting_elements->at( $position ) );
	}

	return true;
}
59056038

59066039
/**

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ class WP_HTML_Tag_Processor {
708708
* @since 6.2.0
709709
* @var WP_HTML_Attribute_Token[]
710710
*/
711-
private $attributes = array();
711+
protected $attributes = array();
712712

713713
/**
714714
* Tracks spans of duplicate attributes on a given tag, used for removing
@@ -3051,23 +3051,37 @@ public function get_qualified_tag_name(): ?string {
30513051
*
30523052
* @since 6.7.0
30533053
*
3054-
* @param string $attribute_name Which attribute to adjust.
3054+
* @param string $attribute_name Which attribute name to adjust.
30553055
*
3056-
* @return string|null
3056+
* @return string|null The qualified attribute name or null if not on matched tag.
30573057
*/
30583058
public function get_qualified_attribute_name( $attribute_name ): ?string {
	// Attribute names can only be adjusted while a tag is matched; otherwise report null.
	return self::STATE_MATCHED_TAG === $this->parser_state
		? self::lookup_qualified_attribute_name( $this->get_namespace(), $attribute_name )
		: null;
}
30623065

3063-
$namespace = $this->get_namespace();
3066+
/**
3067+
* Returns the adjusted attribute name for a given attribute, taking into
3068+
* account the provided namespace.
3069+
*
3070+
* @since 7.0.0
3071+
*
3072+
* @param string $ns The namespace to use: 'html', 'svg', or 'math'.
3073+
* @param string $attribute_name Which attribute to adjust.
3074+
*
3075+
* @return string The qualified attribute name.
3076+
*/
3077+
final protected static function lookup_qualified_attribute_name( string $ns, string $attribute_name ): string {
30643078
$lower_name = strtolower( $attribute_name );
30653079

3066-
if ( 'math' === $namespace && 'definitionurl' === $lower_name ) {
3080+
if ( 'math' === $ns && 'definitionurl' === $lower_name ) {
30673081
return 'definitionURL';
30683082
}
30693083

3070-
if ( 'svg' === $this->get_namespace() ) {
3084+
if ( 'svg' === $ns ) {
30713085
switch ( $lower_name ) {
30723086
case 'attributename':
30733087
return 'attributeName';
@@ -3245,7 +3259,7 @@ public function get_qualified_attribute_name( $attribute_name ): ?string {
32453259
}
32463260
}
32473261

3248-
if ( 'html' !== $namespace ) {
3262+
if ( 'html' !== $ns ) {
32493263
switch ( $lower_name ) {
32503264
case 'xlink:actuate':
32513265
return 'xlink actuate';

tests/phpunit/tests/html-api/wpHtmlProcessor.php

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,18 +185,23 @@ public function test_clear_to_navigate_after_seeking() {
185185
}
186186

187187
/**
188-
* Ensures that support is added for reconstructing active formatting elements
189-
* before the HTML Processor handles situations with unclosed formats requiring it.
188+
* Ensures that support is added for reconstructing active formatting elements.
190189
*
191190
* @ticket 58517
192191
*
193192
* @covers WP_HTML_Processor::reconstruct_active_formatting_elements
194193
*/
195-
public function test_reconstructs_formatting_elements() {
	$html      = '<p><em>One<p><em><span>Two<p><em>Three<p><em>Four';
	$processor = WP_HTML_Processor::create_fragment( $html );

	// The first EM is found directly inside the first P.
	$found_first_em = $processor->next_tag( 'EM' );
	$this->assertTrue( $found_first_em, 'Could not find first EM.' );

	$first_em_breadcrumbs = array( 'HTML', 'BODY', 'P', 'EM' );
	$this->assertSame( $first_em_breadcrumbs, $processor->get_breadcrumbs(), 'Found incorrect breadcrumbs for first EM.' );

	// The SPAN should sit beneath two EMs inside the second P.
	$found_span = $processor->next_tag( 'SPAN' );
	$this->assertTrue( $found_span, 'Could not find test span.' );

	$span_breadcrumbs = array( 'HTML', 'BODY', 'P', 'EM', 'EM', 'SPAN' );
	$this->assertSame(
		$span_breadcrumbs,
		$processor->get_breadcrumbs(),
		'Found incorrect breadcrumbs for test SPAN; should have created two EMs.'
	);
}
201206

202207
/**

0 commit comments

Comments
 (0)