Skip to content

Commit 11e895d

Browse files
committed
Try a hash-based Noah's Ark clause implementation
1 parent 849dbec commit 11e895d

2 files changed

Lines changed: 92 additions & 22 deletions

File tree

src/wp-includes/html-api/class-wp-html-active-formatting-elements.php

Lines changed: 83 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,17 @@ class WP_HTML_Active_Formatting_Elements {
4343
*/
4444
private $stack = array();
4545

46+
/**
47+
* Holds a stack of hashes, each uniquely representing an active formatting element.
48+
*
49+
* This is important to efficiently track and remove duplicate elements when pushing.
50+
*
51+
* @since 7.0.0
52+
*
53+
* @var string[]
54+
*/
55+
private $hash_stack = array();
56+
4657
/**
4758
* Returns the node at the given 1-offset index in the list of active formatting elements.
4859
*
@@ -111,7 +122,52 @@ public function current_node() {
111122
* @since 6.7.0
112123
*/
113124
public function insert_marker(): void {
114-
$this->push( new WP_HTML_Token( null, 'marker', false ) );
125+
$this->stack[] = new WP_HTML_Token( null, 'marker', false );
126+
$this->hash_stack[] = 'marker';
127+
}
128+
129+
/**
130+
* Generates a hash string for a given token, based on its
131+
* tag name, namespace, and attributes.
132+
*
133+
* @since 7.0.0
134+
*
135+
* @param WP_HTML_Token $token Token to generate a hash for.
136+
* @param string $token_html The original HTML of the token.
137+
* @return string Generated hash string.
138+
*/
139+
private function get_token_hash( WP_HTML_Token $token, string $token_html ): string {
	// Re-parse the token's own HTML in the token's namespace to read its qualified names and attributes.
	$processor = new WP_HTML_Tag_Processor( $token_html );
	$processor->change_parsing_namespace( $token->namespace );
	$processor->next_tag();

	$node_name   = $processor->get_qualified_tag_name();
	$hash_string = "{$token->namespace}::<{$node_name}";

	$attribute_names = $processor->get_attribute_names_with_prefix( '' );
	if ( ! empty( $attribute_names ) ) {
		$attr_parts = array();

		// Sort the names so that the order of the attributes in the source does not affect the hash.
		sort( $attribute_names, SORT_STRING );
		foreach ( $attribute_names as $attribute_name ) {
			$display_name = $processor->get_qualified_attribute_name( $attribute_name );
			$val          = $processor->get_attribute( $attribute_name );

			/*
			 * Attributes with no value are `true` in the HTML API;
			 * map them to the empty string, the value used in the tree structure.
			 */
			if ( true === $val ) {
				$val = '';
			}

			/*
			 * Escape the value so the serialized attribute list is unambiguous.
			 *
			 * The array form of strtr() is required here: the three-argument string form
			 * translates character-by-character and truncates to the shorter argument,
			 * which would turn every `"` into `&` instead of `&quot;`. The ampersand is
			 * escaped as well so that a literal `&quot;` in a value cannot be confused
			 * with an escaped double quote.
			 */
			$val = strtr(
				$val,
				array(
					'&' => '&amp;',
					'"' => '&quot;',
				)
			);

			$attr_parts[] = "{$display_name}=\"{$val}\"";
		}
		$hash_string .= ' ' . implode( ' ', $attr_parts );
	}
	$hash_string .= '>';

	// NOTE(review): CRC32 is a 32-bit checksum, so two distinct tokens can share a hash; confirm this is acceptable.
	return dechex( crc32( $hash_string ) );
}
116172

117173
/**
@@ -124,7 +180,7 @@ public function insert_marker(): void {
124180
* @param WP_HTML_Token $token Push this node onto the stack.
125181
* @return bool Whether a node was pushed onto the stack of active formatting elements.
126182
*/
127-
public function push( WP_HTML_Token $token ): bool {
183+
public function push( WP_HTML_Token $token, string $token_html ): bool {
128184
/*
129185
* > If there are already three elements in the list of active formatting elements after the last marker,
130186
* > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
@@ -135,29 +191,35 @@ public function push( WP_HTML_Token $token ): bool {
135191
* > (the order of the attributes does not matter).
136192
*/
137193

138-
if ( 'marker' !== $token->node_name ) {
139-
$existing_count = 0;
140-
foreach ( $this->walk_up() as $item ) {
141-
if ( 'marker' === $item->node_name ) {
142-
break;
143-
}
194+
if ( 'marker' === $token->node_name ) {
195+
_doing_it_wrong(
196+
__METHOD__,
197+
'Markers must be added using the WP_HTML_Active_Formatting_Elements::insert_marker() method.',
198+
'7.0.0'
199+
);
200+
return false;
201+
}
144202

145-
if (
146-
$item->node_name === $token->node_name &&
147-
$item->namespace === $token->namespace
148-
// @todo Compare attributes. For now, bail if there are three matching tag names + namespaces.
149-
) {
150-
++$existing_count;
151-
if ( $existing_count >= 3 ) {
152-
// @todo Implement removing the earliest element and moving forward.
153-
return false;
154-
}
203+
$token_hash = $this->get_token_hash( $token, $token_html );
204+
$existing_count = 0;
205+
for ( $i = count( $this->hash_stack ) - 1; $i >= 0; $i-- ) {
206+
$item_hash = $this->hash_stack[ $i ];
207+
208+
if ( 'marker' === $item_hash ) {
209+
break;
210+
}
211+
212+
if ( $item_hash === $token_hash ) {
213+
if ( ++$existing_count >= 3 ) {
214+
$this->remove_node( $this->stack[ $i ] );
215+
break;
155216
}
156217
}
157218
}
158219

159220
// > Add element to the list of active formatting elements.
160-
$this->stack[] = $token;
221+
$this->stack[] = $token;
222+
$this->hash_stack[] = $token_hash;
161223
return true;
162224
}
163225

@@ -177,6 +239,7 @@ public function remove_node( WP_HTML_Token $token ) {
177239

178240
$position_from_start = $this->count() - $position_from_end - 1;
179241
array_splice( $this->stack, $position_from_start, 1 );
242+
array_splice( $this->hash_stack, $position_from_start, 1 );
180243
return true;
181244
}
182245

@@ -255,6 +318,7 @@ public function walk_up() {
255318
public function clear_up_to_last_marker(): void {
256319
foreach ( $this->walk_up() as $item ) {
257320
array_pop( $this->stack );
321+
array_pop( $this->hash_stack );
258322
if ( 'marker' === $item->node_name ) {
259323
break;
260324
}

src/wp-includes/html-api/class-wp-html-processor.php

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2778,7 +2778,9 @@ private function step_in_body(): bool {
27782778

27792779
$this->reconstruct_active_formatting_elements();
27802780
$this->insert_html_element( $this->state->current_token );
2781-
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2781+
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
2782+
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
2783+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
27822784
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
27832785
}
27842786
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
@@ -2802,7 +2804,9 @@ private function step_in_body(): bool {
28022804
case '+U':
28032805
$this->reconstruct_active_formatting_elements();
28042806
$this->insert_html_element( $this->state->current_token );
2805-
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2807+
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
2808+
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
2809+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
28062810
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
28072811
}
28082812
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;
@@ -2821,7 +2825,9 @@ private function step_in_body(): bool {
28212825
}
28222826

28232827
$this->insert_html_element( $this->state->current_token );
2824-
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token ) ) {
2828+
$bookmark = $this->bookmarks[ $this->state->current_token->bookmark_name ];
2829+
$token_html = substr( $this->html, $bookmark->start, $bookmark->length );
2830+
if ( false === $this->state->active_formatting_elements->push( $this->state->current_token, $token_html ) ) {
28252831
$this->bail( 'Cannot track formatting elements when encountering a fourth identical token.' );
28262832
}
28272833
$this->actively_reconstructed_formatting_attributes[ $this->state->current_token->bookmark_name ] = $this->attributes;

0 commit comments

Comments
 (0)