Skip to content

Commit 216bcfc

Browse files
committed
HTML-API: Introduce minimal HTML Processor.
1 parent 530fd02 commit 216bcfc

10 files changed

Lines changed: 2023 additions & 2 deletions
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
<?php
2+
/**
3+
* HTML API: WP_HTML_Active_Formatting_Elements class
4+
*
5+
* @package WordPress
6+
* @subpackage HTML-API
7+
* @since {WP_VERSION}
8+
*/
9+
10+
/**
11+
* Core class used by the HTML processor during HTML parsing
12+
* for managing the stack of active formatting elements.
13+
*
14+
* This class is designed for internal use by the HTML processor.
15+
*
16+
* > Initially, the list of active formatting elements is empty.
17+
* > It is used to handle mis-nested formatting element tags.
18+
* >
19+
* > The list contains elements in the formatting category, and markers.
20+
* > The markers are inserted when entering applet, object, marquee,
21+
* > template, td, th, and caption elements, and are used to prevent
22+
* > formatting from "leaking" into applet, object, marquee, template,
23+
* > td, th, and caption elements.
24+
* >
25+
* > In addition, each element in the list of active formatting elements
26+
* > is associated with the token for which it was created, so that
27+
* > further elements can be created for that token if necessary.
28+
*
29+
* @since {WP_VERSION}
30+
*
31+
* @access private
32+
*
33+
* @see https://html.spec.whatwg.org/#list-of-active-formatting-elements
34+
* @see WP_HTML_Processor
35+
*/
36+
class WP_HTML_Active_Formatting_Elements {
	/**
	 * Holds the stack of active formatting element references.
	 *
	 * Nodes are appended to the end of this array by `push()`, so index 0
	 * is the node added first and the last index is the node added last.
	 *
	 * @since {WP_VERSION}
	 *
	 * @var WP_HTML_Token[]
	 */
	private $stack = array();

	/**
	 * Reports if a specific node is in the stack of active formatting elements.
	 *
	 * Nodes are matched by comparing their `bookmark_name` properties,
	 * not by object identity.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param WP_HTML_Token $token Look for this node in the stack.
	 * @return bool Whether the referenced node is in the stack of active formatting elements.
	 */
	public function contains_node( $token ) {
		foreach ( $this->walk_up() as $item ) {
			if ( $token->bookmark_name === $item->bookmark_name ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns how many nodes are currently in the stack of active formatting elements.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return int How many nodes are in the stack of active formatting elements.
	 */
	public function count() {
		return count( $this->stack );
	}

	/**
	 * Returns the node at the end of the stack of active formatting elements,
	 * if one exists. If the stack is empty, returns null.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return WP_HTML_Token|null Last node in the stack of active formatting elements, if one exists, otherwise null.
	 */
	public function current_node() {
		// `end()` returns `false` for an empty array; normalize that to `null`.
		$current_node = end( $this->stack );

		return $current_node ? $current_node : null;
	}

	/**
	 * Pushes a node onto the stack of active formatting elements.
	 *
	 * The node is always appended; no de-duplication is performed because
	 * the "Noah's Ark clause" described below is not implemented yet.
	 *
	 * @since {WP_VERSION}
	 *
	 * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
	 *
	 * @param WP_HTML_Token $token Push this node onto the stack.
	 */
	public function push( $token ) {
		/*
		 * > If there are already three elements in the list of active formatting elements after the last marker,
		 * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
		 * > attributes as element, then remove the earliest such element from the list of active formatting
		 * > elements. For these purposes, the attributes must be compared as they were when the elements were
		 * > created by the parser; two elements have the same attributes if all their parsed attributes can be
		 * > paired such that the two attributes in each pair have identical names, namespaces, and values
		 * > (the order of the attributes does not matter).
		 *
		 * @TODO: Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack.
		 */

		// > Add element to the list of active formatting elements.
		$this->stack[] = $token;
	}

	/**
	 * Removes a node from the stack of active formatting elements.
	 *
	 * Nodes are matched by `bookmark_name`. Only the first match found when
	 * searching from the most-recently added node is removed, and the
	 * remaining nodes are re-indexed so the stack stays contiguous.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param WP_HTML_Token $token Remove this node from the stack, if it's there already.
	 * @return bool Whether the node was found and removed from the stack of active formatting elements.
	 */
	public function remove_node( $token ) {
		/*
		 * The generator from `walk_up()` yields auto-incremented keys starting
		 * at 0 for the most-recently added node, so the key is the distance
		 * from the end of the stack, not an index into it.
		 */
		foreach ( $this->walk_up() as $position_from_end => $item ) {
			if ( $token->bookmark_name !== $item->bookmark_name ) {
				continue;
			}

			$position_from_start = $this->count() - $position_from_end - 1;
			array_splice( $this->stack, $position_from_start, 1 );

			// Stop immediately: the stack was just modified mid-iteration.
			return true;
		}

		return false;
	}

	/**
	 * Step through the stack of active formatting elements, starting with the
	 * top element (added first) and walking downwards to the one added last.
	 *
	 * This generator function is designed to be used inside a "foreach" loop.
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_down() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > EM -> STRONG -> A ->
	 *
	 * To start with the most-recently added element and walk towards the top,
	 * @see WP_HTML_Active_Formatting_Elements::walk_up
	 *
	 * @since {WP_VERSION}
	 */
	public function walk_down() {
		// The count is captured once, before the first node is yielded.
		$count = count( $this->stack );

		for ( $i = 0; $i < $count; $i++ ) {
			yield $this->stack[ $i ];
		}
	}

	/**
	 * Step through the stack of active formatting elements, starting with the
	 * bottom element (added last) and walking upwards to the one added first.
	 *
	 * This generator function is designed to be used inside a "foreach" loop.
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_up() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > A -> STRONG -> EM ->
	 *
	 * To start with the first added element and walk towards the bottom,
	 * @see WP_HTML_Active_Formatting_Elements::walk_down
	 *
	 * @since {WP_VERSION}
	 */
	public function walk_up() {
		for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
			yield $this->stack[ $i ];
		}
	}
}

0 commit comments

Comments
 (0)