Skip to content

Commit 7d781a8

Browse files
committed
HTML-API: Introduce minimal HTML Processor.
1 parent 12f854c commit 7d781a8

11 files changed

Lines changed: 2455 additions & 2 deletions
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
<?php
/**
 * HTML API: WP_HTML_Active_Formatting_Elements class
 *
 * @package WordPress
 * @subpackage HTML-API
 * @since {WP_VERSION}
 */

/**
 * Core class used by the HTML processor during HTML parsing
 * for managing the stack of active formatting elements.
 *
 * This class is designed for internal use by the HTML processor.
 *
 * > Initially, the list of active formatting elements is empty.
 * > It is used to handle mis-nested formatting element tags.
 * >
 * > The list contains elements in the formatting category, and markers.
 * > The markers are inserted when entering applet, object, marquee,
 * > template, td, th, and caption elements, and are used to prevent
 * > formatting from "leaking" into applet, object, marquee, template,
 * > td, th, and caption elements.
 * >
 * > In addition, each element in the list of active formatting elements
 * > is associated with the token for which it was created, so that
 * > further elements can be created for that token if necessary.
 *
 * @since {WP_VERSION}
 *
 * @access private
 *
 * @see https://html.spec.whatwg.org/#list-of-active-formatting-elements
 * @see WP_HTML_Processor
 */
class WP_HTML_Active_Formatting_Elements {
	/**
	 * Holds the stack of active formatting element references.
	 *
	 * Nodes are appended as they are pushed, so the first entry is the one
	 * added earliest and the last entry is the one added most recently.
	 *
	 * @since {WP_VERSION}
	 *
	 * @var WP_HTML_Token[]
	 */
	private $stack = array();

	/**
	 * Reports if a specific node is in the stack of active formatting elements.
	 *
	 * Nodes are matched by comparing their `bookmark_name` properties.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param WP_HTML_Token $token Look for this node in the stack.
	 * @return bool Whether the referenced node is in the stack of active formatting elements.
	 */
	public function contains_node( $token ) {
		foreach ( $this->walk_up() as $item ) {
			if ( $token->bookmark_name === $item->bookmark_name ) {
				return true;
			}
		}

		return false;
	}

	/**
	 * Returns how many nodes are currently in the stack of active formatting elements.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return int How many nodes are in the stack of active formatting elements.
	 */
	public function count() {
		return count( $this->stack );
	}

	/**
	 * Returns the node at the end of the stack of active formatting elements,
	 * if one exists. If the stack is empty, returns null.
	 *
	 * @since {WP_VERSION}
	 *
	 * @return WP_HTML_Token|null Last node in the stack of active formatting elements, if one exists, otherwise null.
	 */
	public function current_node() {
		$current_node = end( $this->stack );

		// `end()` reports an empty array with `false`; translate that into `null`.
		return false === $current_node ? null : $current_node;
	}

	/**
	 * Pushes a node onto the stack of active formatting elements.
	 *
	 * @since {WP_VERSION}
	 *
	 * @see https://html.spec.whatwg.org/#push-onto-the-list-of-active-formatting-elements
	 *
	 * @param WP_HTML_Token $token Push this node onto the stack.
	 */
	public function push( $token ) {
		/*
		 * > If there are already three elements in the list of active formatting elements after the last marker,
		 * > if any, or anywhere in the list if there are no markers, that have the same tag name, namespace, and
		 * > attributes as element, then remove the earliest such element from the list of active formatting
		 * > elements. For these purposes, the attributes must be compared as they were when the elements were
		 * > created by the parser; two elements have the same attributes if all their parsed attributes can be
		 * > paired such that the two attributes in each pair have identical names, namespaces, and values
		 * > (the order of the attributes does not matter).
		 *
		 * @TODO: Implement the "Noah's Ark clause" to only add up to three of any given kind of formatting elements to the stack.
		 */

		// > Add element to the list of active formatting elements.
		$this->stack[] = $token;
	}

	/**
	 * Removes a node from the stack of active formatting elements.
	 *
	 * Nodes are matched by comparing their `bookmark_name` properties. Only the
	 * most-recently added matching node is removed.
	 *
	 * @since {WP_VERSION}
	 *
	 * @param WP_HTML_Token $token Remove this node from the stack, if it's there already.
	 * @return bool Whether a matching node was found and removed from the stack.
	 */
	public function remove_node( $token ) {
		foreach ( $this->walk_up() as $position_from_end => $item ) {
			if ( $token->bookmark_name !== $item->bookmark_name ) {
				continue;
			}

			/*
			 * `walk_up()` numbers its items starting at zero from the end of the
			 * stack, so the key has to be mirrored to get the array index.
			 */
			$position_from_start = $this->count() - $position_from_end - 1;
			array_splice( $this->stack, $position_from_start, 1 );

			return true;
		}

		return false;
	}

	/**
	 * Step through the stack of active formatting elements, starting with the
	 * top element (added first) and walking downwards to the one added last.
	 *
	 * This generator function is designed to be used inside a "foreach" loop.
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_down() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > EM -> STRONG -> A ->
	 *
	 * To start with the most-recently added element and walk towards the top,
	 * @see WP_HTML_Active_Formatting_Elements::walk_up
	 *
	 * @since {WP_VERSION}
	 *
	 * @return Generator Yields each node in the stack, earliest-added first.
	 */
	public function walk_down() {
		$count = count( $this->stack );

		for ( $i = 0; $i < $count; $i++ ) {
			yield $this->stack[ $i ];
		}
	}

	/**
	 * Step through the stack of active formatting elements, starting with the
	 * bottom element (added last) and walking upwards to the one added first.
	 *
	 * This generator function is designed to be used inside a "foreach" loop.
	 *
	 * Example:
	 *
	 *     $html = '<em><strong><a>We are here';
	 *     foreach ( $stack->walk_up() as $node ) {
	 *         echo "{$node->node_name} -> ";
	 *     }
	 *     > A -> STRONG -> EM ->
	 *
	 * To start with the first added element and walk towards the bottom,
	 * @see WP_HTML_Active_Formatting_Elements::walk_down
	 *
	 * @since {WP_VERSION}
	 *
	 * @return Generator Yields each node in the stack, most-recently added first.
	 */
	public function walk_up() {
		for ( $i = count( $this->stack ) - 1; $i >= 0; $i-- ) {
			yield $this->stack[ $i ];
		}
	}
}

0 commit comments

Comments
 (0)