|
249 | 249 | * |
250 | 250 | * ## Tokens and finer-grained processing. |
251 | 251 | * |
252 | | - * It's also possible to scan through every lexical token in |
253 | | - * the HTML document using the `next_token()` function. This |
| 252 | + * It's possible to scan through every lexical token in the |
| 253 | + * HTML document using the `next_token()` function. This |
254 | 254 | * alternative form takes no argument and provides no built-in |
255 | 255 | * query syntax. |
256 | 256 | * |
257 | 257 | * Example: |
258 | 258 | * |
259 | | - * $title = '(untitled)'; |
260 | | - * $text_content = ''; |
| 259 | + * $title = '(untitled)'; |
| 260 | + * $text = ''; |
261 | 261 | * while ( $processor->next_token() ) { |
262 | 262 | * switch ( $processor->get_token_name() ) { |
263 | 263 | * case '#text': |
264 | | - * $text .= $processor->get_node_text(); |
| 264 | + * $text .= $processor->get_modifiable_text(); |
265 | 265 | * break; |
266 | 266 | * |
267 | 267 | * case 'BR': |
268 | 268 | * $text .= "\n"; |
269 | 269 | * break; |
270 | 270 | * |
271 | 271 | * case 'TITLE': |
272 | | - * $title = $processor->get_node_text(); |
| 272 | + * $title = $processor->get_modifiable_text(); |
273 | 273 | * break; |
274 | 274 | * } |
275 | 275 | * } |
276 | | - * return trim( "# {$title}\n\n{$text_content}\n" ); |
| 276 | + * return trim( "# {$title}\n\n{$text}" ); |
277 | 277 | * |
278 | 278 | * ### Tokens and _modifiable text_. |
279 | 279 | * |
|
301 | 301 | * style of including JavaScript inside of HTML comments to avoid accidentally |
302 | 302 | * closing the SCRIPT from inside a JavaScript string. E.g. `console.log( '</script>' )`. |
303 | 303 | * - `TITLE` and `TEXTAREA` whose contents are treated as plaintext and then any |
304 | | - * character references are decoded. E.g. "1 &lt; 2 < 3" becomes "1 < 2 < 3". |
| 304 | + * character references are decoded. E.g. `1 &lt; 2 < 3` becomes `1 < 2 < 3`. |
305 | 305 | * - `IFRAME`, `NOSCRIPT`, `NOEMBED`, `NOFRAMES`, `STYLE` whose contents are treated as |
306 | | - * raw plaintext and left as-is. E.g. "1 &lt; 2 < 3" remains "1 &lt; 2 < 3". |
| 306 | + * raw plaintext and left as-is. E.g. `1 &lt; 2 < 3` remains `1 &lt; 2 < 3`. |
307 | 307 | * |
308 | 308 | * #### Other tokens with modifiable text. |
309 | 309 | * |
@@ -909,9 +909,14 @@ public function next_token() { |
909 | 909 |
|
910 | 910 | $tag_name = $this->get_tag(); |
911 | 911 | if ( |
| 912 | + // Skips SCRIPT data. |
912 | 913 | 'SCRIPT' !== $tag_name && |
| 914 | + |
| 915 | + // Skips RCDATA data. |
913 | 916 | 'TEXTAREA' !== $tag_name && |
914 | 917 | 'TITLE' !== $tag_name && |
| 918 | + |
| 919 | + // Skips RAWTEXT data. |
915 | 920 | 'IFRAME' !== $tag_name && |
916 | 921 | 'NOEMBED' !== $tag_name && |
917 | 922 | 'NOFRAMES' !== $tag_name && |
|
0 commit comments