|
| 1 | +<?php |
| 2 | + |
| 3 | +require_once __DIR__ . '/src/wp-load.php'; |
| 4 | + |
/*
 * ANSI SGR escape sequences used to colorize the output.
 *
 * C_TEXT resets all attributes; every other constant selects a 24-bit
 * ("truecolor") RGB foreground color of the form ESC[38;2;R;G;Bm.
 */
define( 'C_TEXT', "\e[m" );                    // Plain text (attribute reset).
define( 'C_REF', "\e[38;2;154;110;58m" );      // Character references found inside text.
define( 'C_COMMENT', "\e[38;2;112;128;144m" ); // Comments and non-TITLE element text bodies.
define( 'C_SYNTAX', "\e[38;2;153;153;153m" );  // Markup punctuation: <, >, </, =", DOCTYPE.
define( 'C_TAGNAME', "\e[38;2;153;0;85m" );    // Tag names.
define( 'C_ANAME', "\e[38;2;102;153;0m" );     // Attribute names.
define( 'C_AVALUE', "\e[38;2;0;119;170m" );    // Attribute values.
| 12 | + |
/*
 * Command-line options:
 *   -f        Format the output: start selected tags on their own, indented line.
 *   -u <uri>  Read the HTML from this URI instead of standard input.
 *             "https://" is prepended when no http(s) scheme is given.
 */
$options   = getopt( 'fu:' );
$do_format = isset( $options['f'] );

$uri = 'php://stdin';
if ( isset( $options['u'] ) ) {
	// getopt() returns an array when an option is repeated; use the last one given.
	$uri = is_array( $options['u'] ) ? end( $options['u'] ) : $options['u'];
	if ( ! preg_match( '~^https?://~i', $uri ) ) {
		$uri = "https://{$uri}";
	}
}

$html = file_get_contents( $uri );
if ( false === $html ) {
	// Without this guard a failed read would hand `false` to the parser.
	fwrite( STDERR, "Unable to read HTML from {$uri}\n" );
	exit( 1 );
}
| 25 | + |
/**
 * HTML Processor that additionally exposes the raw source text of the current token.
 *
 * Declared as a named class so that the processor can be obtained through the
 * static factory alone, without first constructing a throwaway instance via
 * the constructor just to get a handle on the class.
 */
class Raw_Token_HTML_Processor extends WP_HTML_Processor {
	/**
	 * Returns the unmodified source text spanned by the currently-matched token.
	 *
	 * @return string Raw token text, or an empty string if the token could not be bookmarked.
	 */
	public function get_raw_token() {
		if ( ! $this->set_bookmark( 'here' ) ) {
			return '';
		}

		// The bookmark set as 'here' is read back under the key '_here'.
		$here = $this->bookmarks['_here'];

		return substr( $this->html, $here->start, $here->length );
	}
}

$p = Raw_Token_HTML_Processor::create_full_parser( $html );
if ( null === $p ) {
	fwrite( STDERR, "Unable to create an HTML parser for the given input.\n" );
	exit( 1 );
}
| 35 | + |
/*
 * Walk every token in the document and print it with syntax coloring.
 * Tags and text are delegated to print_tag() and print_text(); comments and
 * the DOCTYPE are rebuilt from their modifiable text.
 */
while ( $p->next_token() ) {
	switch ( $p->get_token_type() ) {
		case '#comment':
			echo C_COMMENT . '<!--' . $p->get_modifiable_text() . '-->';
			break;

		case '#doctype':
			echo C_SYNTAX . '<!DOCTYPE' . $p->get_modifiable_text() . '>';
			break;

		case '#tag':
			print_tag( $p );
			break;

		case '#text':
			print_text( $p );
			break;

		default:
			// Reset the terminal colors, then fail with a non-zero exit status.
			echo C_TEXT . "\n";
			fwrite( STDERR, "Unsupported syntax: {$p->get_token_type()}\n" );
			exit( 1 );
	}
}

// Leave the terminal in its default colors.
echo C_TEXT . "\n";
| 60 | + |
/**
 * Prints the current text token, highlighting every character reference in it.
 *
 * The raw source of the token is scanned for ampersands; each one that starts
 * a valid character reference is printed in C_REF, everything else in C_TEXT.
 *
 * @param WP_HTML_Processor $p Processor paused on the token to print. It must
 *                             also provide a get_raw_token() method.
 */
function print_text( $p ) {
	$token_name = $p->get_token_name();
	if ( in_array( $token_name, [ 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ], true ) ) {
		// For these elements the modifiable text is printed as-is, without
		// character-reference highlighting. Echo it like every other path does.
		echo C_TEXT . $p->get_modifiable_text();
		return;
	}

	$raw_token = $p->get_raw_token();
	$length    = strlen( $raw_token );
	$text      = C_TEXT;
	$at        = 0; // Where the search for the next ampersand starts.
	$was_at    = 0; // End of the text already copied into $text.
	while ( $at < $length ) {
		$next_at = strpos( $raw_token, '&', $at );
		if ( false === $next_at ) {
			break;
		}

		$skip_bytes  = 0;
		$replacement = WP_HTML_Decoder::read_character_reference( 'data', $raw_token, $next_at, $skip_bytes );
		if ( isset( $replacement ) ) {
			$text  .= substr( $raw_token, $was_at, $next_at - $was_at ) . C_REF . substr( $raw_token, $next_at, $skip_bytes ) . C_TEXT;
			$at     = $next_at + $skip_bytes;
			$was_at = $at;
			continue;
		}

		// A bare ampersand: resume just past it instead of stepping one byte
		// at a time and re-testing the same ampersand on every step.
		$at = $next_at + 1;
	}

	if ( $was_at < $length ) {
		$text .= substr( $raw_token, $was_at );
	}

	echo C_TEXT . $text;
}
| 92 | + |
/**
 * Prints the current tag token with syntax coloring.
 *
 * Prints the tag name and its attributes, followed by any modifiable text the
 * token carries and a matching closing tag for that text. When the global
 * $do_format flag is set, selected tags start on a new line that is indented
 * by the current nesting depth.
 *
 * @param WP_HTML_Processor $p Processor paused on the tag to print.
 */
function print_tag( $p ) {
	global $do_format;

	// Nesting depth, kept across calls; each level indents by two spaces.
	static $depth = 0;

	// Tags that change the indentation depth.
	$indenting_tags = [ 'HEAD', 'BODY', 'OL', 'UL', 'DIV' ];

	// Openers that start on a new line when formatting.
	$newline_before_opener = [
		'DIV', 'P', 'UL', 'OL', 'DETAILS', 'SVG', 'PATH', 'G',
		'LINK', 'META', 'HTML', 'HEAD', 'BODY', 'TITLE', 'TEXTAREA',
		'PRE', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HGROUP',
		'PICTURE', 'SOURCE', 'FIGURE', 'FORM', 'TABLE', 'TR',
		'FIGCAPTION', 'BLOCKQUOTE', 'OBJECT', 'EMBED', 'IFRAME',
		'SCRIPT', 'STYLE', 'NOSCRIPT', 'NAV', 'LI',
	];

	// Closers that start on a new line when formatting.
	$newline_before_closer = [ 'HEAD', 'HTML', 'BODY', 'PICTURE', 'FIGURE', 'TABLE' ];

	// Tags whose text body is set off on its own lines when formatting, and
	// which get a closing tag printed even when that body is empty.
	$text_body_tags = [ 'SCRIPT', 'STYLE', 'TEXTAREA', 'PRE' ];

	$tag_name     = $p->get_tag();
	$lower_name   = strtolower( $tag_name );
	$is_closer    = $p->is_tag_closer();
	$is_indenting = in_array( $tag_name, $indenting_tags, true );
	$closing_tag  = C_SYNTAX . '</' . C_TAGNAME . $lower_name . C_SYNTAX . '>';

	// A closer is printed at the depth of its opener, so step out first.
	if ( $is_closer && $is_indenting ) {
		$depth--;
	}

	$breaking_tags = $is_closer ? $newline_before_closer : $newline_before_opener;
	if ( $do_format && in_array( $tag_name, $breaking_tags, true ) ) {
		$indent = str_pad( '', $depth * 2, ' ' );
		echo "\n{$indent}";
	}

	echo C_SYNTAX . '<' . ( $is_closer ? '/' : '' );
	echo C_TAGNAME . $lower_name;

	$attributes = $p->get_attribute_names_with_prefix( '' ) ?? [];
	foreach ( $attributes as $name ) {
		$value = $p->get_attribute( $name );

		echo ' ' . C_ANAME . $name;
		if ( true === $value ) {
			// Boolean attribute: the name alone is printed.
			continue;
		}

		echo C_SYNTAX . '="';
		// Escape double quotes so the value cannot end the quoted attribute early.
		echo C_AVALUE . str_replace( '"', '&quot;', $value );
		echo C_SYNTAX . '"';
	}
	echo C_SYNTAX . '>';

	$text = $p->get_modifiable_text();
	// Compare against '' rather than using empty(), which would drop a body of "0".
	if ( '' !== $text ) {
		echo 'TITLE' === $tag_name ? C_TEXT : C_COMMENT;

		$add_newlines = (
			$do_format &&
			strlen( trim( $text ) ) > 0 &&
			in_array( $tag_name, $text_body_tags, true )
		);

		if ( $add_newlines ) {
			echo "\n" . trim( $text, "\n" ) . "\n";
		} else {
			echo $text;
		}

		echo $closing_tag;
	} elseif ( in_array( $tag_name, $text_body_tags, true ) ) {
		// NOTE(review): this also fires for a closer token of one of these tags.
		// If the processor reports a separate closing token for any of them
		// (PRE looks like a candidate), its closer is printed twice — confirm.
		echo $closing_tag;
	}

	if ( ! $is_closer && $is_indenting ) {
		$depth++;
	}
}
0 commit comments