|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace Clue\React\Utf8; |
| 4 | + |
| 5 | +use Evenement\EventEmitter; |
| 6 | +use React\Stream\ReadableStreamInterface; |
| 7 | +use React\Stream\WritableStreamInterface; |
| 8 | +use React\Stream\Util; |
| 9 | + |
/**
 * Forwards only complete UTF-8 sequences from the wrapped readable stream.
 *
 * Incoming chunks may end in the middle of a multi-byte sequence. Such a
 * trailing, incomplete sequence is held back in an internal buffer until a
 * following chunk completes it. Bytes that cannot be part of a structurally
 * valid sequence (stray continuation bytes, sequences cut short by another
 * lead byte or an ASCII byte, lead bytes 0xF8 and above) are replaced by the
 * configured replacement string, one replacement per offending byte.
 *
 * Only the structure of a sequence is checked (a lead byte followed by the
 * number of continuation bytes it announces); overlong encodings and
 * surrogate code points are not detected.
 */
class Sequencer extends EventEmitter implements ReadableStreamInterface
{
    /** @var string bytes of an incomplete sequence carried over to the next chunk */
    private $buffer = '';
    private $expect = 0;
    /** @var bool whether close() has already run */
    private $closed = false;

    /** @var ReadableStreamInterface the wrapped source stream */
    private $input;
    /** @var string replacement emitted once for every invalid byte */
    private $invalid;

    /**
     * @param ReadableStreamInterface $input                stream to read raw bytes from
     * @param string                  $replacementCharacter emitted in place of each invalid byte;
     *                                                      may be empty (invalid bytes are dropped)
     *                                                      or longer than a single byte
     */
    public function __construct(ReadableStreamInterface $input, $replacementCharacter = '?')
    {
        $this->input = $input;
        $this->invalid = $replacementCharacter;

        // nothing will ever arrive from an unreadable input, so start out closed
        if (!$input->isReadable()) {
            $this->close();

            return;
        }

        $this->input->on('data', array($this, 'handleData'));
        $this->input->on('end', array($this, 'handleEnd'));
        $this->input->on('error', array($this, 'handleError'));
        $this->input->on('close', array($this, 'close'));
    }

    /**
     * Processes one chunk from the input and emits everything that is complete.
     *
     * The chunk is appended to the carried-over bytes and scanned byte by byte.
     * After the scan, the only bytes kept back are those of a trailing sequence
     * that is still waiting for continuation bytes.
     *
     * @internal
     * @param string $data raw bytes received from the input stream
     */
    public function handleData($data)
    {
        $this->buffer .= $data;
        $len = strlen($this->buffer);

        $sequence = ''; // bytes of the multi-byte sequence currently being collected
        $expect = 0;    // total length announced by its lead byte, 0 when outside a sequence
        $out = '';

        for ($i = 0; $i < $len; ++$i) {
            $char = $this->buffer[$i];
            $code = ord($char);

            if ($code & 128) {
                // multi-byte sequence
                if ($code & 64) {
                    // this is the start of a sequence (bit pattern 11xxxxxx)

                    // unexpected start of sequence because already within sequence:
                    // every byte collected so far is invalid
                    if ($expect !== 0) {
                        $out .= str_repeat($this->invalid, strlen($sequence));
                    }

                    $sequence = $char;
                    $expect = 2;

                    // each further leading 1 bit announces one more byte
                    if ($code & 32) {
                        ++$expect;
                        if ($code & 16) {
                            ++$expect;

                            if ($code & 8) {
                                // invalid sequence start length (11111xxx)
                                $out .= $this->invalid;
                                $sequence = '';
                                $expect = 0;
                            }
                        }
                    }
                } else {
                    // this is a follow-up byte in a sequence (bit pattern 10xxxxxx)
                    if ($expect === 0) {
                        // we're not within a sequence in first place
                        $out .= $this->invalid;
                    } else {
                        // valid following byte in sequence
                        $sequence .= $char;

                        // sequence reached expected length => add to output
                        if (strlen($sequence) === $expect) {
                            $out .= $sequence;
                            $sequence = '';
                            $expect = 0;
                        }
                    }
                }
            } else {
                // simple ASCII character found

                // unexpected because already within sequence
                if ($expect !== 0) {
                    $out .= str_repeat($this->invalid, strlen($sequence));
                    $sequence = '';
                    $expect = 0;
                }

                $out .= $char;
            }
        }

        // Keep exactly the unfinished trailing sequence. Deriving the remainder
        // from strlen($out) would only be right for a one-byte replacement and
        // would leave a non-string buffer where substr() returns false.
        $this->buffer = $sequence;

        if ($out !== '') {
            $this->emit('data', array($out));
        }
    }

    /**
     * Flushes a dangling incomplete sequence as invalid bytes, then ends and closes.
     *
     * @internal
     */
    public function handleEnd()
    {
        // whatever is still buffered is a sequence the input never completed
        $pending = strlen($this->buffer);
        $this->buffer = '';

        // an empty replacement means invalid bytes are dropped, so emit nothing
        if ($pending !== 0 && $this->invalid !== '') {
            $this->emit('data', array(str_repeat($this->invalid, $pending)));
        }

        // a data listener may have closed this stream in the meantime
        if (!$this->closed) {
            $this->emit('end', array());
            $this->close();
        }
    }

    /**
     * Forwards an error from the input and closes this stream.
     *
     * @internal
     * @param \Exception $error error reported by the input stream
     */
    public function handleError(\Exception $error)
    {
        $this->emit('error', array($error));
        $this->close();
    }

    /**
     * @return bool true while neither this stream has been closed nor the input reports unreadable
     */
    public function isReadable()
    {
        return !$this->closed && $this->input->isReadable();
    }

    /**
     * Closes this stream and the input, emits "close" once and drops all listeners.
     *
     * Calling it again is a no-op. Any buffered incomplete sequence is discarded.
     */
    public function close()
    {
        if ($this->closed) {
            return;
        }

        // set the flag first: closing the input re-enters here via its "close" event
        $this->closed = true;
        $this->buffer = '';

        $this->input->close();

        $this->emit('close', array());
        $this->removeAllListeners();
    }

    /** Pauses the input stream. */
    public function pause()
    {
        $this->input->pause();
    }

    /** Resumes the input stream. */
    public function resume()
    {
        $this->input->resume();
    }

    /**
     * Pipes the sequenced output into the given writable stream.
     *
     * @param WritableStreamInterface $dest    destination stream
     * @param array                   $options options passed through to Util::pipe()
     * @return WritableStreamInterface the destination stream, to allow chaining
     */
    public function pipe(WritableStreamInterface $dest, array $options = array())
    {
        Util::pipe($this, $dest, $options);

        return $dest;
    }
}
0 commit comments