|
19 | 19 | import os |
20 | 20 | import sys |
21 | 21 | import re |
| 22 | +import codecs |
22 | 23 | import contextlib |
23 | 24 | import subprocess |
24 | 25 | import signal |
@@ -116,6 +117,12 @@ def __init__(self, path, tty=True, env=None, args=()): |
116 | 117 | env = {} |
117 | 118 | self.env = env |
118 | 119 | self.readbuf = '' |
| 120 | + # os.read()/pipe reads can split a multi-byte UTF-8 sequence across two |
| 121 | + # reads (the boundary depends on terminal/readline echo flushing, which |
| 122 | + # changed with the Ubuntu 22.04 / readline 8.1 build image). Decode |
| 123 | + # incrementally so a partial trailing sequence is buffered until the |
| 124 | + # rest of its bytes arrive, instead of failing with UnicodeDecodeError. |
| 125 | + self._decoder = codecs.getincrementaldecoder("utf-8")() |
119 | 126 |
|
120 | 127 | self.start_proc() |
121 | 128 |
|
@@ -160,16 +167,26 @@ def send_pipe(self, data): |
160 | 167 | self.proc.stdin.write(data) |
161 | 168 |
|
162 | 169 | def read_tty(self, blksize, timeout=None): |
163 | | - buf = os.read(self.childpty, blksize) |
164 | | - if isinstance(buf, bytes): |
165 | | - buf = buf.decode("utf-8") |
166 | | - return buf |
| 170 | + while True: |
| 171 | + buf = os.read(self.childpty, blksize) |
| 172 | + if not isinstance(buf, bytes): |
| 173 | + return buf |
| 174 | + decoded = self._decoder.decode(buf) |
| 175 | + # A non-empty read that decodes to '' means we only got the start of |
| 176 | + # a multi-byte UTF-8 sequence; keep reading until it completes rather |
| 177 | + # than returning '', which callers (read_until) treat as EOF. |
| 178 | + # An empty read (b'') is genuine EOF and must propagate as ''. |
| 179 | + if decoded != '' or buf == b'': |
| 180 | + return decoded |
167 | 181 |
|
168 | 182 | def read_pipe(self, blksize, timeout=None): |
169 | | - buf = self.proc.stdout.read(blksize) |
170 | | - if isinstance(buf, bytes): |
171 | | - buf = buf.decode("utf-8") |
172 | | - return buf |
| 183 | + while True: |
| 184 | + buf = self.proc.stdout.read(blksize) |
| 185 | + if not isinstance(buf, bytes): |
| 186 | + return buf |
| 187 | + decoded = self._decoder.decode(buf) |
| 188 | + if decoded != '' or buf == b'': |
| 189 | + return decoded |
173 | 190 |
|
174 | 191 | def read_until(self, until, blksize=4096, timeout=None, |
175 | 192 | flags=0, ptty_timeout=None, replace=None): |
|
0 commit comments