Skip to content

Commit abd7dea

Browse files
committed
Merge branch 'cassandra-5.0' into cassandra-6.0
* cassandra-5.0: run_cqlsh.py: fix decoding of multi-byte UTF-8 sequences; os.read()/pipe reads can split a sequence across two reads
2 parents 105694f + 5858baf commit abd7dea

1 file changed

Lines changed: 25 additions & 8 deletions

File tree

pylib/cqlshlib/test/run_cqlsh.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import os
2020
import sys
2121
import re
22+
import codecs
2223
import contextlib
2324
import subprocess
2425
import signal
@@ -116,6 +117,12 @@ def __init__(self, path, tty=True, env=None, args=()):
116117
env = {}
117118
self.env = env
118119
self.readbuf = ''
120+
# os.read()/pipe reads can split a multi-byte UTF-8 sequence across two
121+
# reads (the boundary depends on terminal/readline echo flushing, which
122+
# changed with the Ubuntu 22.04 / readline 8.1 build image). Decode
123+
# incrementally so a partial trailing sequence is buffered until the
124+
# rest of its bytes arrive, instead of failing with UnicodeDecodeError.
125+
self._decoder = codecs.getincrementaldecoder("utf-8")()
119126

120127
self.start_proc()
121128

@@ -160,16 +167,26 @@ def send_pipe(self, data):
160167
self.proc.stdin.write(data)
161168

162169
def read_tty(self, blksize, timeout=None):
163-
buf = os.read(self.childpty, blksize)
164-
if isinstance(buf, bytes):
165-
buf = buf.decode("utf-8")
166-
return buf
170+
while True:
171+
buf = os.read(self.childpty, blksize)
172+
if not isinstance(buf, bytes):
173+
return buf
174+
decoded = self._decoder.decode(buf)
175+
# A non-empty read that decodes to '' means we only got the start of
176+
# a multi-byte UTF-8 sequence; keep reading until it completes rather
177+
# than returning '', which callers (read_until) treat as EOF.
178+
# An empty read (b'') is genuine EOF and must propagate as ''.
179+
if decoded != '' or buf == b'':
180+
return decoded
167181

168182
def read_pipe(self, blksize, timeout=None):
169-
buf = self.proc.stdout.read(blksize)
170-
if isinstance(buf, bytes):
171-
buf = buf.decode("utf-8")
172-
return buf
183+
while True:
184+
buf = self.proc.stdout.read(blksize)
185+
if not isinstance(buf, bytes):
186+
return buf
187+
decoded = self._decoder.decode(buf)
188+
if decoded != '' or buf == b'':
189+
return decoded
173190

174191
def read_until(self, until, blksize=4096, timeout=None,
175192
flags=0, ptty_timeout=None, replace=None):

0 commit comments

Comments
 (0)