1+ import codecs
12import errno
23import locale
34import os
5+ import signal
46import struct
57import sys
68import threading
79import time
8- import signal
910from subprocess import Popen , PIPE
1011from types import TracebackType
1112from typing import (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692693 :returns:
693694 A generator yielding strings.
694695
695- Specifically, each resulting string is the result of decoding
696- `read_chunk_size` bytes read from the subprocess' out/err stream.
696+ Specifically, each resulting string is the result of incrementally
697+ decoding up to `read_chunk_size` bytes read from the subprocess' out/err stream.
698+ The decoder ensures that encoding boundaries are respected.
697699
698700 .. versionadded:: 1.0
699701 """
@@ -703,11 +705,16 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703705 # process is done running" because sometimes that signal will appear
704706 # before we've actually read all the data in the stream (i.e.: a race
705707 # condition).
708+ decoder_cls = codecs .getincrementaldecoder (self .encoding )
709+ decoder = decoder_cls ("replace" )
706710 while True :
707711 data = reader (self .read_chunk_size )
708712 if not data :
709713 break
710- yield self .decode (data )
714+ # The incremental decoder will deal with partial characters.
715+ yield decoder .decode (data )
716+ # Emit the final chunk of data
717+ yield decoder .decode (b"" , True )
711718
712719 def write_our_output (self , stream : IO , string : str ) -> None :
713720 """
@@ -1020,6 +1027,13 @@ def decode(self, data: bytes) -> str:
10201027 """
10211028 Decode some ``data`` bytes, returning Unicode.
10221029
1030+ .. warning::
1031+ This function should not be used for streaming data. When data is
1032+ streamed in chunks, one chunk can end with only parts of a
1033+ multi-byte codepoint. This function will return a replacement
1034+ character for the incomplete byte sequence.
1035+ Use a ``codecs.IncrementalDecoder`` instead.
1036+
10231037 .. versionadded:: 1.0
10241038 """
10251039 # NOTE: yes, this is a 1-liner. The point is to make it much harder to
0 commit comments