Skip to content

Commit 9d8e7f1

Browse files
authored
Fix codec errors when streaming proc output
1 parent 2a8f95f commit 9d8e7f1

1 file changed

Lines changed: 18 additions & 4 deletions

File tree

invoke/runners.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
import codecs
12
import errno
23
import locale
34
import os
5+
import signal
46
import struct
57
import sys
68
import threading
79
import time
8-
import signal
910
from subprocess import Popen, PIPE
1011
from types import TracebackType
1112
from typing import (
@@ -692,8 +693,9 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
692693
:returns:
693694
A generator yielding strings.
694695
695-
Specifically, each resulting string is the result of decoding
696-
`read_chunk_size` bytes read from the subprocess' out/err stream.
696+
Specifically, each resulting string is the result of incrementally
697+
decoding up to `read_chunk_size` bytes from the subprocess' out/err stream.
698+
The decoder ensures that encoding boundaries are respected.
697699
698700
.. versionadded:: 1.0
699701
"""
@@ -703,11 +705,16 @@ def read_proc_output(self, reader: Callable) -> Generator[str, None, None]:
703705
# process is done running" because sometimes that signal will appear
704706
# before we've actually read all the data in the stream (i.e.: a race
705707
# condition).
708+
decoder_cls = codecs.getincrementaldecoder(self.encoding)
709+
decoder = decoder_cls("replace")
706710
while True:
707711
data = reader(self.read_chunk_size)
708712
if not data:
709713
break
710-
yield self.decode(data)
714+
# The incremental decoder will deal with partial characters.
715+
yield decoder.decode(data)
716+
# Emit the final chunk of data
717+
yield decoder.decode(b"", True)
711718

712719
def write_our_output(self, stream: IO, string: str) -> None:
713720
"""
@@ -1020,6 +1027,13 @@ def decode(self, data: bytes) -> str:
10201027
"""
10211028
Decode some ``data`` bytes, returning Unicode.
10221029
1030+
.. warning::
1031+
This function should not be used for streaming data. When data is
1032+
streamed in chunks, one chunk can end with only part of a
1033+
multi-byte codepoint. This function will return a replacement
1034+
character for the incomplete byte sequence.
1035+
Use a ``codecs.IncrementalDecoder`` instead.
1036+
10231037
.. versionadded:: 1.0
10241038
"""
10251039
# NOTE: yes, this is a 1-liner. The point is to make it much harder to

0 commit comments

Comments
 (0)