Skip to content

Commit 626d696

Browse files
committed
Add substream() method for zero-copy substreams
See kaitai-io/kaitai_struct#44. A detailed explanation of why this implementation approach was chosen can be found in this comment: #67 (comment). Closely follows the existing implementation in our Ruby runtime library:
* `SubIO` class: https://github.com/kaitai-io/kaitai_struct_ruby_runtime/blob/c4dc80f6b89189b6ee9fdbe15130b2a7daebc02b/lib/kaitai/struct/struct.rb#L607-L722
* `substream()` method: https://github.com/kaitai-io/kaitai_struct_ruby_runtime/blob/c4dc80f6b89189b6ee9fdbe15130b2a7daebc02b/lib/kaitai/struct/struct.rb#L577-L597
1 parent eadd633 commit 626d696

1 file changed

Lines changed: 97 additions & 1 deletion

File tree

kaitaistruct.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import struct
33
import warnings
44
from contextlib import suppress
5-
from io import SEEK_CUR, SEEK_END, BytesIO
5+
from io import SEEK_CUR, SEEK_END, SEEK_SET, BufferedIOBase, BytesIO
66

77
# Kaitai Struct runtime version, in the format defined by PEP 440.
88
# Used by our setup.cfg to set the version number in
@@ -92,6 +92,81 @@ def __setattr__(self, key, value):
9292
super_setattr(key, value)
9393

9494

95+
class SubIO(BufferedIOBase):
    """Read-only, seekable window onto a region of another stream.

    No bytes are copied up front: every ``read()`` is forwarded to
    ``parent_io`` at ``parent_start`` plus the window's own position, and
    the parent's position is put back afterwards.
    """

    def __init__(self, parent_io, parent_start, size):
        """Create a window of ``size`` bytes starting at ``parent_start``.

        Args:
            parent_io: Stream that actually holds the data; it is used via
                ``tell()``, ``seek()`` and ``read()``.
            parent_start: Offset in ``parent_io`` where the window begins.
            size: Length of the window in bytes.
        """
        self.parent_io = parent_io
        self.parent_start = parent_start
        self.size = size
        # Current position, relative to the start of the window
        self._pos = 0

    def readable(self):
        """Return ``True`` (after checking that the stream is not closed)."""
        self._checkClosed()
        return True

    def seekable(self):
        """Return ``True`` (after checking that the stream is not closed)."""
        self._checkClosed()
        return True

    def writable(self):
        """Return ``False`` (after checking that the stream is not closed)."""
        self._checkClosed()
        # NOTE: `SubIO` only supports reading for now
        return False

    def seek(self, offset, whence=SEEK_SET, /):
        """Move the window position and return the new position.

        A negative absolute offset (``SEEK_SET``) is rejected, whereas a
        relative seek (``SEEK_CUR`` / ``SEEK_END``) that would land before
        the start of the window is clamped to 0. Seeking past the end of
        the window is allowed.

        Raises:
            TypeError: If ``offset`` is not an ``int``.
            ValueError: If ``offset`` is negative with ``SEEK_SET``, or if
                ``whence`` is not one of the three supported values.
        """
        if not isinstance(offset, int):
            type_name = type(offset).__name__
            msg = f"'{type_name}' object cannot be interpreted as an integer"
            raise TypeError(msg)

        self._checkClosed()

        # Work out the target first so that `_pos` is assigned exactly once
        if whence == SEEK_SET:
            if offset < 0:
                msg = f"negative seek value {offset}"
                raise ValueError(msg)
            target = offset
        elif whence == SEEK_CUR:
            target = self._pos + offset
        elif whence == SEEK_END:
            target = self.size + offset
        else:
            msg = f"invalid whence ({whence}, should be 0, 1 or 2)"
            raise ValueError(msg)

        self._pos = max(0, target)
        return self._pos

    def tell(self):
        """Return the current position relative to the start of the window."""
        self._checkClosed()
        return self._pos

    def read(self, size=-1, /):
        """Read up to ``size`` bytes from the window.

        ``None`` or a negative ``size`` means "everything up to the end of
        the window". Returns ``b""`` when nothing is left to read (including
        when the position is past the end of the window).

        Raises:
            TypeError: If ``size`` is neither an ``int`` nor ``None``.
        """
        requested = -1 if size is None else size
        if not isinstance(requested, int):
            type_name = type(requested).__name__
            msg = f"argument should be integer or None, not '{type_name}'"
            raise TypeError(msg)

        self._checkClosed()

        remaining = self.size - self._pos
        count = remaining if requested < 0 else min(requested, remaining)
        if count <= 0:
            return b""

        parent = self.parent_io
        saved_pos = parent.tell()
        parent.seek(self.parent_start + self._pos)
        try:
            chunk = parent.read(count)
            self._pos += len(chunk)
        finally:
            # Always put the parent's position back, even if the read failed
            parent.seek(saved_pos)

        return chunk
168+
169+
95170
class KaitaiStream:
96171
def __init__(self, io):
97172
self._io = io
@@ -809,6 +884,27 @@ def process_rotate_left(data, amount, group_size):
809884

810885
# region Misc runtime operations
811886

887+
def substream(self, n):
    """Return a new `KaitaiStream` over the next ``n`` bytes of this stream.

    When the underlying IO is seekable, the substream is a `SubIO` window
    onto it (no bytes are read here), and this stream's position is moved
    ``n`` bytes forward. Otherwise the bytes are read with `read_bytes()`
    and wrapped in a `BytesIO`.

    Raises:
        InvalidArgumentError: If ``n`` is negative (seekable path).
        EndOfStreamError: If fewer than ``n`` bytes remain (seekable path).
    """
    if not self._io.seekable():
        # Non-seekable stream => fall back to the traditional copying implementation
        copied = self.read_bytes(n)
        return KaitaiStream(BytesIO(copied))

    self.align_to_byte()

    if n < 0:
        msg = f"requested invalid {n} amount of bytes"
        raise InvalidArgumentError(msg)

    start = self.pos()
    available = max(0, self.size() - start)
    if n > available:
        msg = f"requested {n} bytes, but only {available} bytes available"
        raise EndOfStreamError(msg, n, available)

    window = SubIO(self._io, start, n)
    result = KaitaiStream(window)
    # Skip over the region that now belongs to the substream
    self._io.seek(start + n)
    return result
907+
812908
@staticmethod
813909
def int_from_byte(v):
814910
"""Convert a byte array item to an integer.

0 commit comments

Comments
 (0)