Skip to content

Commit 47a1372

Browse files
committed
Small update
1 parent 1d7ca2c commit 47a1372

1 file changed

Lines changed: 265 additions & 0 deletions

File tree

pycatfile/pycatfile.py

Lines changed: 265 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3789,6 +3789,271 @@ def GetDataFromArrayAlt(structure, path, default=None):
37893789
return default
37903790
return element
37913791

3792+
class MultiOpen:
    """Present several existing files as one fixed-size, seekable stream.

    The files are opened in the order given and their on-disk sizes are
    sampled once at construction time.  Reads and writes are mapped onto the
    component files by absolute position; the combined stream never grows,
    so writes stop at the end of the last file.

    Instances can be used as context managers; leaving the ``with`` block
    closes every component file.
    """

    def __init__(self, *paths, mode="r+b"):
        """Open every path in *paths* with *mode* and record their sizes.

        Raises whatever ``open()`` / ``os.path.getsize()`` raise.  Handles
        that were already opened before a failure are closed again so they
        do not leak.
        """
        self.files = []
        try:
            for p in paths:
                self.files.append(open(p, mode))
            # Sizes are sampled after opening, so a truncating mode is
            # reflected in the recorded extents.
            self.sizes = [os.path.getsize(p) for p in paths]
        except BaseException:
            try:
                self.close()
            except Exception:
                # The original failure is the one worth reporting.
                pass
            raise
        self.total_size = sum(self.sizes)
        self.position = 0

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def tell(self):
        """Return the current absolute position in the combined stream."""
        return self.position

    def seek(self, offset, whence=os.SEEK_SET):
        """Move to a new absolute position and return it.

        Raises ValueError for an unknown *whence* or when the target lies
        outside ``[0, total_size]``.
        """
        if whence == os.SEEK_SET:
            new_pos = offset
        elif whence == os.SEEK_CUR:
            new_pos = self.position + offset
        elif whence == os.SEEK_END:
            new_pos = self.total_size + offset
        else:
            raise ValueError("Invalid whence")

        if not (0 <= new_pos <= self.total_size):
            raise ValueError("Seek out of range")

        self.position = new_pos
        return self.position

    def _locate_file(self, position):
        """Map an absolute position to ``(file_index, offset_in_file)``."""
        cumulative = 0
        for i, size in enumerate(self.sizes):
            if position < cumulative + size:
                return i, position - cumulative
            cumulative += size
        # position == total_size: point at the end of the last file.
        return len(self.files) - 1, self.sizes[-1]

    def read(self, size=-1):
        """Read up to *size* bytes, crossing file boundaries as needed.

        ``None`` or a negative *size* reads everything up to the end of the
        combined stream.  Returns ``bytes`` (empty at end of stream).
        """
        if size is None or size < 0:
            size = self.total_size - self.position

        chunks = []
        remaining = size

        while remaining > 0 and self.position < self.total_size:
            idx, offset = self._locate_file(self.position)
            f = self.files[idx]
            f.seek(offset)

            # Never read past the recorded end of the current file.
            to_read = min(remaining, self.sizes[idx] - offset)
            chunk = f.read(to_read)
            if not chunk:
                # The file is shorter than its recorded size; stop here
                # instead of spinning forever.
                break

            chunks.append(chunk)
            self.position += len(chunk)
            remaining -= len(chunk)

        return b"".join(chunks)

    def write(self, data):
        """Overwrite bytes at the current position within existing extents.

        Returns the number of bytes written, which is smaller than
        ``len(data)`` when the end of the combined stream is reached.
        """
        remaining = len(data)
        written = 0

        while remaining > 0 and self.position < self.total_size:
            idx, offset = self._locate_file(self.position)
            f = self.files[idx]
            f.seek(offset)

            # Clamp to the current file so it never grows.
            to_write = min(remaining, self.sizes[idx] - offset)
            f.write(data[written:written + to_write])
            f.flush()

            self.position += to_write
            written += to_write
            remaining -= to_write

        return written

    def close(self):
        """Close every component file.

        All files are closed even if one of them fails; the first error is
        re-raised afterwards.
        """
        first_error = None
        for f in self.files:
            try:
                f.close()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
3874+
3875+
class MultiFileRaw(io.RawIOBase):
    """
    Treat multiple underlying files as one continuous binary stream.
    Works best when all component files already exist and have fixed sizes.

    - Supports readinto(), read(), write(), seek(), tell()
    - Intended for binary modes: 'rb', 'r+b', 'wb', etc.
    - File sizes are sampled once at construction; reads and writes stay
      within those extents and the combined stream never grows.
    """
    def __init__(self, paths, mode="r+b"):
        """Open *paths* (a single path or an iterable of paths) with *mode*.

        Raises ValueError for a non-binary mode, and whatever ``open()`` /
        ``os.path.getsize()`` raise.  Files opened before a failure are
        closed again so no handle leaks.
        """
        super().__init__()
        if isinstance(paths, (str, bytes, os.PathLike)):
            paths = [paths]
        self._paths = list(paths)
        self._mode = mode
        # Set up a consistent "empty" state first so close() / __del__ are
        # safe even if construction fails below.
        self._files = []
        self._sizes = []
        self._total = 0
        self._pos = 0
        self._closed = False

        if "b" not in mode:
            # Text-mode files expose neither readinto() nor byte offsets.
            raise ValueError("MultiFileRaw requires a binary mode")

        try:
            for p in self._paths:
                self._files.append(open(p, mode))
            self._sizes = [os.path.getsize(p) for p in self._paths]
        except BaseException:
            for f in self._files:
                try:
                    f.close()
                except Exception:
                    pass
            self._files = []
            self._sizes = []
            raise
        self._total = sum(self._sizes)

    # --- Helpers ---
    def _check_open(self):
        if self._closed:
            raise ValueError("I/O operation on closed MultiFileRaw")

    def _locate(self, pos: int):
        """Return (file_index, offset_in_that_file) for absolute stream position."""
        # pos in [0, total]
        acc = 0
        for i, sz in enumerate(self._sizes):
            nxt = acc + sz
            if pos < nxt:
                return i, pos - acc
            acc = nxt
        # pos == total -> point at end of last file
        return len(self._files) - 1, self._sizes[-1]

    # --- io.RawIOBase API ---
    def readable(self):
        return "r" in self._mode or "+" in self._mode

    def writable(self):
        return any(ch in self._mode for ch in ("w", "a", "x", "+"))

    def seekable(self):
        return True

    def tell(self):
        self._check_open()
        return self._pos

    def seek(self, offset, whence=os.SEEK_SET):
        """Move to a new absolute position and return it.

        Raises ValueError for an unknown *whence* or when the target lies
        outside ``[0, total]``.
        """
        self._check_open()
        if whence == os.SEEK_SET:
            new = int(offset)
        elif whence == os.SEEK_CUR:
            new = self._pos + int(offset)
        elif whence == os.SEEK_END:
            new = self._total + int(offset)
        else:
            raise ValueError("Invalid whence")

        if new < 0 or new > self._total:
            raise ValueError("Seek out of range")

        self._pos = new
        return self._pos

    def readinto(self, b):
        """
        Read bytes into a pre-allocated, writable bytes-like object b.
        Returns number of bytes read (0 at EOF).
        """
        self._check_open()
        if not self.readable():
            raise io.UnsupportedOperation("not readable")

        mv = memoryview(b).cast("B")
        if len(mv) == 0:
            return 0
        if self._pos >= self._total:
            return 0

        remaining = len(mv)
        out_off = 0

        while remaining > 0 and self._pos < self._total:
            idx, off = self._locate(self._pos)
            f = self._files[idx]
            f.seek(off, os.SEEK_SET)

            # Never read past the recorded end of the current file.
            can = min(remaining, self._sizes[idx] - off)
            n = f.readinto(mv[out_off:out_off + can])
            if not n:
                # Underlying file is shorter than recorded; stop here.
                break

            self._pos += n
            out_off += n
            remaining -= n

        return out_off

    def read(self, size=-1):
        """Read up to *size* bytes; ``None`` or a negative size reads to EOF."""
        self._check_open()
        if size is None or size < 0:
            size = self._total - self._pos
        if size == 0 or self._pos >= self._total:
            return b""

        buf = bytearray(size)
        n = self.readinto(buf)
        return bytes(buf[:n])

    def write(self, b):
        """Overwrite bytes at the current position; return the count written.

        The count is smaller than ``len(b)`` when the end of the combined
        stream is reached.
        """
        self._check_open()
        if not self.writable():
            raise io.UnsupportedOperation("not writable")

        mv = memoryview(b).cast("B")
        total_to_write = len(mv)
        if total_to_write == 0:
            return 0

        remaining = total_to_write
        in_off = 0

        # This implementation writes *within existing file extents*;
        # it never grows any component file.
        while remaining > 0 and self._pos < self._total:
            idx, off = self._locate(self._pos)
            f = self._files[idx]
            f.seek(off, os.SEEK_SET)

            can = min(remaining, self._sizes[idx] - off)
            n = f.write(mv[in_off:in_off + can])
            if n is None:
                n = can  # some file objects may return None; assume full write
            if n <= 0:
                break

            self._pos += n
            in_off += n
            remaining -= n

        return total_to_write - remaining

    def flush(self):
        self._check_open()
        for f in self._files:
            f.flush()

    def close(self):
        """Flush and close every component file; later calls are no-ops."""
        if self._closed:
            return
        try:
            # IOBase.close() calls self.flush(), so it must run while the
            # component files are still open and before _closed is set;
            # otherwise flush() raises "I/O operation on closed ...".
            super().close()
        finally:
            self._closed = True
            for f in self._files:
                try:
                    f.close()
                except Exception:
                    # Best-effort: keep closing the remaining files.
                    pass
4037+
4038+
4039+
def multiopen(paths, mode="r+b", buffering=io.DEFAULT_BUFFER_SIZE):
    """
    Return a seekable file-like object over multiple files.

    The stream is a MultiFileRaw wrapped in io.BufferedReader,
    io.BufferedWriter or io.BufferedRandom depending on *mode*.  Following
    the conventions of the built-in open(), ``buffering=0`` returns the
    unbuffered raw stream and a negative (or None) value selects
    io.DEFAULT_BUFFER_SIZE.

    Examples:
    f = multiopen(["a.bin","b.bin"], "rb")
    f = multiopen(["a.bin","b.bin"], "r+b") # read/write
    """
    raw = MultiFileRaw(paths, mode=mode)

    if buffering == 0:
        return raw
    if buffering is None or buffering < 0:
        buffering = io.DEFAULT_BUFFER_SIZE

    # Choose an appropriate buffered wrapper
    reading = "r" in mode
    writing = "w" in mode or "a" in mode
    updating = "+" in mode
    if reading and not updating and not writing:
        wrapper = io.BufferedReader
    elif writing and not updating and not reading:
        wrapper = io.BufferedWriter
    else:
        # default for random read/write
        wrapper = io.BufferedRandom

    try:
        return wrapper(raw, buffer_size=buffering)
    except Exception:
        # The wrapper rejected the raw stream (e.g. a mode it cannot
        # serve); release the already-opened files before re-raising.
        try:
            raw.close()
        except Exception:
            pass
        raise
4056+
37924057
# ========= pushback-aware delimiter reader =========
37934058
class _DelimiterReader:
37944059
"""

0 commit comments

Comments
 (0)