Skip to content

Commit 83b32b1

Browse files
committed
Small update
1 parent c500fa6 commit 83b32b1

1 file changed

Lines changed: 348 additions & 0 deletions

File tree

pycatfile/pyfile.py

Lines changed: 348 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3171,6 +3171,354 @@ def GetTotalSize(file_list):
31713171
PY_STDERR_TEXT.write("Error accessing file {}: {}\n".format(item, e))
31723172
return total_size
31733173

3174+
def MajorMinorToDev(major, minor):
    """
    Pack major/minor numbers into a single device number using the
    legacy 8-bit layout (major in the high byte, minor in the low byte).
    Compatible with Python 2 and 3.
    """
    dev = major << 8
    dev |= minor
    return dev
3180+
3181+
def DevToMajorMinor(dev):
    """
    Split a device number into its (major, minor) pair using the legacy
    8-bit layout (each component masked to one byte).
    Compatible with Python 2 and 3.
    """
    return ((dev >> 8) & 0xFF, dev & 0xFF)
3189+
3190+
3191+
def GetDataFromArray(data, path, default=None):
    """
    Follow a sequence of keys/indices into a nested structure (EAFP style).
    Returns 'default' if any step of the path is missing or unindexable.
    """
    node = data
    try:
        for step in path:
            node = node[step]
    except (KeyError, TypeError, IndexError):
        return default
    return node
3199+
3200+
3201+
def GetDataFromArrayAlt(structure, path, default=None):
    """
    Follow a sequence of keys/indices into a nested structure (LBYL style).
    Only descends through dicts (by present key) and lists (by in-range int
    index, negative indices allowed); otherwise returns 'default'.
    """
    node = structure
    for step in path:
        if isinstance(node, dict) and step in node:
            node = node[step]
            continue
        ok_index = isinstance(node, list) and isinstance(step, int)
        if ok_index and -len(node) <= step < len(node):
            node = node[step]
            continue
        return default
    return node
3211+
3212+
# ========= pushback-aware delimiter reader =========
3213+
class _DelimiterReader:
3214+
"""
3215+
Chunked reader that consumes up to N occurrences of a byte delimiter.
3216+
- Works with non-seekable streams by stashing over-read bytes on fp._read_until_delim_pushback
3217+
- For seekable streams, rewinds over-read via seek(-n, SEEK_CUR)
3218+
"""
3219+
_PB_ATTR = "_read_until_delim_pushback"
3220+
3221+
def __init__(self, fp, delimiter, chunk_size=8192, max_read=64 * 1024 * 1024):
3222+
if not hasattr(fp, "read"):
3223+
raise ValueError("fp must be a readable file-like object")
3224+
3225+
# normalize delimiter -> bytes
3226+
if delimiter is None:
3227+
delimiter = "\0"
3228+
if isinstance(delimiter, str):
3229+
delimiter_b = delimiter.encode("utf-8")
3230+
else:
3231+
delimiter_b = bytes(delimiter)
3232+
if not delimiter_b:
3233+
raise ValueError("delimiter must not be empty")
3234+
3235+
self.fp = fp
3236+
self.delim = delimiter_b
3237+
self.dlen = len(delimiter_b)
3238+
self.chunk = int(chunk_size)
3239+
self.max_read = int(max_read)
3240+
3241+
self._buf = bytearray()
3242+
self._total = 0
3243+
3244+
# detect seekability (best-effort)
3245+
seekable = getattr(fp, "seekable", None)
3246+
if callable(seekable):
3247+
self._seekable = bool(seekable())
3248+
else:
3249+
self._seekable = hasattr(fp, "seek") and hasattr(fp, "tell")
3250+
3251+
# Preload any pushback from previous reads on this fp
3252+
pb = getattr(fp, self._PB_ATTR, None)
3253+
if pb:
3254+
self._buf.extend(pb)
3255+
setattr(fp, self._PB_ATTR, bytearray()) # consume
3256+
3257+
def _read_more(self):
3258+
data = self.fp.read(self.chunk)
3259+
if not data:
3260+
return False
3261+
if not isinstance(data, (bytes, bytearray, memoryview)):
3262+
raise TypeError("fp.read() must return bytes-like")
3263+
if isinstance(data, memoryview):
3264+
data = data.tobytes()
3265+
self._buf.extend(data)
3266+
self._total += len(data)
3267+
if self._total > self.max_read:
3268+
raise ValueError("Maximum read limit reached without finding the delimiter")
3269+
return True
3270+
3271+
def _pushback(self, over_bytes):
3272+
"""Return extra bytes to the stream (seek back) or stash on the fp."""
3273+
if not over_bytes:
3274+
return
3275+
if self._seekable:
3276+
try:
3277+
self.fp.seek(-len(over_bytes), io.SEEK_CUR)
3278+
return
3279+
except Exception:
3280+
pass
3281+
# Non-seekable: stash for next call on this fp
3282+
pb = getattr(self.fp, self._PB_ATTR, None)
3283+
if pb is None:
3284+
setattr(self.fp, self._PB_ATTR, bytearray(over_bytes))
3285+
else:
3286+
pb.extend(over_bytes)
3287+
3288+
def read_one_piece(self):
3289+
"""
3290+
Read bytes up to (but not including) the next delimiter.
3291+
Returns (piece_bytes, found_delimiter_bool).
3292+
"""
3293+
out = bytearray()
3294+
while True:
3295+
idx = self._buf.find(self.delim)
3296+
if idx != -1:
3297+
out.extend(self._buf[:idx])
3298+
over = self._buf[idx + self.dlen:]
3299+
self._buf[:] = b""
3300+
self._pushback(over)
3301+
return bytes(out), True
3302+
3303+
# No delimiter present: emit buffer and read more
3304+
if self._buf:
3305+
out.extend(self._buf)
3306+
self._buf[:] = b""
3307+
3308+
if not self._read_more():
3309+
# EOF: return whatever we have (possibly empty), no delimiter
3310+
return bytes(out), False
3311+
3312+
def read_n_pieces(self, n, pad_to_n=False):
3313+
"""
3314+
Read up to n pieces (n delimiters). Returns list of bytes; len <= n.
3315+
If pad_to_n=True, pads with b"" until length == n (avoids downstream IndexError).
3316+
"""
3317+
n = int(n)
3318+
parts = []
3319+
while len(parts) < n:
3320+
piece, found = self.read_one_piece()
3321+
if not found and piece == b"":
3322+
break # true EOF with nothing more
3323+
parts.append(piece)
3324+
if not found:
3325+
break # EOF after a final unterminated piece
3326+
if pad_to_n and len(parts) < n:
3327+
parts.extend([b""] * (n - len(parts)))
3328+
return parts
3329+
3330+
3331+
# ========= helpers =========
3332+
def _default_delim(delimiter):
3333+
# Try your global spec if present; else default to NUL
3334+
try:
3335+
if delimiter is None:
3336+
delimiter = __file_format_dict__["format_delimiter"]
3337+
except Exception:
3338+
pass
3339+
return delimiter if delimiter is not None else "\0"
3340+
3341+
3342+
def _decode_text(b, errors):
3343+
return b.decode("utf-8", errors=errors)
3344+
3345+
3346+
def _read_exact(fp, n):
3347+
"""Read exactly n bytes or raise EOFError on premature EOF."""
3348+
want = int(n)
3349+
out = bytearray()
3350+
while len(out) < want:
3351+
chunk = fp.read(want - len(out))
3352+
if not chunk:
3353+
raise EOFError("Unexpected EOF: wanted {} more bytes".format(want - len(out)))
3354+
if isinstance(chunk, memoryview):
3355+
chunk = chunk.tobytes()
3356+
out.extend(chunk)
3357+
return bytes(out)
3358+
3359+
3360+
def _expect_delimiter(fp, delimiter):
    """Consume exactly the delimiter from fp; raise ValueError on any mismatch (no seeking)."""
    delim = _default_delim(delimiter)
    delim_b = delim.encode("utf-8") if isinstance(delim, str) else bytes(delim)
    got = _read_exact(fp, len(delim_b))
    if got != delim_b:
        raise ValueError("Delimiter mismatch: expected {!r}, got {!r}".format(delim_b, got))
3370+
3371+
3372+
# ========= unified public API (bytes/text control) =========
def read_until_delimiter(
    fp,
    delimiter=b"\0",
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
):
    """
    Read from fp until the first delimiter occurrence; the delimiter itself
    is consumed but not returned.

    - decode=True returns UTF-8 text, decode=False returns raw bytes.
    - Non-seekable streams are supported via pushback stored on the fp.
    """
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=(64 * 1024 * 1024) if max_read is None else max_read,
    )
    piece, _found = reader.read_one_piece()
    if not decode:
        return piece
    return _decode_text(piece, "strict" if errors is None else errors)
3401+
3402+
3403+
def read_until_n_delimiters(
    fp,
    delimiter=b"\0",
    num_delimiters=1,
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
    pad_to_n=False,
):
    """
    Read up to 'num_delimiters' delimiter-terminated pieces from fp.

    Returns a list of pieces (length <= num_delimiters).  With pad_to_n=True
    the result is padded with empty pieces to exactly num_delimiters entries,
    which keeps rigid positional parsers from raising IndexError.
    Pieces are decoded as UTF-8 when decode=True, otherwise returned as bytes.
    """
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=(64 * 1024 * 1024) if max_read is None else max_read,
    )
    parts = reader.read_n_pieces(num_delimiters, pad_to_n=pad_to_n)
    if not decode:
        return parts
    err_mode = "strict" if errors is None else errors
    return [_decode_text(part, err_mode) for part in parts]
3434+
3435+
3436+
# ========= back-compat wrappers (your original names) =========
def ReadTillNullByteOld(fp, delimiter=None):
    """
    Back-compat reader: emulate byte-by-byte reads (chunk_size=1) and decode
    with 'replace' like the original Alt variants.

    FIX: the default used to be `_default_delim(None)`, evaluated once at
    import time, freezing the format delimiter.  A None default is now
    resolved at call time by read_until_delimiter (late binding), which is
    backward compatible.
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=64 * 1024 * 1024,
        chunk_size=1,
        decode=True,
        errors="replace",
    )
3447+
3448+
3449+
def ReadUntilNullByteOld(fp, delimiter=None):
    """Alias for ReadTillNullByteOld.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return ReadTillNullByteOld(fp, delimiter)
3451+
3452+
3453+
def ReadTillNullByteAlt(fp, delimiter=None, chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Back-compat chunked reader decoding with 'replace'.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
    )
3462+
3463+
3464+
def ReadUntilNullByteAlt(fp, delimiter=None, chunk_size=1024, max_read=64 * 1024 * 1024):
    """Alias for ReadTillNullByteAlt.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return ReadTillNullByteAlt(fp, delimiter, chunk_size, max_read)
3466+
3467+
3468+
def ReadTillNullByte(fp, delimiter=None, max_read=64 * 1024 * 1024):
    """
    Back-compat reader with 8 KiB chunks and strict UTF-8 decoding.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return read_until_delimiter(
        fp,
        delimiter,
        max_read=max_read,
        chunk_size=8192,
        decode=True,
        errors="strict",
    )
3477+
3478+
3479+
def ReadUntilNullByte(fp, delimiter=None, max_read=64 * 1024 * 1024):
    """Alias for ReadTillNullByte.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return ReadTillNullByte(fp, delimiter, max_read)
3481+
3482+
3483+
def ReadTillNullByteByNum(
    fp,
    delimiter=None,
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Back-compat multi-piece reader: returns a list of text parts decoded
    with 'replace', padded to num_delimiters entries so rigid positional
    parsers cannot hit IndexError.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return read_until_n_delimiters(
        fp,
        delimiter,
        num_delimiters,
        max_read=max_read,
        chunk_size=chunk_size,
        decode=True,
        errors="replace",
        pad_to_n=True,
    )
3501+
3502+
3503+
def ReadUntilNullByteByNum(
    fp,
    delimiter=None,
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """Alias for ReadTillNullByteByNum.

    FIX: default changed from import-time `_default_delim(None)` to a None
    sentinel resolved at call time (late binding); backward compatible.
    """
    return ReadTillNullByteByNum(fp, delimiter, num_delimiters, chunk_size, max_read)
3511+
3512+
3513+
def SeekToEndOfFile(fp):
    """
    Position fp at end-of-file and return True.

    BUGFIX: the previous implementation stepped forward with fp.seek(1, 1)
    until tell() stopped changing — but Python file objects allow seeking
    past EOF, so tell() keeps growing and that loop never terminates.
    A single seek(0, SEEK_END) is both correct and O(1).
    """
    try:
        fp.seek(0, 2)  # 2 == io.SEEK_END
    except (OSError, ValueError):
        # Stream rejects SEEK_END: drain it so the position ends at EOF.
        while fp.read(65536):
            pass
    return True
3521+
31743522
def ReadFileHeaderData(fp, skipchecksum=False, formatspecs=None, saltkey=None):
31753523
if(formatspecs is None):
31763524
formatspecs = __file_format_multi_dict__

0 commit comments

Comments
 (0)