@@ -3171,6 +3171,354 @@ def GetTotalSize(file_list):
31713171 PY_STDERR_TEXT .write ("Error accessing file {}: {}\n " .format (item , e ))
31723172 return total_size
31733173
def MajorMinorToDev(major, minor):
    """
    Pack a major/minor pair into a single device number.

    The major number is shifted above the low 8 bits and the minor number is
    OR-ed into the result, i.e. ``(major << 8) | minor``. No masking is
    applied, so a minor larger than 255 overlaps the major bits.
    Compatible with Python 2 and 3.
    """
    major_bits = major << 8
    return major_bits | minor
3180+
def DevToMajorMinor(dev):
    """
    Split a device number into its ``(major, minor)`` pair.

    The minor is the low 8 bits of ``dev`` and the major is the following
    8 bits; anything above bit 15 is discarded.
    Compatible with Python 2 and 3.
    """
    byte_mask = 0xFF
    minor = dev & byte_mask
    major = (dev >> 8) & byte_mask
    return major, minor
3189+
3190+
def GetDataFromArray(data, path, default=None):
    """
    Walk ``data`` by subscripting it with each key of ``path`` in turn.

    Returns the element reached, or ``default`` as soon as any step raises
    KeyError, TypeError or IndexError (this includes a ``path`` that cannot
    be iterated). An empty path returns ``data`` itself.
    """
    try:
        cursor = data
        for step in path:
            cursor = cursor[step]
    except (KeyError, TypeError, IndexError):
        return default
    else:
        return cursor
3199+
3200+
def GetDataFromArrayAlt(structure, path, default=None):
    """
    Walk nested dicts and lists along ``path`` without relying on exceptions.

    A step is accepted only when the current node is a dict containing the
    key, or a list and the key is an int within ``-len .. len-1``. Any other
    combination (including tuples or strings as containers) returns
    ``default``. An empty path returns ``structure`` itself.
    """
    node = structure
    for step in path:
        if isinstance(node, dict):
            reachable = step in node
        elif isinstance(node, list):
            reachable = isinstance(step, int) and -len(node) <= step < len(node)
        else:
            reachable = False
        if not reachable:
            return default
        node = node[step]
    return node
3211+
3212+ # ========= pushback-aware delimiter reader =========
class _DelimiterReader:
    """
    Chunked reader that splits a byte stream on a delimiter.

    Data is read from ``fp`` in chunks, so more bytes than needed are usually
    consumed. The surplus is handed back after every piece:

    - seekable streams are rewound with ``seek(-n, SEEK_CUR)``;
    - otherwise the surplus is stashed on the stream object itself, in the
      attribute named by ``_PB_ATTR``, and picked up again by the next read
      (from this reader or from a new reader created on the same ``fp``).

    Parameters:
        fp: object with a ``read(size)`` method returning bytes-like data.
        delimiter: bytes-like or str (encoded as UTF-8); None means one NUL.
        chunk_size: number of bytes requested per ``fp.read`` call.
        max_read: ValueError is raised once more than this many bytes have
            been read from ``fp`` by this reader.

    Raises:
        ValueError: ``fp`` has no ``read`` attribute or the delimiter is empty.
    """
    _PB_ATTR = "_read_until_delim_pushback"

    def __init__(self, fp, delimiter, chunk_size=8192, max_read=64 * 1024 * 1024):
        if not hasattr(fp, "read"):
            raise ValueError("fp must be a readable file-like object")

        # Normalize the delimiter to bytes.
        if delimiter is None:
            delimiter = "\0"
        if isinstance(delimiter, str):
            delimiter_b = delimiter.encode("utf-8")
        else:
            delimiter_b = bytes(delimiter)
        if not delimiter_b:
            raise ValueError("delimiter must not be empty")

        self.fp = fp
        self.delim = delimiter_b
        self.dlen = len(delimiter_b)
        self.chunk = int(chunk_size)
        self.max_read = int(max_read)

        self._buf = bytearray()
        self._total = 0

        # Detect seekability (best effort): trust fp.seekable() when present,
        # otherwise assume that having both seek and tell is enough.
        seekable = getattr(fp, "seekable", None)
        if callable(seekable):
            self._seekable = bool(seekable())
        else:
            self._seekable = hasattr(fp, "seek") and hasattr(fp, "tell")

        # Pick up anything a previous reader left stashed on this fp.
        self._reclaim_pushback()

    def _reclaim_pushback(self):
        """Move bytes stashed on the stream object into the local buffer."""
        stash = getattr(self.fp, self._PB_ATTR, None)
        if stash:
            self._buf.extend(stash)
            setattr(self.fp, self._PB_ATTR, bytearray())  # consumed

    def _read_more(self):
        """
        Append one chunk from the stream to the buffer.

        Returns False at EOF (empty read), True otherwise. Raises TypeError
        when the stream returns non-bytes data and ValueError once the total
        number of bytes read exceeds ``max_read``.
        """
        data = self.fp.read(self.chunk)
        if not data:
            return False
        if not isinstance(data, (bytes, bytearray, memoryview)):
            raise TypeError("fp.read() must return bytes-like")
        if isinstance(data, memoryview):
            data = data.tobytes()
        self._buf.extend(data)
        self._total += len(data)
        if self._total > self.max_read:
            raise ValueError("Maximum read limit reached without finding the delimiter")
        return True

    def _pushback(self, over_bytes):
        """Return extra bytes to the stream (seek back) or stash on the fp."""
        if not over_bytes:
            return
        if self._seekable:
            try:
                self.fp.seek(-len(over_bytes), io.SEEK_CUR)
                return
            except Exception:
                # Deliberate best effort: a stream that claims to be seekable
                # but cannot rewind is handled like a non-seekable one below.
                pass
        # Non-seekable: stash for the next read on this fp.
        pb = getattr(self.fp, self._PB_ATTR, None)
        if pb is None:
            setattr(self.fp, self._PB_ATTR, bytearray(over_bytes))
        else:
            pb.extend(over_bytes)

    def read_one_piece(self):
        """
        Read bytes up to (but not including) the next delimiter.
        Returns (piece_bytes, found_delimiter_bool).

        At EOF without a delimiter the remaining bytes (possibly none) are
        returned with ``found_delimiter_bool`` set to False.
        """
        # A previous piece read through this same reader may have stashed its
        # surplus on a non-seekable fp; those bytes come before anything that
        # is still unread in the stream.
        self._reclaim_pushback()

        out = bytearray()
        # The last dlen-1 buffered bytes could be the beginning of a delimiter
        # that continues in the next chunk, so they must stay in the buffer.
        hold_back = self.dlen - 1
        while True:
            idx = self._buf.find(self.delim)
            if idx != -1:
                out.extend(self._buf[:idx])
                over = bytes(self._buf[idx + self.dlen:])
                del self._buf[:]
                self._pushback(over)
                return bytes(out), True

            # No complete delimiter buffered: emit only the bytes that can no
            # longer be part of one.
            settled = len(self._buf) - hold_back
            if settled > 0:
                out.extend(self._buf[:settled])
                del self._buf[:settled]

            if not self._read_more():
                # EOF: the held-back tail is ordinary data after all.
                out.extend(self._buf)
                del self._buf[:]
                return bytes(out), False

    def read_n_pieces(self, n, pad_to_n=False):
        """
        Read up to n pieces (n delimiters). Returns list of bytes; len <= n.
        If pad_to_n=True, pads with b"" until length == n (avoids downstream IndexError).
        """
        n = int(n)
        parts = []
        while len(parts) < n:
            piece, found = self.read_one_piece()
            if not found and piece == b"":
                break  # true EOF with nothing more
            parts.append(piece)
            if not found:
                break  # EOF after a final unterminated piece
        if pad_to_n and len(parts) < n:
            parts.extend([b""] * (n - len(parts)))
        return parts
3329+
3330+
3331+ # ========= helpers =========
def _default_delim(delimiter):
    """
    Resolve the delimiter to use when the caller did not supply one.

    A non-None ``delimiter`` is returned unchanged. Otherwise the module-level
    ``__file_format_dict__["format_delimiter"]`` is used when it is available
    and not None; failing that, a single NUL character is returned.
    """
    if delimiter is not None:
        return delimiter
    try:
        delimiter = __file_format_dict__["format_delimiter"]
    except (NameError, KeyError, TypeError):
        # The global format spec is not defined (yet), has no such key, or is
        # not subscriptable: fall back to NUL below.
        delimiter = None
    return delimiter if delimiter is not None else "\0"
3340+
3341+
def _decode_text(b, errors):
    """Decode the bytes ``b`` as UTF-8 using the given ``errors`` policy."""
    text = b.decode("utf-8", errors=errors)
    return text
3344+
3345+
def _read_exact(fp, n):
    """
    Read exactly n bytes or raise EOFError on premature EOF.

    Bytes that the delimiter reader stashed on ``fp`` (over-read from a
    stream it could not rewind) are consumed first, because they precede
    whatever is still unread in the stream itself.

    Returns the data as ``bytes``; ``n <= 0`` yields ``b""``.
    """
    want = int(n)
    out = bytearray()

    # Must stay equal to _DelimiterReader._PB_ATTR.
    pushback_attr = "_read_until_delim_pushback"
    stash = getattr(fp, pushback_attr, None)
    if stash and want > 0:
        out.extend(stash[:want])
        setattr(fp, pushback_attr, bytearray(stash[want:]))

    while len(out) < want:
        chunk = fp.read(want - len(out))
        if not chunk:
            raise EOFError("Unexpected EOF: wanted {} more bytes".format(want - len(out)))
        if isinstance(chunk, memoryview):
            chunk = chunk.tobytes()
        out.extend(chunk)
    return bytes(out)
3358+
3359+
def _expect_delimiter(fp, delimiter):
    """
    Consume the delimiter from ``fp`` and verify it (no seeking).

    The delimiter is resolved through ``_default_delim`` and converted to
    bytes (str is encoded as UTF-8). Exactly that many bytes are read with
    ``_read_exact``; ValueError is raised when they differ from the
    delimiter.
    """
    expected = _default_delim(delimiter)
    expected = expected.encode("utf-8") if isinstance(expected, str) else bytes(expected)
    actual = _read_exact(fp, len(expected))
    if actual != expected:
        raise ValueError("Delimiter mismatch: expected {!r}, got {!r}".format(expected, actual))
3370+
3371+
3372+ # ========= unified public API (bytes/text control) =========
def read_until_delimiter(
    fp,
    delimiter=b"\0",
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
):
    """
    Read from ``fp`` up to the first occurrence of ``delimiter``.

    The delimiter itself is consumed but not returned. If the stream ends
    before a delimiter is seen, whatever was read is returned.

    - decode=True returns text decoded as UTF-8 with the ``errors`` policy
      (default "strict"); decode=False returns the raw bytes.
    - max_read defaults to 64 MiB and chunk_size to 8192 when None.
    - Non-seekable streams are supported via pushback on the file object.
    """
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=64 * 1024 * 1024 if max_read is None else max_read,
    )
    raw, _ = reader.read_one_piece()
    if not decode:
        return raw
    return _decode_text(raw, "strict" if errors is None else errors)
3401+
3402+
def read_until_n_delimiters(
    fp,
    delimiter=b"\0",
    num_delimiters=1,
    max_read=None,
    chunk_size=None,
    decode=True,
    errors=None,
    pad_to_n=False,
):
    """
    Read up to ``num_delimiters`` delimiter-terminated pieces from ``fp``.

    Returns a list with at most ``num_delimiters`` entries; it is shorter
    when the stream ends early, unless pad_to_n=True, in which case empty
    pieces are appended until the list has exactly that length (useful for
    rigid parsers).

    - decode=True returns each piece as UTF-8 text using the ``errors``
      policy (default "strict"); decode=False returns bytes.
    - max_read defaults to 64 MiB and chunk_size to 8192 when None.
    """
    reader = _DelimiterReader(
        fp,
        delimiter=_default_delim(delimiter),
        chunk_size=8192 if chunk_size is None else chunk_size,
        max_read=64 * 1024 * 1024 if max_read is None else max_read,
    )
    pieces = reader.read_n_pieces(num_delimiters, pad_to_n=pad_to_n)
    if not decode:
        return pieces
    error_mode = "strict" if errors is None else errors
    return [_decode_text(piece, error_mode) for piece in pieces]
3434+
3435+
# ========= back-compat wrappers (original public names) =========
def ReadTillNullByteOld(fp, delimiter=_default_delim(None)):
    """
    Legacy entry point: read text up to the next delimiter, byte by byte.

    Delegates to ``read_until_delimiter`` with chunk_size=1 (emulating the
    old one-byte-at-a-time reader), a 64 MiB read limit, and UTF-8 decoding
    with errors="replace". The default delimiter is computed once, when this
    function is defined.
    """
    legacy_options = {
        "max_read": 64 * 1024 * 1024,
        "chunk_size": 1,
        "decode": True,
        "errors": "replace",
    }
    return read_until_delimiter(fp, delimiter, **legacy_options)
3447+
3448+
def ReadUntilNullByteOld(fp, delimiter=_default_delim(None)):
    """Alias of ``ReadTillNullByteOld``; forwards both arguments unchanged."""
    result = ReadTillNullByteOld(fp, delimiter)
    return result
3451+
3452+
def ReadTillNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """
    Read text up to the next delimiter using chunked reads.

    Delegates to ``read_until_delimiter`` with the given chunk_size and
    max_read, decoding the result as UTF-8 with errors="replace". The default
    delimiter is computed once, when this function is defined.
    """
    options = {
        "max_read": max_read,
        "chunk_size": chunk_size,
        "decode": True,
        "errors": "replace",
    }
    return read_until_delimiter(fp, delimiter, **options)
3462+
3463+
def ReadUntilNullByteAlt(fp, delimiter=_default_delim(None), chunk_size=1024, max_read=64 * 1024 * 1024):
    """Alias of ``ReadTillNullByteAlt``; forwards all arguments unchanged."""
    result = ReadTillNullByteAlt(fp, delimiter, chunk_size, max_read)
    return result
3466+
3467+
def ReadTillNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """
    Read text up to the next delimiter, decoding strictly.

    Delegates to ``read_until_delimiter`` with chunk_size=8192 and the given
    max_read; the result is decoded as UTF-8 with errors="strict", so invalid
    byte sequences raise. The default delimiter is computed once, when this
    function is defined.
    """
    options = {
        "max_read": max_read,
        "chunk_size": 8192,
        "decode": True,
        "errors": "strict",
    }
    return read_until_delimiter(fp, delimiter, **options)
3477+
3478+
def ReadUntilNullByte(fp, delimiter=_default_delim(None), max_read=64 * 1024 * 1024):
    """Alias of ``ReadTillNullByte``; forwards all arguments unchanged."""
    result = ReadTillNullByte(fp, delimiter, max_read)
    return result
3481+
3482+
def ReadTillNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """
    Read ``num_delimiters`` delimiter-terminated text fields from ``fp``.

    Delegates to ``read_until_n_delimiters`` with UTF-8 decoding using
    errors="replace" and pad_to_n=True, so the returned list always has
    exactly ``num_delimiters`` entries (missing fields are empty), which
    avoids IndexError in rigid parsers. The default delimiter is computed
    once, when this function is defined.
    """
    options = {
        "max_read": max_read,
        "chunk_size": chunk_size,
        "decode": True,
        "errors": "replace",
        "pad_to_n": True,
    }
    return read_until_n_delimiters(fp, delimiter, num_delimiters, **options)
3501+
3502+
def ReadUntilNullByteByNum(
    fp,
    delimiter=_default_delim(None),
    num_delimiters=1,
    chunk_size=1024,
    max_read=64 * 1024 * 1024,
):
    """Alias of ``ReadTillNullByteByNum``; forwards all arguments unchanged."""
    result = ReadTillNullByteByNum(fp, delimiter, num_delimiters, chunk_size, max_read)
    return result
3511+
3512+
def SeekToEndOfFile(fp):
    """
    Move the position of ``fp`` to the end of the stream.

    Uses an end-relative seek. Streams that reject that (ValueError or
    IOError/OSError) fall back to stepping forward one byte at a time until
    ``tell()`` stops advancing; that fallback only terminates on streams
    that do not move past their end.

    Always returns True.
    """
    try:
        fp.seek(0, 2)  # whence=2: relative to the end of the stream
        return True
    except (ValueError, IOError, OSError):
        # End-relative seeking unsupported by this stream: step forward.
        pass

    lasttell = fp.tell()
    while True:
        fp.seek(1, 1)  # whence=1: relative to the current position
        position = fp.tell()
        if position == lasttell:
            break
        lasttell = position
    return True
3521+
31743522def ReadFileHeaderData (fp , skipchecksum = False , formatspecs = None , saltkey = None ):
31753523 if (formatspecs is None ):
31763524 formatspecs = __file_format_multi_dict__
0 commit comments