@@ -2149,6 +2149,113 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_multi_dict__, file
21492149 fp .close ()
21502150 return filetype
21512151
2152+ # Precompiled regexes (faster than compiling each call)
2153+ _RE_ZERO_SPACE_UNIT = re .compile (r"([0]+) ([A-Za-z]+)" )
2154+ _RE_DOT_SPACE_UNIT = re .compile (r"\. ([A-Za-z]+)" )
2155+
2156+ # Unit tables
2157+ _IEC_UNITS = (" B" , " KiB" , " MiB" , " GiB" , " TiB" , " PiB" , " EiB" , " ZiB" , " YiB" )
2158+ _SI_UNITS = (" B" , " kB" , " MB" , " GB" , " TB" , " PB" , " EB" , " ZB" , " YB" )
2159+
2160+
2161+ def _format_readable (value , suffix , precision ):
2162+ # Keep behavior close to original: format with width 3 and precision, then clean up
2163+ s = ("%3." + str (precision ) + "f%s" ) % (value , suffix )
2164+ s = _RE_ZERO_SPACE_UNIT .sub (r" \2" , s )
2165+ s = _RE_DOT_SPACE_UNIT .sub (r" \1" , s )
2166+ return s
2167+
2168+
def get_readable_size(num_bytes, precision=1, unit="IEC"):
    """Describe a byte count as a human-readable size.

    Args:
        num_bytes: The size to describe; converted with ``float()``.
        precision: Digits after the decimal point handed to the formatter.
        unit: ``"SI"`` (any letter case) selects 1000-based units; any other
            value, including ``None`` or an empty string, selects the
            1024-based IEC units.

    Returns:
        A dict with the keys ``"Bytes"`` (``num_bytes`` exactly as passed in),
        ``"ReadableWithSuffix"`` (the formatted text),
        ``"ReadableWithoutSuffix"`` and ``"ReadableSuffix"`` (the first and
        second whitespace-separated tokens of the formatted text).
    """
    system = (unit or "IEC").upper()
    if system == "SI":
        step, suffixes = 1000.0, _SI_UNITS
    else:
        step, suffixes = 1024.0, _IEC_UNITS

    scaled = float(num_bytes)

    # Scale down until the value fits below one step; if it never does,
    # the largest unit in the table is used with whatever value remains.
    chosen = suffixes[-1]
    for candidate in suffixes[:-1]:
        if abs(scaled) < step:
            chosen = candidate
            break
        scaled /= step

    text = _format_readable(scaled, chosen, precision)
    tokens = text.split()
    return {
        "Bytes": num_bytes,
        "ReadableWithSuffix": text,
        "ReadableWithoutSuffix": tokens[0],
        "ReadableSuffix": tokens[1],
    }
2204+
2205+
2206+ def _normalize_hash_types (usehashtypes ):
2207+ # Returns list like ["md5", "sha1"] with empties removed
2208+ if not usehashtypes :
2209+ return []
2210+ return [h .strip ().lower () for h in usehashtypes .split ("," ) if h .strip ()]
2211+
2212+
def get_readable_size_from_file(infile, precision=1, unit="IEC",
                                usehashes=False, usehashtypes="md5,sha1"):
    """Describe a file's size and, optionally, hash its contents.

    Args:
        infile: Path handed to ``os.path.getsize`` and ``open``.
        precision: Passed through to ``get_readable_size``.
        unit: Passed through to ``get_readable_size``.
        usehashes: When true, digests of the file contents are added.
        usehashtypes: Comma-separated hash names accepted by ``hashlib.new``.

    Returns:
        The dict produced by ``get_readable_size``; when hashing is requested
        it also holds one hex digest per hash name, keyed by the upper-cased
        name (e.g. ``"MD5"``).
    """
    result = get_readable_size(os.path.getsize(infile), precision, unit)
    if not usehashes:
        return result

    algorithms = _normalize_hash_types(usehashtypes)
    if not algorithms:
        return result

    # One hasher per requested algorithm, all fed from a single pass.
    digests = dict((name.upper(), hashlib.new(name)) for name in algorithms)

    block_size = 1024 * 1024
    with open(infile, "rb") as handle:
        while True:
            block = handle.read(block_size)
            if not block:
                break
            for digest in digests.values():
                digest.update(block)

    for label, digest in digests.items():
        result[label] = digest.hexdigest()
    return result
2236+
2237+
def get_readable_size_from_string(instring, precision=1, unit="IEC",
                                  usehashes=False, usehashtypes="md5,sha1"):
    """Describe the size of an in-memory string and, optionally, hash it.

    The size is ``len(instring)``: a character count for text and a byte
    count for ``bytes``/``bytearray``.  That measurement is intentionally
    left as it was.

    Args:
        instring: Text, ``bytes`` or ``bytearray`` to measure.
        precision: Passed through to ``get_readable_size``.
        unit: Passed through to ``get_readable_size``.
        usehashes: When true, digests of the data are added.
        usehashtypes: Comma-separated hash names accepted by ``hashlib.new``.

    Returns:
        The dict produced by ``get_readable_size``; when hashing is requested
        it also holds one hex digest per hash name, keyed by the upper-cased
        name.  Text input is hashed over its UTF-8 encoding.
    """
    size = len(instring)
    out = get_readable_size(size, precision, unit)

    if usehashes:
        hash_types = _normalize_hash_types(usehashtypes)
        if hash_types:
            # bytearray is already binary data and has no .encode(); hash it
            # as-is instead of sending it down the text branch.
            if isinstance(instring, (bytes, bytearray)):
                data = instring
            else:
                data = instring.encode("utf-8")

            for h in hash_types:
                hasher = hashlib.new(h)
                hasher.update(data)
                out[h.upper()] = hasher.hexdigest()

    return out
2258+
21522259def _advance (fp , base , n ):
21532260 """
21542261 Move file position to right after the BOM/signature.
@@ -4848,7 +4955,9 @@ def ReadFileHeaderDataWithContentToArray(fp, listonly=False, contentasfile=True,
48484955 fcontents .seek (0 , 0 )
48494956 if (not contentasfile ):
48504957 fcontents = fcontents .read ()
4851- outlist = {'fheadersize' : fheadsize , 'fhstart' : fheaderstart , 'fhend' : fhend , 'ftype' : ftype , 'fencoding' : fencoding , 'fcencoding' : fcencoding , 'fname' : fname , 'fbasedir' : fbasedir , 'flinkname' : flinkname , 'fsize' : fsize , 'fblksize' : fblksize , 'fblocks' : fblocks , 'fflags' : fflags , 'fatime' : divmod (int (fatime ), 10 ** 9 )[0 ], 'fmtime' : divmod (int (fmtime ), 10 ** 9 )[0 ], 'fctime' : divmod (int (fctime ), 10 ** 9 )[0 ], 'fbtime' : divmod (int (fbtime ), 10 ** 9 )[0 ], 'fatime_ns' : fatime , 'fmtime_ns' : fmtime , 'fctime_ns' : fctime , 'fbtime_ns' : fbtime , 'fmode' : fmode , 'fchmode' : fchmode , 'fstrmode' : PrintPermissionString (fmode , ftype ), 'ftypemod' : ftypemod , 'fwinattributes' : fwinattributes , 'fcompression' : fcompression , 'fcsize' : fcsize , 'fuid' : fuid , 'funame' : funame , 'fgid' : fgid , 'fgname' : fgname , 'finode' : finode , 'flinkcount' : flinkcount ,
4958+ iecsize = get_readable_size (fsize , unit = "IEC" )
4959+ sisize = get_readable_size (fsize , unit = "SI" )
4960+ outlist = {'fheadersize' : fheadsize , 'fhstart' : fheaderstart , 'fhend' : fhend , 'ftype' : ftype , 'fencoding' : fencoding , 'fcencoding' : fcencoding , 'fname' : fname , 'fbasedir' : fbasedir , 'flinkname' : flinkname , 'fsize' : fsize , 'fsize_si' : sisize , 'fsize_iec' : iecsize , 'fblksize' : fblksize , 'fblocks' : fblocks , 'fflags' : fflags , 'fatime' : divmod (int (fatime ), 10 ** 9 )[0 ], 'fmtime' : divmod (int (fmtime ), 10 ** 9 )[0 ], 'fctime' : divmod (int (fctime ), 10 ** 9 )[0 ], 'fbtime' : divmod (int (fbtime ), 10 ** 9 )[0 ], 'fatime_ns' : fatime , 'fmtime_ns' : fmtime , 'fctime_ns' : fctime , 'fbtime_ns' : fbtime , 'fmode' : fmode , 'fchmode' : fchmode , 'fstrmode' : PrintPermissionString (fmode , ftype ), 'ftypemod' : ftypemod , 'fwinattributes' : fwinattributes , 'fcompression' : fcompression , 'fcsize' : fcsize , 'fuid' : fuid , 'funame' : funame , 'fgid' : fgid , 'fgname' : fgname , 'finode' : finode , 'flinkcount' : flinkcount ,
48524961 'fdev' : fdev , 'frdev' : frdev , 'fseektojson' : fseektojson , 'fseektocontent' : fseektocontent , 'fseeknextfile' : fseeknextfile , 'fheaderchecksumtype' : HeaderOut [- 4 ], 'fjsonchecksumtype' : fjsonchecksumtype , 'fcontentchecksumtype' : HeaderOut [- 3 ], 'fnumfields' : fnumfields + 2 , 'frawheader' : HeaderOut , 'fvendorfields' : fvendorfields , 'fvendordata' : fvendorfieldslist , 'fextrafields' : fextrafields , 'fextrafieldsize' : fextrasize , 'fextradata' : fextrafieldslist , 'fjsontype' : fjsontype , 'fjsonlen' : fjsonlen , 'fjsonsize' : fjsonsize , 'fjsonrawdata' : fjsonrawcontent , 'fjsondata' : fjsoncontent , 'fjstart' : fjstart , 'fjend' : fjend , 'fheaderchecksum' : fcs , 'fjsonchecksum' : fjsonchecksum , 'fcontentchecksum' : fccs , 'fhascontents' : pyhascontents , 'fcontentstart' : fcontentstart , 'fcontentend' : fcontentend , 'fcontentasfile' : contentasfile , 'fcontents' : fcontents }
48534962 return outlist
48544963
@@ -5485,7 +5594,9 @@ def ReadFileDataWithContentToArray(fp, filestart=0, seekstart=0, seekend=0, list
54855594 realidnum = realidnum + 1
54865595 CatSize = fp .tell ()
54875596 CatSizeEnd = CatSize
5488- outlist .update ({'fp' : fp , 'fsize' : CatSizeEnd })
5597+ iecsize = get_readable_size (CatSizeEnd , unit = "IEC" )
5598+ sisize = get_readable_size (CatSizeEnd , unit = "SI" )
5599+ outlist .update ({'fp' : fp , 'fsize' : CatSizeEnd , 'fsize_si' : sisize , 'fsize_iec' : iecsize })
54895600 return outlist
54905601
54915602
0 commit comments