Skip to content

Commit 54eea9a

Browse files
committed
Small update
1 parent d4e3279 commit 54eea9a

1 file changed

Lines changed: 113 additions & 2 deletions

File tree

pyfoxfile/pyfoxfile.py

Lines changed: 113 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,6 +2149,113 @@ def CheckCompressionSubType(infile, formatspecs=__file_format_multi_dict__, file
21492149
fp.close()
21502150
return filetype
21512151

2152+
# Patterns used when tidying a formatted size string.  They are compiled once
# at import time instead of on every formatting call.
# One or more "0" characters immediately before " <letters>".
_RE_ZERO_SPACE_UNIT = re.compile(r"([0]+) ([A-Za-z]+)")
# A literal "." immediately before " <letters>".
_RE_DOT_SPACE_UNIT = re.compile(r"\. ([A-Za-z]+)")

# Unit suffixes, smallest to largest; each entry carries its leading space.
# Binary (IEC) suffixes:
_IEC_UNITS = (
    " B",
    " KiB",
    " MiB",
    " GiB",
    " TiB",
    " PiB",
    " EiB",
    " ZiB",
    " YiB",
)
# Decimal (SI) suffixes:
_SI_UNITS = (
    " B",
    " kB",
    " MB",
    " GB",
    " TB",
    " PB",
    " EB",
    " ZB",
    " YB",
)
2159+
2160+
2161+
def _format_readable(value, suffix, precision):
    """
    Format a scaled size value followed by its unit suffix.

    The number is rendered as a fixed-point float with `precision` decimal
    places and a minimum field width of 3 (so with precision 0, short values
    are left-padded with spaces).  Trailing zeros of the fractional part, and
    a decimal point left bare by that, are removed:
    "1.50 KiB" -> "1.5 KiB", "2.0 MB" -> "2 MB".

    Zeros are only removed when the rendered number contains a decimal
    point.  Stripping every zero in front of the unit would corrupt integer
    output when precision is 0 ("100 B" -> "1 B", "  0 B" -> "   B").

    value: number to render.
    suffix: unit text appended after the number (e.g. " KiB").
    precision: number of decimal places used when formatting `value`.

    Returns the formatted string.
    """
    number = ("%3." + str(precision) + "f") % value
    if "." in number:
        # Drop insignificant fractional zeros, then a dangling decimal point.
        number = number.rstrip("0").rstrip(".")
    return "%s%s" % (number, suffix)
2167+
2168+
2169+
def get_readable_size(num_bytes, precision=1, unit="IEC"):
    """
    Describe a byte count in human-readable form.

    num_bytes: the size to describe; it is converted with float().
    precision: decimal places handed to the formatter.
    unit: "SI" (case-insensitive) selects steps of 1000 and the SI suffix
          table; any other value, including an empty one, selects steps of
          1024 and the IEC suffix table.

    Returns a dict with the keys:
      "Bytes"                 - num_bytes exactly as passed in
      "ReadableWithSuffix"    - the formatted text, e.g. "1.5 KiB"
      "ReadableWithoutSuffix" - first whitespace-separated token of that text
      "ReadableSuffix"        - second whitespace-separated token of that text
    """
    system = (unit or "IEC").upper()
    if system == "SI":
        step, suffixes = 1000.0, _SI_UNITS
    else:
        step, suffixes = 1024.0, _IEC_UNITS

    scaled = float(num_bytes)

    # Start from the largest suffix; it stays selected when the value never
    # drops below one step while walking up through the smaller units.
    chosen = suffixes[-1]
    for candidate in suffixes[:-1]:
        if abs(scaled) < step:
            chosen = candidate
            break
        scaled /= step

    text = _format_readable(scaled, chosen, precision)
    tokens = text.split()
    return {
        "Bytes": num_bytes,
        "ReadableWithSuffix": text,
        "ReadableWithoutSuffix": tokens[0],
        "ReadableSuffix": tokens[1],
    }
2204+
2205+
2206+
def _normalize_hash_types(usehashtypes):
    """
    Turn a hash-type specification into a clean list of algorithm names.

    usehashtypes: either a comma-separated string such as "md5, SHA1", or an
                  iterable of name strings such as ["md5", "SHA1"].  Any
                  falsy value (None, "", empty list) yields an empty list.

    Each name is stripped of surrounding whitespace and lower-cased.  Empty
    entries are dropped, and repeated names are kept only once (first
    occurrence wins) so callers do not compute the same digest twice.

    Returns a list like ["md5", "sha1"].
    """
    if not usehashtypes:
        return []
    # Anything with .split() is treated as the comma-separated string form;
    # everything else is assumed to already be an iterable of names.
    if hasattr(usehashtypes, "split"):
        candidates = usehashtypes.split(",")
    else:
        candidates = usehashtypes
    names = []
    for candidate in candidates:
        name = candidate.strip().lower()
        if name and name not in names:
            names.append(name)
    return names
2211+
2212+
2213+
def get_readable_size_from_file(infile, precision=1, unit="IEC",
                                usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the size of a file on disk, optionally adding content digests.

    infile: path passed to os.path.getsize() and open().
    precision, unit: forwarded unchanged to get_readable_size().
    usehashes: when true, digests of the file content are added.
    usehashtypes: hash names, normalized by _normalize_hash_types().

    Returns the dict produced by get_readable_size().  When hashing is
    requested and at least one hash name remains after normalization, one
    extra entry per algorithm is added, keyed by the upper-cased name
    (e.g. "MD5") and holding the hex digest.
    """
    result = get_readable_size(os.path.getsize(infile), precision, unit)
    if not usehashes:
        return result

    wanted = _normalize_hash_types(usehashtypes)
    if not wanted:
        return result

    # One hasher per requested algorithm, created before the file is opened.
    digests = {}
    for algo in wanted:
        digests[algo.upper()] = hashlib.new(algo)

    # Single pass over the file in 1 MiB blocks; every hasher sees each block.
    blocksize = 1024 * 1024
    with open(infile, "rb") as handle:
        while True:
            block = handle.read(blocksize)
            if block == b"":
                break
            for digest in digests.values():
                digest.update(block)

    for label, digest in digests.items():
        result[label] = digest.hexdigest()
    return result
2236+
2237+
2238+
def get_readable_size_from_string(instring, precision=1, unit="IEC",
                                  usehashes=False, usehashtypes="md5,sha1"):
    """
    Describe the length of an in-memory string, optionally adding digests.

    instring: text, bytes or bytearray.  The reported size is len(instring),
              so in Py3 a str is measured in characters while bytes and
              bytearray are measured in bytes.
    precision, unit: forwarded unchanged to get_readable_size().
    usehashes: when true, digests of the data are added.
    usehashtypes: hash names, normalized by _normalize_hash_types().

    Returns the dict produced by get_readable_size().  When hashing is
    requested and at least one hash name remains after normalization, one
    extra entry per algorithm is added, keyed by the upper-cased name
    (e.g. "SHA1") and holding the hex digest.  Text input is encoded as
    UTF-8 before hashing; bytes and bytearray input is hashed as-is.
    """
    # In Py3, len(str) counts characters; len(bytes) counts bytes. Keep original behavior.
    out = get_readable_size(len(instring), precision, unit)
    if not usehashes:
        return out

    hash_types = _normalize_hash_types(usehashtypes)
    if not hash_types:
        return out

    # bytearray has no .encode(); hash it directly like bytes rather than
    # failing with AttributeError only when hashing is switched on.
    if isinstance(instring, (bytes, bytearray)):
        data = instring
    else:
        data = instring.encode("utf-8")

    for h in hash_types:
        hasher = hashlib.new(h)
        hasher.update(data)
        out[h.upper()] = hasher.hexdigest()
    return out
2258+
21522259
def _advance(fp, base, n):
21532260
"""
21542261
Move file position to right after the BOM/signature.
@@ -4848,7 +4955,9 @@ def ReadFileHeaderDataWithContentToArray(fp, listonly=False, contentasfile=True,
48484955
fcontents.seek(0, 0)
48494956
if(not contentasfile):
48504957
fcontents = fcontents.read()
4851-
outlist = {'fheadersize': fheadsize, 'fhstart': fheaderstart, 'fhend': fhend, 'ftype': ftype, 'fencoding': fencoding, 'fcencoding': fcencoding, 'fname': fname, 'fbasedir': fbasedir, 'flinkname': flinkname, 'fsize': fsize, 'fblksize': fblksize, 'fblocks': fblocks, 'fflags': fflags, 'fatime': divmod(int(fatime), 10**9)[0], 'fmtime': divmod(int(fmtime), 10**9)[0], 'fctime': divmod(int(fctime), 10**9)[0], 'fbtime': divmod(int(fbtime), 10**9)[0], 'fatime_ns': fatime, 'fmtime_ns': fmtime, 'fctime_ns': fctime, 'fbtime_ns': fbtime, 'fmode': fmode, 'fchmode': fchmode, 'fstrmode': PrintPermissionString(fmode, ftype), 'ftypemod': ftypemod, 'fwinattributes': fwinattributes, 'fcompression': fcompression, 'fcsize': fcsize, 'fuid': fuid, 'funame': funame, 'fgid': fgid, 'fgname': fgname, 'finode': finode, 'flinkcount': flinkcount,
4958+
iecsize = get_readable_size(fsize, unit="IEC")
4959+
sisize = get_readable_size(fsize, unit="SI")
4960+
outlist = {'fheadersize': fheadsize, 'fhstart': fheaderstart, 'fhend': fhend, 'ftype': ftype, 'fencoding': fencoding, 'fcencoding': fcencoding, 'fname': fname, 'fbasedir': fbasedir, 'flinkname': flinkname, 'fsize': fsize, 'fsize_si': sisize, 'fsize_iec': iecsize, 'fblksize': fblksize, 'fblocks': fblocks, 'fflags': fflags, 'fatime': divmod(int(fatime), 10**9)[0], 'fmtime': divmod(int(fmtime), 10**9)[0], 'fctime': divmod(int(fctime), 10**9)[0], 'fbtime': divmod(int(fbtime), 10**9)[0], 'fatime_ns': fatime, 'fmtime_ns': fmtime, 'fctime_ns': fctime, 'fbtime_ns': fbtime, 'fmode': fmode, 'fchmode': fchmode, 'fstrmode': PrintPermissionString(fmode, ftype), 'ftypemod': ftypemod, 'fwinattributes': fwinattributes, 'fcompression': fcompression, 'fcsize': fcsize, 'fuid': fuid, 'funame': funame, 'fgid': fgid, 'fgname': fgname, 'finode': finode, 'flinkcount': flinkcount,
48524961
'fdev': fdev, 'frdev': frdev, 'fseektojson': fseektojson, 'fseektocontent': fseektocontent, 'fseeknextfile': fseeknextfile, 'fheaderchecksumtype': HeaderOut[-4], 'fjsonchecksumtype': fjsonchecksumtype, 'fcontentchecksumtype': HeaderOut[-3], 'fnumfields': fnumfields + 2, 'frawheader': HeaderOut, 'fvendorfields': fvendorfields, 'fvendordata': fvendorfieldslist, 'fextrafields': fextrafields, 'fextrafieldsize': fextrasize, 'fextradata': fextrafieldslist, 'fjsontype': fjsontype, 'fjsonlen': fjsonlen, 'fjsonsize': fjsonsize, 'fjsonrawdata': fjsonrawcontent, 'fjsondata': fjsoncontent, 'fjstart': fjstart, 'fjend': fjend, 'fheaderchecksum': fcs, 'fjsonchecksum': fjsonchecksum, 'fcontentchecksum': fccs, 'fhascontents': pyhascontents, 'fcontentstart': fcontentstart, 'fcontentend': fcontentend, 'fcontentasfile': contentasfile, 'fcontents': fcontents}
48534962
return outlist
48544963

@@ -5485,7 +5594,9 @@ def ReadFileDataWithContentToArray(fp, filestart=0, seekstart=0, seekend=0, list
54855594
realidnum = realidnum + 1
54865595
CatSize = fp.tell()
54875596
CatSizeEnd = CatSize
5488-
outlist.update({'fp': fp, 'fsize': CatSizeEnd})
5597+
iecsize = get_readable_size(CatSizeEnd, unit="IEC")
5598+
sisize = get_readable_size(CatSizeEnd, unit="SI")
5599+
outlist.update({'fp': fp, 'fsize': CatSizeEnd, 'fsize_si': sisize, 'fsize_iec': iecsize})
54895600
return outlist
54905601

54915602

0 commit comments

Comments
 (0)