Skip to content

Commit 94d24f9

Browse files
author
Kazuki Suzuki Przyborowski
committed
Update pycatfile.py
1 parent ed7cf54 commit 94d24f9

1 file changed

Lines changed: 98 additions & 55 deletions

File tree

pycatfile.py

Lines changed: 98 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -5769,88 +5769,131 @@ def CheckCompressionTypeFromBytes(instring, formatspecs=__file_format_multi_dict
57695769
return CheckCompressionType(instringsfile, formatspecs, filestart, closefp)
57705770

57715771

5772-
def UncompressFileAlt(fp, formatspecs=__file_format_multi_dict__, filestart=0):
5773-
if(not hasattr(fp, "read")):
5772+
def UncompressFileAlt(fp, formatspecs=__file_format_multi_dict__, filestart=0,
5773+
use_mmap=False):
5774+
"""
5775+
Accepts an already-open *bytes* file-like (fp). Detects compression and
5776+
returns a FileLikeAdapter opened for 'rb'. If the stream is uncompressed
5777+
and backed by a real file, you can enable mmap via use_mmap=True.
5778+
"""
5779+
if not hasattr(fp, "read"):
57745780
return False
5781+
5782+
# Detect format on the fileobj at filestart
57755783
compresscheck = CheckCompressionType(fp, formatspecs, filestart, False)
5776-
if(IsNestedDict(formatspecs) and compresscheck in formatspecs):
5784+
if IsNestedDict(formatspecs) and compresscheck in formatspecs:
57775785
formatspecs = formatspecs[compresscheck]
5778-
if(compresscheck == "gzip" and compresscheck in compressionsupport):
5786+
5787+
# Build the appropriate decompressor stream (or pass-through)
5788+
if (compresscheck == "gzip" and compresscheck in compressionsupport):
57795789
fp = gzip.GzipFile(fileobj=fp, mode="rb")
5780-
elif(compresscheck == "bzip2" and compresscheck in compressionsupport):
5790+
elif (compresscheck == "bzip2" and compresscheck in compressionsupport):
57815791
fp = bz2.BZ2File(fp)
5782-
elif(compresscheck == "zstd" and compresscheck in compressionsupport):
5792+
elif (compresscheck == "zstd" and compresscheck in compressionsupport):
57835793
if 'zstandard' in sys.modules:
57845794
fp = ZstdFile(fileobj=fp, mode="rb")
57855795
elif 'pyzstd' in sys.modules:
57865796
fp = pyzstd.zstdfile.ZstdFile(fileobj=fp, mode="rb")
57875797
else:
5788-
return Flase
5789-
elif(compresscheck == "lz4" and compresscheck in compressionsupport):
5798+
return False
5799+
elif (compresscheck == "lz4" and compresscheck in compressionsupport):
57905800
fp = lz4.frame.LZ4FrameFile(fp, mode='rb')
5791-
elif((compresscheck == "lzo" or compresscheck == "lzop") and compresscheck in compressionsupport):
5801+
elif ((compresscheck == "lzo" or compresscheck == "lzop") and compresscheck in compressionsupport):
57925802
fp = LzopFile(fileobj=fp, mode="rb")
5793-
elif((compresscheck == "lzma" or compresscheck == "xz") and compresscheck in compressionsupport):
5803+
elif ((compresscheck == "lzma" or compresscheck == "xz") and compresscheck in compressionsupport):
57945804
fp = lzma.LZMAFile(fp)
5795-
elif(compresscheck == "zlib" and compresscheck in compressionsupport):
5805+
elif (compresscheck == "zlib" and compresscheck in compressionsupport):
57965806
fp = ZlibFile(fileobj=fp, mode="rb")
5797-
elif(compresscheck == formatspecs['format_magic']):
5798-
fp = fp
5799-
elif(not compresscheck):
5807+
else:
5808+
# Either magic matched your format OR no compression detected:
5809+
# pass-through original fp.
5810+
fp.seek(filestart, 0)
5811+
5812+
# Wrap in FileLikeAdapter; optionally mmap only if uncompressed + real file
5813+
mm = None
5814+
if use_mmap and compresscheck in (None, formatspecs.get('format_magic', None)):
5815+
base = _extract_base_fp(fp)
58005816
try:
5801-
fp = lz4.frame.LZ4FrameFile(fp, mode='rb')
5802-
except lzma.LZMAError:
5803-
return False
5804-
if(compresscheck != formatspecs['format_magic']):
5805-
fp.close()
5806-
return fp
5817+
if base is not None:
5818+
# Map whole file for read-only; keep base open via adapter
5819+
mm = mmap.mmap(base.fileno(), 0, access=mmap.ACCESS_READ)
5820+
except Exception:
5821+
mm = None # silently fall back to streaming
58075822

5823+
# Always position at start of logical stream
5824+
try:
5825+
fp.seek(0, 0)
5826+
except Exception:
5827+
pass
5828+
5829+
return FileLikeAdapter(fp, mode="rb", mm=mm)
58085830

5809-
def UncompressFile(infile, formatspecs=__file_format_multi_dict__, mode="rb", filestart=0):
5831+
def UncompressFile(infile, formatspecs=__file_format_multi_dict__, mode="rb",
5832+
filestart=0, use_mmap=False):
5833+
"""
5834+
Opens a path, detects compression by header, and returns a FileLikeAdapter.
5835+
If uncompressed and use_mmap=True, returns an mmap-backed reader.
5836+
"""
58105837
compresscheck = CheckCompressionType(infile, formatspecs, filestart, False)
5811-
if(IsNestedDict(formatspecs) and compresscheck in formatspecs):
5838+
if IsNestedDict(formatspecs) and compresscheck in formatspecs:
58125839
formatspecs = formatspecs[compresscheck]
5813-
if(sys.version_info[0] == 2 and compresscheck):
5814-
if(mode == "rt"):
5815-
mode = "r"
5816-
elif(mode == "wt"):
5817-
mode = "w"
5840+
5841+
# Python 2 text-mode fixups if needed (though you're bytes-only)
5842+
if sys.version_info[0] == 2 and compresscheck:
5843+
if mode == "rt": mode = "r"
5844+
elif mode == "wt": mode = "w"
5845+
58185846
try:
5819-
if(compresscheck == "gzip" and compresscheck in compressionsupport):
5820-
if sys.version_info[0] == 2:
5821-
filefp = GzipFile(infile, mode=mode)
5822-
else:
5823-
filefp = gzip.open(infile, mode)
5824-
elif(compresscheck == "bzip2" and compresscheck in compressionsupport):
5825-
filefp = bz2.open(infile, mode)
5826-
elif(compresscheck == "zstd" and compresscheck in compressionsupport):
5847+
# Compressed branches
5848+
if (compresscheck == "gzip" and "gzip" in compressionsupport):
5849+
fp = GzipFile(infile, mode=mode) if sys.version_info[0] == 2 else gzip.open(infile, mode)
5850+
elif (compresscheck == "bzip2" and "bzip2" in compressionsupport):
5851+
fp = bz2.open(infile, mode)
5852+
elif (compresscheck == "zstd" and "zstandard" in compressionsupport):
58275853
if 'zstandard' in sys.modules:
5828-
filefp = ZstdFile(infile, mode=mode)
5854+
fp = ZstdFile(infile, mode=mode)
58295855
elif 'pyzstd' in sys.modules:
5830-
filefp = pyzstd.zstdfile.ZstdFile(infile, mode=mode)
5856+
fp = pyzstd.zstdfile.ZstdFile(infile, mode=mode)
58315857
else:
5832-
return Flase
5833-
elif(compresscheck == "lz4" and compresscheck in compressionsupport):
5834-
filefp = lz4.frame.open(infile, mode)
5835-
elif((compresscheck == "lzo" or compresscheck == "lzop") and compresscheck in compressionsupport):
5836-
filefp = LzopFile(infile, mode=mode)
5837-
elif((compresscheck == "lzma" or compresscheck == "xz") and compresscheck in compressionsupport):
5838-
filefp = lzma.open(infile, mode)
5839-
elif(compresscheck == "zlib" and compresscheck in compressionsupport):
5840-
filefp = ZlibFile(infile, mode=mode)
5841-
elif(compresscheck == formatspecs['format_magic']):
5842-
filefp = open(infile, mode)
5843-
elif(not compresscheck):
5844-
filefp = open(infile, mode)
5845-
else:
5846-
filefp = open(infile, mode)
5858+
return False
5859+
elif (compresscheck == "lz4" and "lz4" in compressionsupport):
5860+
fp = lz4.frame.open(infile, mode)
5861+
elif ((compresscheck == "lzo" or compresscheck == "lzop") and "lzop" in compressionsupport):
5862+
fp = LzopFile(infile, mode=mode)
5863+
elif ((compresscheck == "lzma" or compresscheck == "xz") and "xz" in compressionsupport):
5864+
fp = lzma.open(infile, mode)
5865+
elif (compresscheck == "zlib" and "zlib" in compressionsupport):
5866+
fp = ZlibFile(infile, mode=mode)
5867+
5868+
# Uncompressed (or unknown): open plain file
5869+
else:
5870+
fp = open(infile, mode)
5871+
58475872
except FileNotFoundError:
58485873
return False
5874+
5875+
# For uncompressed: optional mmap
5876+
mm = None
5877+
if use_mmap and (compresscheck is None or compresscheck == formatspecs.get('format_magic', None)):
5878+
try:
5879+
base = _extract_base_fp(fp)
5880+
if base is not None:
5881+
mm = mmap.mmap(base.fileno(), 0, access=mmap.ACCESS_READ if "r" in mode else mmap.ACCESS_WRITE)
5882+
except Exception:
5883+
mm = None # fallback to normal file stream
5884+
5885+
# Position to filestart if caller requested it (mainly for fileobj-based headers)
58495886
try:
5850-
filefp.write_through = True
5851-
except AttributeError:
5887+
fp.seek(0 if compresscheck else filestart, 0)
5888+
except Exception:
58525889
pass
5853-
return filefp
5890+
5891+
out = FileLikeAdapter(fp, mode="rb" if "r" in mode else "wb", mm=mm)
5892+
try:
5893+
out.write_through = True
5894+
except Exception:
5895+
pass
5896+
return out
58545897

58555898

58565899
def UncompressString(infile, formatspecs=__file_format_multi_dict__, filestart=0):

0 commit comments

Comments
 (0)