Skip to content

Commit c92cebe

Browse files
committed
skip decompression when content is already text
1 parent 64a50fa commit c92cebe

1 file changed

Lines changed: 12 additions & 6 deletions

File tree

util/__init__.py

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,8 @@ def unzstd(contents):
204204
def extract(data, fmt):
205205
""" Extract the contents based on mimetype or file ending. Return the
206206
unmodified data if neither mimetype nor file ending matches, otherwise
207-
return the extracted contents.
207+
return the extracted contents. Falls back to unmodified data if
208+
decompression fails (e.g. requests already decompressed the content).
208209
"""
209210
try:
210211
mime = magic.from_buffer(data, mime=True)
@@ -213,14 +214,19 @@ def extract(data, fmt):
213214
m = magic.open(magic.MAGIC_MIME)
214215
m.load()
215216
mime = m.buffer(data).split(';')[0]
217+
if mime.startswith('text/'):
218+
return data
219+
extracted = None
216220
if mime == 'application/zstd' or fmt.endswith('zst'):
217-
return unzstd(data)
218-
if mime == 'application/x-xz' or fmt.endswith('xz'):
219-
return unxz(data)
221+
extracted = unzstd(data)
222+
elif mime == 'application/x-xz' or fmt.endswith('xz'):
223+
extracted = unxz(data)
220224
elif mime == 'application/x-bzip2' or fmt.endswith('bz2'):
221-
return bunzip2(data)
225+
extracted = bunzip2(data)
222226
elif mime == 'application/gzip' or fmt.endswith('gz'):
223-
return gunzip(data)
227+
extracted = gunzip(data)
228+
if extracted is not None:
229+
return extracted
224230
return data
225231

226232

0 commit comments

Comments
 (0)