Skip to content

Commit e68e76e

Browse files
committed
Optimize _read_gzip_header for the most common code paths
Those are:
+ Only FNAME set. (Created by gzip and Python's GzipFile.)
+ No flags set. (Created by gzip.compress and zlib.compress with wbits=31.)
1 parent 3d5bb47 commit e68e76e

File tree

1 file changed

+29
-19
lines changed

1 file changed

+29
-19
lines changed

Lib/gzip.py

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -431,48 +431,58 @@ def _read_gzip_header(fp):
431431
if method != 8:
432432
raise BadGzipFile('Unknown compression method')
433433

434-
# FHCRC will be checked often. So save the result of the check.
435-
fhcrc = bool(flag & FHCRC)
436-
# Only create and append to a list of header parts when FHCRC is set.
437-
# In the most common use cases FHCRC is not set. So we optimize for those
438-
# cases.
439-
if fhcrc:
440-
header_parts = [magic, base_header]
434+
# No flags. No need for further parsing. These headers are returned by
435+
# gzip.compress or zlib.compress(..., wbits=31)
436+
if not flag:
437+
return last_mtime
438+
# Most gzip files will have only FNAME set. For example: produced by gzip
439+
# command line application or python's GzipFile.
440+
if flag == FNAME:
441+
while True:
442+
s = fp.read(1)
443+
if not s:
444+
raise EOFError("Compressed file ended before the "
445+
"end-of-stream marker was reached")
446+
if s == b'\000':
447+
break
448+
return last_mtime
449+
450+
# Processing for more complex flags.
451+
452+
# Save header parts for FHCRC checking
453+
header_parts = [magic, base_header]
441454

442455
if flag & FEXTRA:
443-
# Read the extra field, if present, save the fields if FHCRC is set.
456+
# Read the extra field, if present, save the fields for FHCRC checking.
444457
extra_len_bytes = _read_exact(fp, 2)
445458
extra_len, = struct.unpack("<H", extra_len_bytes)
446459
extra = _read_exact(fp, extra_len)
447-
if fhcrc:
448-
header_parts.extend([extra_len_bytes, extra])
460+
header_parts.extend([extra_len_bytes, extra])
449461

450462
if flag & FNAME:
451-
# Read a null-terminated string containing the filename. Save it
452-
# if FHCRC is set.
463+
# Read a null-terminated string containing the filename, save the name
464+
# for FHCRC checking.
453465
while True:
454466
s = fp.read(1)
455467
if not s:
456468
raise EOFError("Compressed file ended before the "
457469
"end-of-stream marker was reached")
458-
if fhcrc:
459-
header_parts.append(s)
470+
header_parts.append(s)
460471
if s == b'\000':
461472
break
462473
if flag & FCOMMENT:
463-
# Read a null-terminated string containing the filename. Save it
464-
# if FHCRC is set.
474+
# Read a null-terminated string containing the comment, save the
475+
# comment for FHCRC checking.
465476
while True:
466477
s = fp.read(1)
467478
if not s:
468479
raise EOFError("Compressed file ended before the "
469480
"end-of-stream marker was reached")
470-
if fhcrc:
471-
header_parts.append(s)
481+
header_parts.append(s)
472482
if s == b'\000':
473483
break
474484

475-
if fhcrc:
485+
if flag & FHCRC:
476486
# Read the 16-bit header CRC and check it against the header.
477487
header_crc, = struct.unpack("<H", _read_exact(fp, 2))
478488
header = b"".join(header_parts)

0 commit comments

Comments
 (0)