Skip to content

Commit 0121b2c

Browse files
committed
Small update
1 parent 6d4da8f commit 0121b2c

1 file changed

Lines changed: 63 additions & 21 deletions

File tree

pyarchivefile/pyarchivefile.py

Lines changed: 63 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
import inspect
3939
import tempfile
4040
import configparser
41+
from zoneinfo import ZoneInfo
4142
from io import open, StringIO, BytesIO
4243
__enable_pywwwget__ = True
4344
pywwwget = False
@@ -1075,30 +1076,71 @@ def format_ns_local(ts_ns, fmt='%Y-%m-%d %H:%M:%S'):
10751076
ns_str = "%09d" % ns
10761077
return base + "." + ns_str
10771078

1078-
def get_unix_timestamp_zip(member):
1079+
# Seconds between the Windows FILETIME epoch (1601-01-01) and the Unix
# epoch (1970-01-01).
WINDOWS_EPOCH_DELTA = 11644473600


def _filetime_to_unix_seconds(filetime: int) -> int:
    """Convert a Windows FILETIME value to whole Unix seconds.

    A FILETIME counts 100-nanosecond intervals since 1601-01-01 UTC.  The
    sub-second part is discarded by floor division before the epoch offset
    is subtracted, so values earlier than 1970 yield negative results.

    :param filetime: FILETIME tick count.
    :return: Seconds relative to the Unix epoch, as an int.
    """
    # 10_000_000 ticks of 100 ns make up one second.
    whole_seconds = filetime // 10_000_000
    return int(whole_seconds - WINDOWS_EPOCH_DELTA)
1084+
1085+
def _parse_ext_timestamp_0x5455(extra_data: bytes) -> "int | None":
    """Read the modification time from a 0x5455 extended-timestamp payload.

    ``extra_data`` is the field payload only (the 4-byte tag/size header
    already removed).  Layout: one flags byte, then up to three 32-bit
    little-endian values (mtime, atime, ctime), each present only when its
    flag bit is set.  Only mtime (flag bit 0x01) is read; it is always the
    first value when present.

    The return annotation is quoted so it is not evaluated at definition
    time; a bare ``int | None`` raises TypeError on Python versions older
    than 3.10 unless postponed annotations are enabled.

    :param extra_data: Payload bytes of the extra field.
    :return: mtime as an unsigned 32-bit count of Unix seconds, or None when
        the payload is empty, the mtime flag is clear, or the payload is too
        short to hold the 4-byte value.
    """
    if len(extra_data) < 1:
        return None

    flags = extra_data[0]
    if not flags & 0x01:
        # Modification time was not stored in this field.
        return None

    # Flags byte (1) + mtime (4) must fit inside the payload.
    if len(extra_data) < 5:
        return None

    (mtime,) = struct.unpack_from("<I", extra_data, 1)
    return int(mtime)
1096+
1097+
def _parse_ntfs_0x000a(extra_data: bytes) -> "int | None":
    """Read the modification time from a 0x000a (NTFS) extra-field payload.

    ``extra_data`` is the field payload only (the 4-byte tag/size header
    already removed).  Layout: 4 reserved bytes, then a sequence of
    attributes, each ``attr_tag`` (2 bytes), ``attr_size`` (2 bytes) and
    ``attr_size`` bytes of data, all little-endian.  Attribute 0x0001 holds
    three 8-byte FILETIME values in the order mtime, atime, ctime; only the
    first is used.

    The return annotation is quoted so it is not evaluated at definition
    time; a bare ``int | None`` raises TypeError on Python versions older
    than 3.10 unless postponed annotations are enabled.

    :param extra_data: Payload bytes of the extra field.
    :return: mtime converted by ``_filetime_to_unix_seconds``, or None when
        no complete 0x0001 attribute of at least 24 bytes is found.
    """
    total = len(extra_data)
    if total < 4:
        return None

    off = 4  # skip the reserved bytes
    while off + 4 <= total:
        attr_tag, attr_size = struct.unpack_from("<HH", extra_data, off)
        off += 4
        attr_end = off + attr_size
        if attr_end > total:
            # Declared size runs past the payload: stop on truncated data.
            break

        if attr_tag == 0x0001 and attr_size >= 24:
            # Read the FILETIME in place instead of slicing out a copy.
            (mtime_filetime,) = struct.unpack_from("<Q", extra_data, off)
            return _filetime_to_unix_seconds(mtime_filetime)

        off = attr_end
    return None
1116+
1117+
def get_unix_timestamp_zip(member, fallback_tz: str = "America/Chicago") -> int:
    """Return a ZIP member's modification time as integer Unix seconds.

    The member's extra-field blob is walked first: the first 0x5455
    (extended timestamp) or 0x000A (NTFS) field that yields a value wins.
    Otherwise the DOS ``date_time`` tuple is used.  DOS timestamps are
    local time with no zone information, so they are interpreted in
    ``fallback_tz`` before conversion.

    Two failure modes of the fallback path are handled instead of raising:

    * Out-of-range DOS fields (for example a zeroed month or day) are
      clamped to the nearest valid calendar value.
    * If ``fallback_tz`` cannot be loaded (unknown key, or no time-zone
      database on the system), the DOS time is interpreted as UTC.

    :param member: Object exposing ``extra`` (bytes) and ``date_time``
        (year, month, day, hour, minute, second).
    :param fallback_tz: IANA zone name used to interpret the DOS time.
    :return: Modification time in seconds since the Unix epoch.
    """
    extra = member.extra
    pos = 0

    # 1) Prefer extra fields that carry an absolute timestamp.
    while pos + 4 <= len(extra):
        tag, length = struct.unpack_from("<HH", extra, pos)
        pos += 4
        data = extra[pos:pos + length]
        pos += length

        if tag == 0x5455:
            ts = _parse_ext_timestamp_0x5455(data)
        elif tag == 0x000A:
            ts = _parse_ntfs_0x000a(data)
        else:
            ts = None
        if ts is not None:
            return ts

    # 2) Fallback: DOS local time -> interpret in fallback_tz -> Unix time.
    try:
        local_naive = datetime.datetime(*member.date_time)
    except ValueError:
        local_naive = _clamped_dos_datetime(member.date_time)

    try:
        zone = ZoneInfo(fallback_tz)
    except (KeyError, ValueError, OSError):
        # ZoneInfoNotFoundError is a KeyError; malformed keys raise
        # ValueError.  Without a usable zone, treat the DOS time as UTC.
        zone = datetime.timezone.utc

    return int(local_naive.replace(tzinfo=zone).timestamp())


def _clamped_dos_datetime(date_time):
    """Build a naive datetime from DOS fields, clamping invalid values."""
    import calendar

    year, month, day, hour, minute, second = date_time[:6]
    year = min(max(year, datetime.MINYEAR), datetime.MAXYEAR)
    month = min(max(month, 1), 12)
    # The last valid day depends on the (already clamped) year and month.
    day = min(max(day, 1), calendar.monthrange(year, month)[1])
    hour = min(max(hour, 0), 23)
    minute = min(max(minute, 0), 59)
    second = min(max(second, 0), 59)
    return datetime.datetime(year, month, day, hour, minute, second)
11021144

11031145
def CheckSumSupport(checkfor, guaranteed=True):
11041146
if(guaranteed):

0 commit comments

Comments
 (0)