Skip to content

Commit 27ac1de

Browse files
author
Pamparampam
committed
Fixed utf8 flag not being set correctly
1 parent a3a2cfb commit 27ac1de

4 files changed

Lines changed: 64 additions & 4 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
66

77
[project]
88
name = "zipFly64"
9-
version = "1.3.1"
9+
version = "1.3.2"
1010
description = "Stream zip64 archives on the fly."
1111
readme = "README.md"
1212
authors = [{ name = "Pamparampampam" }]

src/zipFly/BaseFile.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from . import consts
66
from .Compressor import Compressor
7+
from .consts import DATA_DESCRIPTOR_FLAG
78

89

910
class BaseFile(ABC):
@@ -13,7 +14,7 @@ def __init__(self, name: str, compression_method: int = consts.NO_COMPRESSION):
1314
self.__offset = 0 # Offset to local file header
1415
self.__crc = 0
1516
self.__compression_method = compression_method
16-
self.__flags = 0b00001000 # flag about using data descriptor is always on
17+
self.__flags = DATA_DESCRIPTOR_FLAG # flag about using data descriptor is always on
1718
self.__byte_offset_mode = False
1819
if name == "":
1920
raise KeyError("File name cannot be blank.")
@@ -102,8 +103,9 @@ def file_path_bytes(self) -> bytes:
102103
try:
103104
return self.name.encode("ascii")
104105
except UnicodeError:
106+
print("UNICOE ERROR")
105107
self.__flags |= consts.UTF8_FLAG
106-
return self.name.encode("utf-8")
108+
return self.name.encode()
107109

108110
@abstractmethod
109111
def _generate_file_data(self) -> Generator[bytes, None, None]:
@@ -127,6 +129,7 @@ def modification_time(self) -> float:
127129

128130
@property
129131
def flags(self) -> int:
132+
_ = self.file_path_bytes # trigger to set utf8 flag if needed
130133
return self.__flags
131134

132135
@property

src/zipFly/consts.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44
# ZIP CONSTANTS
55
ZIP64_VERSION = 45
66
VERSION_MADE_BY = 0x0A45 # Windows and ZIP version 45
7-
UTF8_FLAG = 0x800 # utf-8 filename encoding flag
7+
UTF8_FLAG = 0x0800 # utf-8 filename encoding flag
8+
DATA_DESCRIPTOR_FLAG = 0x08 # flag that signalises that a data descriptor is used
89

910
# ZIP COMPRESSION METHODS
1011
NO_COMPRESSION = 0

tests/test_zipfly.py

Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
"""Run tests of ZipFly."""
22
import re
3+
import struct
34
import time
45
import zipfile
56
import zlib
@@ -654,3 +655,58 @@ def test_EmptyFolder_creates_empty_directory(tmp_path):
654655
with zfp.open(folder_name) as folder_file:
655656
content = folder_file.read()
656657
assert content == b""
658+
659+
def _read_local_flags(data, offset):
660+
return struct.unpack_from("<H", data, offset + 6)[0]
661+
662+
663+
def _read_central_flags(data, offset):
664+
return struct.unpack_from("<H", data, offset + 8)[0]
665+
666+
667+
def test_non_ascii_file_names_utf8_flag(tmp_path):
668+
file_path = tmp_path / "input.bin"
669+
file_path.write_bytes(lorem_ipsum)
670+
671+
files = [
672+
LocalFile(file_path=file_path, name="zażółć.txt"),
673+
LocalFile(file_path=file_path, name="файл.txt"),
674+
LocalFile(file_path=file_path, name="αρχείο.txt"),
675+
LocalFile(file_path=file_path, name="ファイル.txt"),
676+
LocalFile(file_path=file_path, name="文件.txt"),
677+
]
678+
679+
zip_path = tmp_path / "non_ascii_flags.zip"
680+
zipfly = ZipFly(files)
681+
682+
with open(zip_path, "wb") as f:
683+
for chunk in zipfly.stream():
684+
f.write(chunk)
685+
686+
data = zip_path.read_bytes()
687+
688+
UTF8_FLAG = 0x0800
689+
690+
# --- scan local headers ---
691+
offset = 0
692+
while True:
693+
sig = data.find(b"PK\x03\x04", offset)
694+
if sig == -1:
695+
break
696+
697+
flags = _read_local_flags(data, sig)
698+
assert flags & UTF8_FLAG, f"UTF-8 flag missing in local header at {sig}"
699+
700+
offset = sig + 4
701+
702+
# --- scan central directory ---
703+
offset = 0
704+
while True:
705+
sig = data.find(b"PK\x01\x02", offset)
706+
if sig == -1:
707+
break
708+
709+
flags = _read_central_flags(data, sig)
710+
assert flags & UTF8_FLAG, f"UTF-8 flag missing in central dir at {sig}"
711+
712+
offset = sig + 4

0 commit comments

Comments
 (0)