Skip to content

Commit fe92708

Browse files
author
Pamparampam
committed
Added robust checks and changed the API layout again. The byte offset mode parameter was moved from stream() to the ZipFly constructor.
1 parent 20ad061 commit fe92708

11 files changed

Lines changed: 261 additions & 101 deletions

File tree

MANIFEST.IN

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,3 @@
11
exclude .gitignore
2-
exclude dev-requirements.txt
2+
exclude dev-requirements.txt
3+
prune tests

README.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ file2 = LocalFile(file_path='files/as61aade2ebfd.mp4', compression_method=consts
7676
files = [file1, file2]
7777

7878
zipFly = ZipFly(files)
79-
archive_size = zipFly.calculate_archive_size() # raises ValueError if it can't calculate size
79+
archive_size = zipFly.calculate_archive_size() # raises RuntimeError if it can't calculate size
8080

8181
# for example you can set as content length in http response
8282
response['Content-Length'] = archive_size
@@ -143,11 +143,11 @@ async def async_save_pause():
143143
file3 = GenFile(name="file.txt", generator=file_generator(), crc=crc)
144144
file4 = LocalFile(file_path='public/2ae9dcd01a3aa.mp4', name="files/my_file2.mp4")
145145
files2 = [file3, file4]
146-
zipFly2 = ZipFly(files2)
146+
resumeZipFly = ZipFly(files2, byte_offset=STOP_BYTE)
147147

148148
async def async_save_resume():
149149
with open("out/file.zip", 'ab') as f_out: # Append mode
150-
async for chunk in zipFly2.async_stream(byte_offset=STOP_BYTE):
150+
async for chunk in resumeZipFly.async_stream():
151151
f_out.write(chunk)
152152

153153
async def pause_resume_save():

src/zipFly/BaseFile.py

Lines changed: 25 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -7,29 +7,34 @@
77

88

99
class BaseFile(ABC):
10-
def __init__(self, compression_method: int):
10+
def __init__(self, name: str, compression_method: int):
1111
self.__used = False
1212
self.__compressed_size = 0
1313
self.__offset = 0 # Offset to local file header
1414
self.__crc = 0
1515
self.__compression_method = compression_method or consts.NO_COMPRESSION
1616
self.__flags = 0b00001000 # flag about using data descriptor is always on
17+
self.__byte_offset_mode = False
18+
if name == "":
19+
raise KeyError("File name cannot be blank.")
20+
self._name = name
1721

1822
def __str__(self):
19-
return f"FILE[{self.name}]"
23+
return f"BaseFile[name={self.name}]"
24+
25+
def __repr__(self):
26+
return f"BaseFile({self.name})"
2027

2128
def _check_if_used(self):
2229
if self.__used:
2330
raise RuntimeError("Do not re-use file instances. Recreate it.")
2431
self.__used = True
2532

2633
def generate_processed_file_data(self) -> Generator[bytes, None, None]:
34+
"""Generates compressed file data"""
2735
self._check_if_used()
2836
compressor = Compressor(self)
2937

30-
"""
31-
Generates compressed file data
32-
"""
3338
for chunk in self._generate_file_data():
3439
chunk = compressor.process(chunk)
3540
if len(chunk) > 0:
@@ -39,12 +44,10 @@ def generate_processed_file_data(self) -> Generator[bytes, None, None]:
3944
yield chunk
4045

4146
async def async_generate_processed_file_data(self) -> AsyncGenerator[bytes, None]:
47+
"""Generates compressed file data"""
4248
self._check_if_used()
4349
compressor = Compressor(self)
4450

45-
"""
46-
Generates compressed file data
47-
"""
4851
async for chunk in self._async_generate_file_data():
4952
chunk = compressor.process(chunk)
5053
if len(chunk) > 0:
@@ -85,6 +88,15 @@ def get_crc(self) -> int:
8588
def set_crc(self, new_crc) -> None:
8689
self.__crc = new_crc
8790

91+
def set_byte_offset_mode(self, value) -> None:
92+
self.__byte_offset_mode = value
93+
94+
def is_byte_offset_mode(self) -> bool:
95+
return self.__byte_offset_mode
96+
97+
def set_file_name(self, new_name: str) -> None:
98+
self._name = new_name
99+
88100
@property
89101
def file_path_bytes(self) -> bytes:
90102
try:
@@ -102,11 +114,7 @@ async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
102114
raise NotImplementedError
103115

104116
@abstractmethod
105-
def set_file_name(self, new_name: str) -> None:
106-
raise NotImplementedError
107-
108-
@abstractmethod
109-
def calculate_crc(self) -> int:
117+
def get_predicted_crc(self) -> int:
110118
raise NotImplementedError
111119

112120
@property
@@ -117,14 +125,14 @@ def size(self) -> int:
117125
def modification_time(self) -> float:
118126
raise NotImplementedError
119127

120-
@property
121-
def name(self) -> str:
122-
raise NotImplementedError
123-
124128
@property
125129
def flags(self) -> int:
126130
return self.__flags
127131

132+
@property
133+
def name(self) -> str:
134+
return self._name
135+
128136
@property
129137
def compression_method(self) -> int:
130138
return self.__compression_method

src/zipFly/Compressor.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,8 @@ def __init__(self, file: 'BaseFile'):
1212
self.compr = zlib.compressobj(5, zlib.DEFLATED, -15)
1313
self.process = self._process_deflate
1414
self.tail = self._tail_deflate
15+
else:
16+
raise KeyError("Unknown compression method in compressor")
1517

1618
# no compression
1719
def _process_through(self, chunk):

src/zipFly/EmptyFolder.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,38 @@
1+
import time
2+
from typing import Generator, AsyncGenerator
3+
4+
from . import consts
5+
from .BaseFile import BaseFile
6+
7+
8+
class EmptyFolder(BaseFile):
9+
def __init__(self, name: str, modification_time: float = None):
10+
super().__init__(name, consts.NO_COMPRESSION)
11+
if not name.endswith("/"):
12+
name += "/"
13+
self._name = name
14+
self._modification_time = modification_time if modification_time else time.time()
15+
self._finished_streaming = False
16+
17+
def __str__(self):
18+
return f"EmptyFolder[name={self.name}]"
19+
20+
def __repr__(self):
21+
return f"EmptyFolder({self.name})"
22+
23+
def _generate_file_data(self) -> Generator[bytes, None, None]:
24+
yield b''
25+
26+
async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
27+
yield b''
28+
29+
@property
30+
def size(self) -> int:
31+
return 0
32+
33+
@property
34+
def modification_time(self) -> float:
35+
return self._modification_time
36+
37+
def get_predicted_crc(self) -> int:
38+
return 0

src/zipFly/GenFile.py

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -8,49 +8,72 @@
88
class GenFile(BaseFile):
99
"""DO NOT REUSE GenFile instances!"""
1010
def __init__(self, name: str, generator: Union[Generator[bytes, None, None], AsyncGenerator[bytes, None]], compression_method: int = consts.NO_COMPRESSION, modification_time: float = None, size: int = None, crc: int = None):
11-
super().__init__(compression_method)
12-
self._name = name
11+
if size and compression_method != consts.NO_COMPRESSION:
12+
raise ValueError("File size is allowed only with NO_COMPRESSION")
13+
14+
super().__init__(name, compression_method)
1315
self._generator = generator
14-
self._size = size
15-
self._overriden_crc = crc # used in byte offset mode
16+
self._predicted_size = size
17+
self._streamed_size = 0
18+
self._predicted_crc = crc # used in byte offset mode
1619
self._modification_time = modification_time if modification_time else time.time()
20+
self._finished_streaming = False
21+
22+
def __str__(self):
23+
return f"GenFile[name={self.name}]"
24+
25+
def __repr__(self):
26+
return f"GenFile({self.name})"
1727

1828
def _get_generator(self):
1929
return self._generator
2030

2131
def _generate_file_data(self) -> Generator[bytes, None, None]:
2232
generator = self._get_generator()
2333
if isinstance(generator, Generator):
24-
yield from generator
34+
for chunk in generator:
35+
self._streamed_size += len(chunk)
36+
yield chunk
2537
else:
2638
raise ValueError(f"generator must be of type Generator, not '{type(generator)}'")
2739

40+
self._finish_and_validate()
41+
2842
async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
2943
generator = self._get_generator()
3044
if isinstance(generator, AsyncGenerator):
3145
async for chunk in generator:
46+
self._streamed_size += len(chunk)
3247
yield chunk
3348
else:
3449
raise ValueError(f"generator must be of type AsyncGenerator, not '{type(generator)}'")
3550

36-
@property
37-
def name(self) -> str:
38-
return self._name
51+
self._finish_and_validate()
52+
53+
def _finish_and_validate(self):
54+
self._finished_streaming = True
55+
if self._predicted_size is not None and self._predicted_size != self._streamed_size:
56+
raise RuntimeError(f"Size({self._predicted_size}) != streamed size({self._streamed_size})")
57+
58+
if self._predicted_crc is not None and self._predicted_crc != self.get_crc():
59+
raise RuntimeError(f"Crc({self._predicted_crc}) != streamed crc({self.get_crc()})")
3960

4061
@property
4162
def size(self) -> int:
42-
if self._size is not None:
43-
return self._size
44-
raise RuntimeError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
63+
if not self._finished_streaming:
64+
if self.is_byte_offset_mode():
65+
if self._predicted_size is None:
66+
raise RuntimeError("Archive size not known before streaming. Probably GenFile() is missing size attribute.")
67+
return self._predicted_size
68+
return self._streamed_size
4569

4670
@property
4771
def modification_time(self) -> float:
4872
return self._modification_time
4973

50-
def set_file_name(self, new_name: str) -> None:
51-
self._name = new_name
52-
53-
def calculate_crc(self) -> int:
54-
if self._overriden_crc:
55-
return self._overriden_crc
56-
raise ValueError("Crc must be explicitly set to allow for byte offset streaming!")
74+
def get_predicted_crc(self) -> int:
75+
if self.is_byte_offset_mode():
76+
if self._predicted_crc is None:
77+
raise RuntimeError("Crc not known before streaming. Probably GenFile() is missing crc attribute.")
78+
return self._predicted_crc
79+
raise ValueError("Attempted to get predicted crc without byte offset mode")

src/zipFly/LocalFile.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,19 @@ def __init__(self, file_path: Union[str, Path], name: str = None, compression_me
1717

1818
self._file_path = str(file_path)
1919
self.chunk_size = chunk_size
20-
self._name = name if name else self._file_path
21-
super().__init__(compression_method)
20+
name = name if name else self._file_path
21+
super().__init__(name, compression_method)
22+
23+
def __str__(self):
24+
return f"LocalFile[name={self.name}]"
25+
26+
def __repr__(self):
27+
return f"LocalFile({self.name})"
2228

2329
async def _async_generate_file_data(self) -> AsyncGenerator[bytes, None]:
2430
if not self.chunk_size:
2531
self.chunk_size = 1048 * 1048 * 4
32+
2633
async with aiofiles.open(self._file_path, "rb") as fh:
2734
while True:
2835
part = await fh.read(self.chunk_size)
@@ -41,10 +48,6 @@ def _generate_file_data(self) -> Generator[bytes, None, None]:
4148
break
4249
yield chunk
4350

44-
@property
45-
def name(self) -> str:
46-
return self._name
47-
4851
@property
4952
def size(self) -> int:
5053
return os.path.getsize(self._file_path)
@@ -54,10 +57,7 @@ def modification_time(self) -> float:
5457
"""Returns the modification time as a Unix timestamp"""
5558
return os.path.getmtime(self._file_path)
5659

57-
def set_file_name(self, new_name: str) -> None:
58-
self._name = new_name
59-
60-
def calculate_crc(self) -> int:
60+
def get_predicted_crc(self) -> int:
6161
crc = 0
6262
with open(self._file_path, "rb") as f:
6363
while chunk := f.read(self.chunk_size):

0 commit comments

Comments
 (0)