1- from typing import Generator , AsyncGenerator , Union
1+ import asyncio
2+ from typing import Generator , AsyncGenerator , Union , Optional
23
34from . import consts
45from .BaseFile import BaseFile
6+ from .FilePrefetcher import FilePrefetcher
57from .ZipBase import ZipBase
68import copy
79import types
810
11+
912def process_file_names (files ) -> list [BaseFile ]:
1013 """Renames duplicated file names"""
1114 seen_names = set ()
@@ -185,7 +188,7 @@ def _make_end_structures(self) -> Generator[bytes, None, None]:
185188 yield self ._apply_remaining_offset (self ._make_end_of_cdir_record ())
186189
187190 async def _async_stream_single_file (self , file : BaseFile ) -> AsyncGenerator [bytes , None ]:
188- """This function streams a single file, it also applies running_offset is needed"""
191+ """This function streams a single file, it also applies remaining_offset if needed"""
189192
190193 yield self ._apply_remaining_offset (self ._make_local_file_header (file ))
191194
@@ -225,6 +228,51 @@ async def async_stream(self) -> AsyncGenerator[bytes, None]:
225228
226229 # self._cleanup()
227230
async def async_stream_parallel(self, prefetch_files: int = 20, max_chunks_per_file: int = 2) -> AsyncGenerator[bytes, None]:
    """
    Stream files in parallel.

    The emitted byte sequence has the same layout as the sequential
    streams: for every file a local file header, its data chunks and a
    data descriptor, followed by the end structures produced by
    ``_make_end_structures``. Only the reading of file data is delegated
    to a ``FilePrefetcher``.

    - prefetch_files: number of files' DATA to read ahead concurrently
    - max_chunks_per_file: per-file buffered DATA chunks (backpressure)

    Both values are passed straight to ``FilePrefetcher``; their exact
    effect is defined there.
    """
    self._check_if_can_stream()
    start_idx, remaining_offset = self._find_starting_file()
    self._remaining_offset = remaining_offset
    self._set_offset(self._byte_offset - remaining_offset)

    def _trim_and_count(data):
        # Run the piece through the remaining-offset filter and count only
        # the bytes that will really be sent. The emptiness check comes
        # BEFORE len() so that a fully skipped piece (empty, or None if the
        # filter reports it that way) never reaches len().
        # NOTE(review): assumes _add_offset(0) is a no-op, as the unguarded
        # calls it replaces did for empty headers/descriptors - confirm.
        out = self._apply_remaining_offset(data)
        if out:
            self._add_offset(len(out))
        return out

    if start_idx is not None:
        files = self._files[start_idx:]
        prefetch_mgr = FilePrefetcher(files, prefetch_files, max_chunks_per_file)

        for i, file in enumerate(files):
            await prefetch_mgr.ensure_prefetch(i)

            # 1) Local File Header
            # The file's offset must be recorded before the header is
            # built and before its bytes are added to the running offset.
            file.set_offset(self._get_offset())
            header = _trim_and_count(self._make_local_file_header(file))
            if header:
                yield header

            # 2) Stream DATA
            async for chunk in prefetch_mgr.stream_file_data(i):
                out = _trim_and_count(chunk)
                if out:
                    yield out

            # 3) Data Descriptor
            # Built only after all data chunks of this file were consumed.
            dd = _trim_and_count(self._make_data_descriptor(file))
            if dd:
                yield dd

    # 4) Central directory & end records
    # Emitted even when no file has to be streamed (start_idx is None).
    for chunk in self._make_end_structures():
        yield chunk
275+
228276 def stream (self ) -> Generator [bytes , None , None ]:
229277 self ._check_if_can_stream ()
230278
@@ -243,8 +291,6 @@ def stream(self) -> Generator[bytes, None, None]:
243291 for chunk in self ._make_end_structures ():
244292 yield chunk
245293
246- # self._cleanup()
247-
248294 def _check_if_can_stream (self ):
249295 if self .__used :
250296 raise RuntimeError ("Do not re-use zipFly instances. Recreate it." )
@@ -266,10 +312,3 @@ def _apply_remaining_offset(self, data):
266312 self ._add_offset (self ._remaining_offset )
267313 self ._remaining_offset = 0 # Offset is fully applied
268314 return result
269-
270- # def _cleanup(self):
271- # pass
272- # """Clean all data after streaming"""
273- # super()._cleanup()
274- # self._remaining_offset = 0
275- # # self.__used = False
0 commit comments