1919class DownloadStrategy (Enum ):
2020 """Available download strategies for repository content."""
2121
22- ARCHIVE = "archive" # Download as ZIP/TAR archive
23- INDIVIDUAL = "individual" # Download files individually via API
24- GIT_CLONE = "git_clone" # Use git clone (for complete history)
25- SPARSE_CHECKOUT = "sparse" # Git sparse-checkout for partial downloads
22+ ARCHIVE = "archive" # Download as ZIP/TAR archive
23+ INDIVIDUAL = "individual" # Download files individually via API
24+ GIT_CLONE = "git_clone" # Use git clone (for complete history)
25+ SPARSE_CHECKOUT = "sparse" # Git sparse-checkout for partial downloads
2626
2727
2828class DownloadStatus (Enum ):
@@ -61,14 +61,18 @@ def matches_path(self, path: str) -> bool:
6161 return False
6262
6363 if self .include_patterns :
64- if not any (fnmatch .fnmatch (path , pattern ) for pattern in self .include_patterns ):
64+ if not any (
65+ fnmatch .fnmatch (path , pattern ) for pattern in self .include_patterns
66+ ):
6567 return False
6668
6769 if self .exclude_patterns :
6870 if any (fnmatch .fnmatch (path , pattern ) for pattern in self .exclude_patterns ):
6971 return False
7072
71- if (not self .include_hidden and any (part .startswith ('.' ) for part in _Path (path ).parts )):
73+ if not self .include_hidden and any (
74+ part .startswith ("." ) for part in _Path (path ).parts
75+ ):
7276 return False
7377
7478 file_ext = _Path (path ).suffix .lower ()
@@ -102,6 +106,7 @@ class DownloadRequest:
102106 max_concurrent_downloads : int = 5
103107 chunk_size : int = 8192
104108 timeout : int = 300
109+ stream_threshold : int = 10 * 1024 * 1024 # 10 MB default
105110
106111 # Authentication
107112 token : Optional [str ] = None
@@ -110,7 +115,9 @@ class DownloadRequest:
110115 dry_run : bool = False
111116
112117 # Metadata
113- request_id : str = field (default_factory = lambda : f"req_{ datetime .now ().strftime ('%Y%m%d_%H%M%S' )} " )
118+ request_id : str = field (
119+ default_factory = lambda : f"req_{ datetime .now ().strftime ('%Y%m%d_%H%M%S' )} "
120+ )
114121 created_at : datetime = field (default_factory = datetime .now )
115122
116123 def __post_init__ (self ) -> None :
@@ -122,6 +129,8 @@ def __post_init__(self) -> None:
122129 raise ValueError ("chunk_size must be positive" )
123130 if self .timeout <= 0 :
124131 raise ValueError ("timeout must be positive" )
132+ if self .stream_threshold < 0 :
133+ raise ValueError ("stream_threshold must be non-negative" )
125134
126135
127136@dataclass
@@ -170,7 +179,9 @@ def files_percentage(self) -> float:
170179 def elapsed_time (self ) -> float :
171180 return (datetime .now () - self .started_at ).total_seconds ()
172181
173- def update_file_progress (self , bytes_downloaded : int , current_file : Optional [str ] = None ) -> None :
182+ def update_file_progress (
183+ self , bytes_downloaded : int , current_file : Optional [str ] = None
184+ ) -> None :
174185 self .downloaded_bytes += bytes_downloaded
175186 if current_file :
176187 self .current_file = current_file
@@ -219,11 +230,17 @@ def success_rate(self) -> float:
219230
220231 def mark_completed (self ) -> None :
221232 self .completed_at = datetime .now ()
222- self .status = DownloadStatus .COMPLETED if not self .failed_files else DownloadStatus .FAILED
233+ self .status = (
234+ DownloadStatus .COMPLETED if not self .failed_files else DownloadStatus .FAILED
235+ )
223236 if self .completed_at :
224- self .total_download_time = (self .completed_at - self .started_at ).total_seconds ()
237+ self .total_download_time = (
238+ self .completed_at - self .started_at
239+ ).total_seconds ()
225240 if self .total_download_time > 0 and self .progress .downloaded_bytes > 0 :
226- self .average_speed = self .progress .downloaded_bytes / self .total_download_time
241+ self .average_speed = (
242+ self .progress .downloaded_bytes / self .total_download_time
243+ )
227244
228245
229246@dataclass
0 commit comments