|
3 | 3 | import asyncio |
4 | 4 | import tempfile |
5 | 5 | import time |
| 6 | +import zipfile |
6 | 7 | from pathlib import Path |
7 | 8 | from typing import TYPE_CHECKING |
8 | 9 |
|
@@ -203,28 +204,28 @@ async def wait_and_download( |
203 | 204 | output_dir: str | Path | None = None, |
204 | 205 | extract: bool = True, |
205 | 206 | ) -> list[Path]: |
206 | | - """Wait for an export job to complete and download the exported files. |
| 207 | + """Wait for a job to complete and download the result files. |
207 | 208 |
|
208 | 209 | Polls the job status at the given interval until the job is FINISHED, |
209 | | - FAILED, or CANCELLED, then downloads and extracts the exported data files. |
| 210 | + FAILED, or CANCELLED, then downloads the result files. |
210 | 211 |
|
211 | 212 | Args: |
212 | | - job: The export Job or job ID to wait for. |
| 213 | + job: The Job or job ID to wait for. |
213 | 214 | polling_interval_secs: Seconds between status polls. Defaults to 5. |
214 | 215 | timeout_secs: Maximum seconds to wait. If None, polls indefinitely. |
215 | | - output_dir: Directory to save the extracted files. If omitted, a |
| 216 | + output_dir: Directory to save the downloaded files. If omitted, a |
216 | 217 | temporary directory is created automatically. |
217 | | - extract: If True (default), extract the zip and delete it, |
218 | | - returning paths to the extracted files. If False, keep the |
219 | | - zip file and return its path. |
| 218 | + extract: If True (default) and the downloaded file is a zip, |
| 219 | + extract it and delete the archive, returning paths to the |
| 220 | + extracted files. Non-zip files are returned as-is regardless |
| 221 | + of this flag. |
220 | 222 |
|
221 | 223 | Returns: |
222 | | - List of paths to the extracted data files, or a single-element |
223 | | - list containing the zip path if extract is False. |
| 224 | + List of paths to the downloaded/extracted files. |
224 | 225 |
|
225 | 226 | Raises: |
226 | | - RuntimeError: If the export job fails or is cancelled. |
227 | | - TimeoutError: If the export job does not complete within timeout_secs. |
| 227 | + RuntimeError: If the job fails or is cancelled. |
| 228 | + TimeoutError: If the job does not complete within timeout_secs. |
228 | 229 | """ |
229 | 230 | job_id = job._id_or_error if isinstance(job, Job) else job |
230 | 231 |
|
@@ -253,14 +254,14 @@ async def wait_and_download( |
253 | 254 | if output_dir is not None |
254 | 255 | else Path(tempfile.mkdtemp(prefix="sift_export_")) |
255 | 256 | ) |
256 | | - zip_file_path = output_dir / f"{job_id}.zip" |
| 257 | + download_path = output_dir / job_id |
257 | 258 |
|
258 | 259 | # Run the synchronous download in a thread pool to avoid blocking the event loop |
259 | 260 | rest_client = self.client.rest_client |
260 | 261 | await run_sync_function( |
261 | | - lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client) |
| 262 | + lambda: download_file(presigned_url, download_path, rest_client=rest_client) |
262 | 263 | ) |
263 | 264 |
|
264 | | - if not extract: |
265 | | - return [zip_file_path] |
266 | | - return extract_zip(zip_file_path, output_dir) |
| 265 | + if not extract or not zipfile.is_zipfile(download_path): |
| 266 | + return [download_path] |
| 267 | + return extract_zip(download_path, output_dir) |
0 commit comments