Skip to content

Commit 9a17655

Browse files
committed
python(refactor): allow wait_and_download to handle non-zip files
1 parent eee4b52 commit 9a17655

2 files changed

Lines changed: 27 additions & 26 deletions

File tree

python/lib/sift_client/resources/jobs.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import asyncio
44
import tempfile
55
import time
6+
import zipfile
67
from pathlib import Path
78
from typing import TYPE_CHECKING
89

@@ -203,28 +204,28 @@ async def wait_and_download(
203204
output_dir: str | Path | None = None,
204205
extract: bool = True,
205206
) -> list[Path]:
206-
"""Wait for an export job to complete and download the exported files.
207+
"""Wait for a job to complete and download the result files.
207208
208209
Polls the job status at the given interval until the job is FINISHED,
209-
FAILED, or CANCELLED, then downloads and extracts the exported data files.
210+
FAILED, or CANCELLED, then downloads the result files.
210211
211212
Args:
212-
job: The export Job or job ID to wait for.
213+
job: The Job or job ID to wait for.
213214
polling_interval_secs: Seconds between status polls. Defaults to 5.
214215
timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
215-
output_dir: Directory to save the extracted files. If omitted, a
216+
output_dir: Directory to save the downloaded files. If omitted, a
216217
temporary directory is created automatically.
217-
extract: If True (default), extract the zip and delete it,
218-
returning paths to the extracted files. If False, keep the
219-
zip file and return its path.
218+
extract: If True (default) and the downloaded file is a zip,
219+
extract it and delete the archive, returning paths to the
220+
extracted files. Non-zip files are returned as-is regardless
221+
of this flag.
220222
221223
Returns:
222-
List of paths to the extracted data files, or a single-element
223-
list containing the zip path if extract is False.
224+
List of paths to the downloaded/extracted files.
224225
225226
Raises:
226-
RuntimeError: If the export job fails or is cancelled.
227-
TimeoutError: If the export job does not complete within timeout_secs.
227+
RuntimeError: If the job fails or is cancelled.
228+
TimeoutError: If the job does not complete within timeout_secs.
228229
"""
229230
job_id = job._id_or_error if isinstance(job, Job) else job
230231

@@ -253,14 +254,14 @@ async def wait_and_download(
253254
if output_dir is not None
254255
else Path(tempfile.mkdtemp(prefix="sift_export_"))
255256
)
256-
zip_file_path = output_dir / f"{job_id}.zip"
257+
download_path = output_dir / job_id
257258

258259
# Run the synchronous download in a thread pool to avoid blocking the event loop
259260
rest_client = self.client.rest_client
260261
await run_sync_function(
261-
lambda: download_file(presigned_url, zip_file_path, rest_client=rest_client)
262+
lambda: download_file(presigned_url, download_path, rest_client=rest_client)
262263
)
263264

264-
if not extract:
265-
return [zip_file_path]
266-
return extract_zip(zip_file_path, output_dir)
265+
if not extract or not zipfile.is_zipfile(download_path):
266+
return [download_path]
267+
return extract_zip(download_path, output_dir)

python/lib/sift_client/sift_types/job.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -323,27 +323,27 @@ def wait_and_download(
323323
output_dir: str | Path | None = None,
324324
extract: bool = True,
325325
) -> list[Path]:
326-
"""Wait for an export job to complete and download the exported files.
326+
"""Wait for a job to complete and download the result files.
327327
328328
Polls the job status at the given interval until the job is FINISHED,
329-
FAILED, or CANCELLED, then downloads and extracts the exported data files.
329+
FAILED, or CANCELLED, then downloads the result files.
330330
331331
Args:
332332
polling_interval_secs: Seconds between status polls. Defaults to 5.
333333
timeout_secs: Maximum seconds to wait. If None, polls indefinitely.
334-
output_dir: Directory to save the extracted files. If omitted, a
334+
output_dir: Directory to save the downloaded files. If omitted, a
335335
temporary directory is created automatically.
336-
extract: If True (default), extract the zip and delete it,
337-
returning paths to the extracted files. If False, keep the
338-
zip file and return its path.
336+
extract: If True (default) and the downloaded file is a zip,
337+
extract it and delete the archive, returning paths to the
338+
extracted files. Non-zip files are returned as-is regardless
339+
of this flag.
339340
340341
Returns:
341-
List of paths to the extracted data files, or a single-element
342-
list containing the zip path if extract is False.
342+
List of paths to the downloaded/extracted files.
343343
344344
Raises:
345-
RuntimeError: If the export job fails or is cancelled.
346-
TimeoutError: If the export job does not complete within timeout_secs.
345+
RuntimeError: If the job fails or is cancelled.
346+
TimeoutError: If the job does not complete within timeout_secs.
347347
"""
348348
return self.client.jobs.wait_and_download(
349349
job=self,

0 commit comments

Comments
 (0)