-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathfile.py
More file actions
63 lines (49 loc) · 2.14 KB
/
Copy pathfile.py
File metadata and controls
63 lines (49 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import annotations
import warnings
import zipfile
from typing import TYPE_CHECKING
from sift_client.errors import SiftWarning
if TYPE_CHECKING:
from pathlib import Path
from sift_client.transport.rest_transport import RestClient
def download_file(signed_url: str, output_path: Path, *, rest_client: RestClient) -> Path:
    """Download a file from a presigned URL in streaming 4 MiB chunks.

    Args:
        signed_url: The presigned URL to download from.
        output_path: Path where the file will be saved. Parent directories are
            created if needed.
        rest_client: The SDK rest client to use for the download.

    Returns:
        The path to the downloaded file (the same object as ``output_path``).

    Raises:
        requests.HTTPError: If the download request fails.
        Exception: Any error raised while streaming or writing is re-raised
            after the partially written file has been removed.
    """
    chunk_size = 4 * 1024 * 1024  # 4 MiB

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Strip the session's default Authorization header, presigned URLs carry their own auth
    with rest_client.get(signed_url, stream=True, headers={"Authorization": None}) as response:
        response.raise_for_status()
        # Open outside the try block: if opening fails we have not touched the
        # destination and must not delete whatever may already be there.
        file = output_path.open("wb")
        try:
            with file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:  # skip empty chunks
                        file.write(chunk)
        except BaseException:
            # The file is closed by now. A truncated download is
            # indistinguishable from a complete one, so remove it.
            try:
                output_path.unlink()
            except OSError:
                # Best-effort cleanup; never mask the original error.
                pass
            raise
    return output_path
def extract_zip(zip_path: Path, output_dir: Path, *, delete_zip: bool = True) -> list[Path]:
    """Extract a zip file to a directory.

    Member names are sanitised by ``zipfile`` during extraction (leading
    slashes and ``..`` components are dropped), so the returned paths are the
    locations actually written rather than ``output_dir`` joined with the raw
    archive names.

    Args:
        zip_path: Path to the zip file.
        output_dir: Directory to extract contents into. Created if it doesn't exist.
        delete_zip: If True (default), delete the zip file after extraction.

    Returns:
        List of paths to the extracted files (excludes directories), in
        archive order.

    Raises:
        zipfile.BadZipFile: If the file is not a valid zip.
    """
    # Path is imported only under TYPE_CHECKING at module level, but it is
    # needed at runtime here to wrap the strings returned by ZipFile.extract.
    from pathlib import Path

    output_dir.mkdir(parents=True, exist_ok=True)

    extracted_files: list[Path] = []
    with zipfile.ZipFile(zip_path, "r") as zip_file:
        for member in zip_file.infolist():
            # extract() returns the normalised path it created on disk.
            written = zip_file.extract(member, output_dir)
            if not member.is_dir():
                extracted_files.append(Path(written))

    # Delete only after the archive handle is closed.
    if delete_zip:
        try:
            zip_path.unlink()
        except OSError:
            # Extraction succeeded; failing to remove the archive is not fatal.
            warnings.warn(f"Failed to delete zip file '{zip_path}'", SiftWarning, stacklevel=2)

    return extracted_files