Skip to content

Commit 2a6234d

Browse files
authored
Merge pull request #3782 from effigies/script/config
rf(script): Load config from file/env
2 parents 6d9a247 + 4bab540 commit 2a6234d

1 file changed

Lines changed: 112 additions & 39 deletions

File tree

scripts/s3-delete-all-versions.py

Lines changed: 112 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,131 @@
11
# /// script
22
# requires-python = ">=3.13"
33
# dependencies = [
4-
# "boto3",
4+
# "boto3>=1.42.45",
5+
# "pyyaml>=6.0.3",
6+
# "typer>=0.21.1",
57
# ]
68
# ///
7-
import argparse
8-
import logging
9+
import os
10+
import typing as ty
11+
from dataclasses import dataclass
12+
from pathlib import Path
913

1014
import boto3
11-
boto3.set_stream_logger('boto3.resources', logging.INFO)
15+
import typer
16+
import yaml
1217

13-
def nuke_prefix():
14-
client = boto3.client('s3',
15-
aws_access_key_id=AWS_ACCESS_KEY,
16-
aws_secret_access_key=AWS_SECRET_KEY)
17-
paginator = client.get_paginator('list_object_versions')
18+
19+
@dataclass
20+
class AWSConfig:
21+
AWS_ACCESS_KEY_ID: str
22+
AWS_SECRET_ACCESS_KEY: str
23+
AWS_S3_BUCKET_NAME: str
24+
AWS_REGION: str
25+
26+
@classmethod
27+
def from_dict(cls, data: dict) -> ty.Self:
28+
return cls(
29+
AWS_ACCESS_KEY_ID=data["AWS_ACCESS_KEY_ID"],
30+
AWS_SECRET_ACCESS_KEY=data["AWS_SECRET_ACCESS_KEY"],
31+
AWS_S3_BUCKET_NAME=data.get("AWS_S3_PUBLIC_BUCKET", "openneuro.org"),
32+
AWS_REGION=data.get("AWS_REGION", "us-east-1"),
33+
)
34+
35+
36+
def nuke_prefix(prefix: str, conf: AWSConfig, dry_run: bool = False) -> None:
    """Delete every object version and delete marker under *prefix*.

    Lists all versions (including delete markers) under
    ``s3://<bucket>/<prefix>`` and removes them in batches of 1000 (the
    ``delete_objects`` API limit).  With ``dry_run=True`` nothing is
    deleted; a summary of what would be removed is printed instead.

    Args:
        prefix: Key prefix within the bucket to purge.
        conf: Resolved AWS credentials and target bucket/region.
        dry_run: When True, only report what would be deleted.
    """
    client = boto3.client(
        "s3",
        aws_access_key_id=conf.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=conf.AWS_SECRET_ACCESS_KEY,
        region_name=conf.AWS_REGION,
    )
    paginator = client.get_paginator("list_object_versions")
    object_delete_list = []
    print(
        f"Remove all objects prefixed with s3://{conf.AWS_S3_BUCKET_NAME}/{prefix}"
    )
    for object_response_itr in paginator.paginate(
        Bucket=conf.AWS_S3_BUCKET_NAME, Prefix=prefix
    ):
        # Delete markers are versions too; both must be removed to fully
        # purge a key from a versioned bucket.
        for delete_marker in object_response_itr.get("DeleteMarkers", []):
            object_delete_list.append(
                {
                    "Key": delete_marker["Key"],
                    "VersionId": delete_marker["VersionId"],
                }
            )
        for version in object_response_itr.get("Versions", []):
            object_delete_list.append(
                {"Key": version["Key"], "VersionId": version["VersionId"]}
            )

    if dry_run:
        # Report once, not once per 1000-object batch as before, and avoid
        # indexing into an empty list when there is nothing to delete.
        print(f"Dry run: {len(object_delete_list)} objects to delete.")
        if object_delete_list:
            print(f"First object: {object_delete_list[0]}")
            print(f"Last object: {object_delete_list[-1]}")
        return

    # delete_objects accepts at most 1000 keys per request.
    for i in range(0, len(object_delete_list), 1000):
        response = client.delete_objects(
            Bucket=conf.AWS_S3_BUCKET_NAME,
            Delete={"Objects": object_delete_list[i : i + 1000], "Quiet": True},
        )
        print(response)
4077

4178

42-
parser = argparse.ArgumentParser(description='...')
43-
parser.add_argument('--key', help='Your S3 Access Key',
44-
type=str, required=True)
45-
parser.add_argument('--secret', help='Your S3 Access Secret',
46-
type=str, required=True)
47-
parser.add_argument('--bucket', help='Your S3 Bucket', type=str, required=True)
48-
parser.add_argument(
49-
'--folder', help='Your folder within your S3 Bucket', type=str, required=True)
79+
def load_config(config_path: Path) -> AWSConfig:
    """Load AWS settings from an OpenNeuro secrets YAML file.

    Expects the credentials under the ``secrets.aws`` mapping of the file.

    Raises:
        ValueError: if the file lacks the required AWS keys or is not a
            mapping (e.g. an empty YAML document parses to ``None``).
    """
    config_data = yaml.safe_load(Path(config_path).read_text())
    try:
        return AWSConfig.from_dict(config_data["secrets"]["aws"])
    except (KeyError, TypeError) as err:
        # TypeError covers an empty or malformed document where config_data
        # is None or not a mapping; chain the cause so the original error
        # (including the missing key name) stays visible in the traceback.
        raise ValueError("AWS credentials are missing in the config file.") from err
85+
86+
87+
def load_env_config() -> AWSConfig:
    """Load AWS settings from environment variables.

    Raises:
        ValueError: if ``AWS_ACCESS_KEY_ID`` or ``AWS_SECRET_ACCESS_KEY``
            is not set in the environment.
    """
    try:
        return AWSConfig.from_dict(dict(os.environ))
    except KeyError as err:
        # Chain the KeyError so the missing variable name surfaces in the
        # traceback instead of being swallowed.
        raise ValueError(
            "AWS credentials are missing from environment variables."
        ) from err
92+
93+
94+
def main(
    folder: ty.Annotated[str, typer.Option(help="Your folder within your S3 Bucket")],
    config: ty.Annotated[
        Path | None, typer.Option(help="Path to OpenNeuro secrets file")
    ] = None,
    env: ty.Annotated[
        bool, typer.Option(help="Read AWS secrets from environment")
    ] = False,
    key: ty.Annotated[str | None, typer.Option(help="Your S3 Access Key")] = None,
    secret: ty.Annotated[str | None, typer.Option(help="Your S3 Access Secret")] = None,
    bucket: ty.Annotated[str, typer.Option(help="Your S3 Bucket")] = "openneuro.org",
    dry_run: ty.Annotated[bool, typer.Option()] = False,
    log_level: ty.Annotated[
        ty.Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], typer.Option()
    ] = "INFO",
) -> None:
    """Delete all object versions under a prefix of an S3 bucket.

    Credentials are resolved in priority order: ``--config`` secrets file,
    then ``--env`` environment variables, then explicit ``--key``/``--secret``.

    Raises:
        ValueError: if no complete credential source is provided.
    """
    boto3.set_stream_logger("boto3.resources", log_level)

    if config:
        conf = load_config(config)
    elif env:
        conf = load_env_config()
    else:
        # bucket always has a default, so this effectively requires
        # key and secret when no config file or env source is selected.
        if not all([key, secret, bucket]):
            raise ValueError("AWS credentials and bucket information must be provided.")
        # Use the key name AWSConfig.from_dict reads for the bucket;
        # passing AWS_S3_BUCKET_NAME here was previously ignored, so an
        # explicit --bucket never took effect.
        conf = AWSConfig.from_dict(
            {
                "AWS_ACCESS_KEY_ID": key,
                "AWS_SECRET_ACCESS_KEY": secret,
                "AWS_S3_PUBLIC_BUCKET": bucket,
            }
        )

    nuke_prefix(prefix=folder, conf=conf, dry_run=dry_run)
52128

53-
AWS_ACCESS_KEY = args['key']
54-
AWS_SECRET_KEY = args['secret']
55-
BUCKET_NAME = args['bucket']
56-
PREFIX = args['folder']
57129

58-
nuke_prefix()
130+
# Script entry point: hand CLI parsing to Typer, which invokes main()
# with the parsed options.
if __name__ == "__main__":
    typer.run(main)

0 commit comments

Comments
 (0)