|
1 | 1 | # /// script |
2 | 2 | # requires-python = ">=3.13" |
3 | 3 | # dependencies = [ |
4 | | -# "boto3", |
| 4 | +# "boto3>=1.42.45", |
| 5 | +# "pyyaml>=6.0.3", |
| 6 | +# "typer>=0.21.1", |
5 | 7 | # ] |
6 | 8 | # /// |
7 | | -import argparse |
8 | | -import logging |
| 9 | +import os |
| 10 | +import typing as ty |
| 11 | +from dataclasses import dataclass |
| 12 | +from pathlib import Path |
9 | 13 |
|
10 | 14 | import boto3 |
11 | | -boto3.set_stream_logger('boto3.resources', logging.INFO) |
| 15 | +import typer |
| 16 | +import yaml |
12 | 17 |
|
13 | | -def nuke_prefix(): |
14 | | - client = boto3.client('s3', |
15 | | - aws_access_key_id=AWS_ACCESS_KEY, |
16 | | - aws_secret_access_key=AWS_SECRET_KEY) |
17 | | - paginator = client.get_paginator('list_object_versions') |
| 18 | + |
| 19 | +@dataclass |
| 20 | +class AWSConfig: |
| 21 | + AWS_ACCESS_KEY_ID: str |
| 22 | + AWS_SECRET_ACCESS_KEY: str |
| 23 | + AWS_S3_BUCKET_NAME: str |
| 24 | + AWS_REGION: str |
| 25 | + |
| 26 | + @classmethod |
| 27 | + def from_dict(cls, data: dict) -> ty.Self: |
| 28 | + return cls( |
| 29 | + AWS_ACCESS_KEY_ID=data["AWS_ACCESS_KEY_ID"], |
| 30 | + AWS_SECRET_ACCESS_KEY=data["AWS_SECRET_ACCESS_KEY"], |
| 31 | + AWS_S3_BUCKET_NAME=data.get("AWS_S3_PUBLIC_BUCKET", "openneuro.org"), |
| 32 | + AWS_REGION=data.get("AWS_REGION", "us-east-1"), |
| 33 | + ) |
| 34 | + |
| 35 | + |
def nuke_prefix(prefix: str, conf: AWSConfig, dry_run: bool = False) -> None:
    """Delete every object version and delete marker found under a prefix.

    All versions and delete markers whose key starts with ``prefix`` are
    listed first, then removed in batches. Progress and the raw
    ``delete_objects`` responses are printed to stdout.

    NOTE(review): an empty ``prefix`` would presumably match every key in
    the bucket -- confirm whether callers should be allowed to pass one.

    Args:
        prefix: Key prefix (folder) inside the bucket to clear.
        conf: Credentials, region and bucket name to use.
        dry_run: When true, only report what would be deleted.
    """
    client = boto3.client(
        "s3",
        aws_access_key_id=conf.AWS_ACCESS_KEY_ID,
        aws_secret_access_key=conf.AWS_SECRET_ACCESS_KEY,
        region_name=conf.AWS_REGION,
    )
    paginator = client.get_paginator("list_object_versions")
    print(
        f"Remove all objects prefixed with s3://{conf.AWS_S3_BUCKET_NAME}/{prefix}"
    )

    object_delete_list = []
    for page in paginator.paginate(Bucket=conf.AWS_S3_BUCKET_NAME, Prefix=prefix):
        # Delete markers and real versions are removed the same way: by
        # key plus version id. Either list may be absent from a page.
        for entry_kind in ("DeleteMarkers", "Versions"):
            object_delete_list.extend(
                {"Key": entry["Key"], "VersionId": entry["VersionId"]}
                for entry in page.get(entry_kind, [])
            )

    if not object_delete_list:
        print("No objects found; nothing to delete.")
        return

    if dry_run:
        # Report once for the whole listing instead of once per batch.
        print(f"Dry run: {len(object_delete_list)} objects to delete.")
        print(f"First object: {object_delete_list[0]}")
        print(f"Last object: {object_delete_list[-1]}")
        return

    # S3 DeleteObjects accepts at most 1000 keys per request.
    batch_size = 1000
    for start in range(0, len(object_delete_list), batch_size):
        response = client.delete_objects(
            Bucket=conf.AWS_S3_BUCKET_NAME,
            Delete={
                "Objects": object_delete_list[start : start + batch_size],
                "Quiet": True,
            },
        )
        print(response)
40 | 77 |
|
41 | 78 |
|
42 | | -parser = argparse.ArgumentParser(description='...') |
43 | | -parser.add_argument('--key', help='Your S3 Access Key', |
44 | | - type=str, required=True) |
45 | | -parser.add_argument('--secret', help='Your S3 Access Secret', |
46 | | - type=str, required=True) |
47 | | -parser.add_argument('--bucket', help='Your S3 Bucket', type=str, required=True) |
48 | | -parser.add_argument( |
49 | | - '--folder', help='Your folder within your S3 Bucket', type=str, required=True) |
def load_config(config_path: Path) -> AWSConfig:
    """Load AWS settings from the ``secrets.aws`` section of a YAML file.

    Args:
        config_path: Path to the YAML secrets file.

    Returns:
        The ``AWSConfig`` built from the ``secrets -> aws`` mapping.

    Raises:
        ValueError: If the ``secrets``/``aws`` section or a required
            credential key is missing, or the file does not contain the
            expected nested mappings (for example, an empty file).
    """
    config_data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
    try:
        return AWSConfig.from_dict(config_data["secrets"]["aws"])
    except (KeyError, TypeError) as err:
        # TypeError covers an empty document (safe_load returns None) and
        # sections that are not mappings; both mean the credentials are
        # not where they are expected to be.
        raise ValueError("AWS credentials are missing in the config file.") from err
| 85 | + |
| 86 | + |
def load_env_config() -> AWSConfig:
    """Build the AWS configuration from the process environment.

    Raises:
        ValueError: If a required credential variable is not set.
    """
    environment = dict(os.environ)
    try:
        conf = AWSConfig.from_dict(environment)
    except KeyError:
        raise ValueError("AWS credentials are missing from environment variables.")
    return conf
| 92 | + |
| 93 | + |
def main(
    folder: ty.Annotated[str, typer.Option(help="Your folder within your S3 Bucket")],
    config: ty.Annotated[
        Path | None, typer.Option(help="Path to OpenNeuro secrets file")
    ] = None,
    env: ty.Annotated[
        bool, typer.Option(help="Read AWS secrets from environment")
    ] = False,
    key: ty.Annotated[str | None, typer.Option(help="Your S3 Access Key")] = None,
    secret: ty.Annotated[str | None, typer.Option(help="Your S3 Access Secret")] = None,
    bucket: ty.Annotated[str, typer.Option(help="Your S3 Bucket")] = "openneuro.org",
    dry_run: ty.Annotated[bool, typer.Option()] = False,
    log_level: ty.Annotated[
        ty.Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], typer.Option()
    ] = "INFO",
) -> None:
    """Delete every object version stored under a folder of an S3 bucket.

    Credentials are taken from --config when given, otherwise from the
    environment when --env is set, otherwise from --key, --secret and
    --bucket.
    """
    boto3.set_stream_logger("boto3.resources", log_level)

    # Sources are tried in priority order; --key/--secret/--bucket are only
    # consulted when neither --config nor --env is used.
    if config:
        conf = load_config(config)
    elif env:
        conf = load_env_config()
    else:
        if not all([key, secret, bucket]):
            raise ValueError("AWS credentials and bucket information must be provided.")
        conf = AWSConfig.from_dict(
            {
                "AWS_ACCESS_KEY_ID": key,
                "AWS_SECRET_ACCESS_KEY": secret,
                # AWSConfig.from_dict reads the bucket from this key; any
                # other key is ignored and the default bucket is used.
                "AWS_S3_PUBLIC_BUCKET": bucket,
            }
        )

    nuke_prefix(prefix=folder, conf=conf, dry_run=dry_run)
52 | 128 |
|
53 | | -AWS_ACCESS_KEY = args['key'] |
54 | | -AWS_SECRET_KEY = args['secret'] |
55 | | -BUCKET_NAME = args['bucket'] |
56 | | -PREFIX = args['folder'] |
57 | 129 |
|
58 | | -nuke_prefix() |
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    typer.run(main)
0 commit comments