Skip to content

Commit 938e8f4

Browse files
first iteration of script
1 parent 3893cde commit 938e8f4

7 files changed

Lines changed: 291 additions & 0 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
logs/
2+
**/*.log
3+
**/__pycache__

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,7 @@
11
# s3-batch-delete-objects
2+
Deletes S3 objects in batches of a user-specified size. The keys of the objects to delete are read from a user-supplied flat file, one key per line.
3+
4+
# Usage
5+
```
6+
python3 delete_s3_objects.py [-h] [-b BATCH_SIZE] [-l LOG_FILE] bucket filename
7+
```

aws_s3/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
This was borrowed from https://docs.aws.amazon.com/AmazonS3/latest/userguide/example_s3_DeleteObjects_section.html

aws_s3/__init__.py

Whitespace-only changes.

aws_s3/delete.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import logging
2+
3+
import boto3
4+
from botocore.exceptions import ClientError
5+
6+
logger = logging.getLogger("file")
7+
8+
9+
class DeleteS3Objects:
    """Deletes objects from a single S3 bucket using batched requests."""

    def __init__(self, bucket: str):
        """
        :param bucket: Name of the S3 bucket the objects will be deleted from.
        """
        self.bucket = bucket

        s3_resource = boto3.resource("s3")
        # NOTE: despite its name this attribute holds a Boto3 Bucket resource,
        # not a low-level client. The name is kept so existing callers still work.
        self.client = s3_resource.Bucket(self.bucket)

    def delete_objects(self, object_keys: list):
        """
        Removes a list of objects from the bucket this instance was created for.
        This operation is done as a batch in a single request.

        :param object_keys: The list of keys that identify the objects to remove.
        :return: The response that contains data about which objects were deleted
                 and any that could not be deleted. An empty dict is returned,
                 and no request is made, when object_keys is empty.
        :raises ClientError: Re-raised after being logged when the request fails.
        """
        objects = [{"Key": key} for key in object_keys]
        if not objects:
            # Nothing to delete: skip the round trip instead of sending an
            # empty "Objects" payload to S3.
            return {}

        # Only the API call is guarded; the logging below cannot raise ClientError.
        try:
            response = self.client.delete_objects(Delete={"Objects": objects})
        except ClientError:
            logger.exception(
                "Couldn't delete any objects from bucket %s.", self.client.name
            )
            raise

        if "Deleted" in response:
            logger.info(
                "Deleted objects '%s' from bucket '%s'.",
                [del_obj["Key"] for del_obj in response["Deleted"]],
                self.client.name,
            )
        if "Errors" in response:
            # Per-key failures are reported in the response, not raised.
            logger.warning(
                "Could not delete objects '%s' from bucket '%s'.",
                [
                    f"{del_obj['Key']}: {del_obj['Code']}"
                    for del_obj in response["Errors"]
                ],
                self.client.name,
            )
        return response

delete_s3_objects.py

Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import logging
5+
import os
6+
import sys
7+
8+
from datetime import datetime
9+
from itertools import islice
10+
11+
import aws_s3.delete
12+
13+
logger = logging.getLogger("file")
slogger = logging.getLogger("stdout")


class DeleteObjects:
    """
    Deletes S3 objects from a bucket. S3 object list is read from a provided flat file.

    The flat file is read line by line; every non-blank line is treated as one
    object key. Keys are sent to S3 in batches of ``batch_size``.
    """

    # Largest batch accepted here; matches the limit enforced by the CLI.
    MAX_BATCH_SIZE = 1000

    # NOTE: an "action" selector (delete vs. delete_all_versions) was sketched
    # for this class but deferred; only plain batched deletion is implemented.

    def __init__(self, **kwargs):
        """
        :param log_file: Optional keyword argument. Path of the log file to write.
                         When missing or empty, a timestamped file under ``logs/``
                         is used.
        """
        self.bucket = None
        self.batch_size = None
        self.data_file = None

        log_file = kwargs.get("log_file")
        if log_file:
            self.log_file = log_file
        else:
            # Year-month-day order so that log file names sort chronologically.
            now = datetime.now().strftime("%Y%m%d%H%M%S")
            self.log_file = f"logs/results-{now}.log"

        self._init_logging()

    def _init_logging(self):
        """Attach a file handler to the file logger and a stdout handler to the stdout logger."""
        # File logger
        logger.setLevel(logging.INFO)

        # A bare file name has no directory part; only create one when it is given.
        log_dir = os.path.dirname(self.log_file)
        if log_dir:
            # 0o755 is an octal literal; exist_ok avoids a check-then-create race.
            os.makedirs(log_dir, mode=0o755, exist_ok=True)

        file_handler = logging.FileHandler(self.log_file)
        file_handler.setLevel(logging.INFO)

        formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        file_handler.setFormatter(formatter)

        logger.addHandler(file_handler)
        aws_s3.delete.logger.addHandler(file_handler)

        # STDOUT logger
        slogger.setLevel(logging.INFO)

        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setLevel(logging.INFO)
        stdout_handler.setFormatter(formatter)

        slogger.addHandler(stdout_handler)

    def caller(
        self,
        bucket: str = "",
        data_file: str = "",
        batch_size: int = 1000,
    ):
        """
        Caller function. Validates the settings, reads the data file in batches, and
        hands each batch to the downstream delete function.

        Settings already stored on the instance (for example by ``delete``) take
        precedence; the arguments are only used for settings that are still unset.

        :param bucket: Name of the S3 bucket.
        :param data_file: Path of the flat file holding one object key per line.
        :param batch_size: Number of keys per delete request (1 to 1000).
        :raises ValueError: If the data file or bucket is empty, or the batch size
                            is out of range.
        :raises FileNotFoundError: If the data file does not exist.
        """
        if self.data_file is None:
            self.data_file = data_file
        if self.data_file == "":
            raise ValueError("'data_file' argument must not be empty")

        if self.bucket is None:
            self.bucket = bucket
        if self.bucket == "":
            raise ValueError("'bucket' argument must not be empty")

        if self.batch_size is None:
            self.batch_size = batch_size
        # A batch size of 0 would end the loop below without deleting anything.
        if not 1 <= self.batch_size <= self.MAX_BATCH_SIZE:
            raise ValueError(
                f"'batch_size' argument must be between 1 and {self.MAX_BATCH_SIZE}"
            )

        # Only the open() is guarded so that an unrelated FileNotFoundError raised
        # further down is not reported as a missing data file. The file is only
        # read, so it is opened read-only.
        try:
            source = open(self.data_file, "r", encoding="utf8")
        except FileNotFoundError as error:
            logger.error("File %s was not found!", self.data_file)
            raise FileNotFoundError(
                f"File {self.data_file} was not found!"
            ) from error

        with source:
            slogger.info("Logging to %s", self.log_file)
            logger.info("Bucket: %s", self.bucket)
            logger.info("Source file: %s", self.data_file)
            logger.info("Batch size: %s", self.batch_size)
            logger.info("Starting deletion...")

            # Strip only the line terminator (keys may contain other whitespace)
            # and drop blank lines, which would otherwise be sent as empty keys.
            stripped = (line.rstrip("\n") for line in source)
            keys = (key for key in stripped if key)

            while True:
                batch = list(islice(keys, self.batch_size))
                if not batch:
                    break
                self._delete(batch)

            logger.info("Deletion complete.")
            slogger.info("Deletion complete.")

    def delete(self, bucket: str = "", data_file: str = "", batch_size: int = 1000):
        """
        Delete all provided objects in a bucket using batching.

        Stores the given settings on the instance, replacing any earlier ones,
        and then runs ``caller``.

        :param bucket: Name of the S3 bucket.
        :param data_file: Path of the flat file holding one object key per line.
        :param batch_size: Number of keys per delete request (1 to 1000).
        """
        self.batch_size = batch_size
        self.bucket = bucket
        self.data_file = data_file
        self.caller()

    def _delete(self, object_keys: list):
        """Send one batch of object keys to S3 for deletion."""
        client = aws_s3.delete.DeleteS3Objects(self.bucket)
        client.delete_objects(object_keys)
168+
169+
170+
def check_batch_size(batch_size: str) -> int:
    """
    Verify that batch size is a whole number between 1 and 1000.

    Used as an argparse ``type=`` callable, so it receives the raw command-line
    string and returns the parsed integer.

    :param batch_size: Value given on the command line.
    :return: The batch size as an int.
    :raises argparse.ArgumentTypeError: If the value is not an integer or is
                                        outside the accepted range.
    """
    try:
        value = int(batch_size)
    except (TypeError, ValueError):
        raise argparse.ArgumentTypeError(
            f"Batch size must be an integer, got {batch_size!r}"
        ) from None

    if value > 1000:
        raise argparse.ArgumentTypeError("Batch size cannot exceed 1000")
    # Zero would delete nothing and a negative value cannot be used for batching.
    if value < 1:
        raise argparse.ArgumentTypeError("Batch size must be at least 1")

    return value


def main(argv=None):
    """
    Parse the command line and delete the listed objects.

    :param argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.
    """
    parser = argparse.ArgumentParser(
        prog="delete_s3_objects.py",
        description="Deletes objects from S3 in bulk using a list of objects in a flat file",
    )

    # NOTE: a positional "action" argument (delete / delete_all_versions) was
    # sketched here but deferred; plain batched deletion is the only mode.

    parser.add_argument("bucket", help="Name of S3 bucket", type=str)

    parser.add_argument(
        "filename",
        help="Filename of flat file containing S3 objects to delete",
        type=str,
    )

    parser.add_argument(
        "-b",
        "--batch-size",
        dest="batch_size",
        help="Number of objects to delete in each batch (default: 1000, max: 1000)",
        default=1000,
        required=False,
        type=check_batch_size,
    )

    parser.add_argument(
        "-l",
        "--log-file",
        dest="log_file",
        help="File to use for logging. (default: logs/results-{datetime}.log)",
        default="",
    )

    args = parser.parse_args(argv)

    delete_objects = DeleteObjects(log_file=args.log_file)
    delete_objects.caller(args.bucket, args.filename, args.batch_size)


if __name__ == "__main__":
    main()

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
boto3

0 commit comments

Comments
 (0)