Skip to content

Commit 35b067c

Browse files
committed
Update AWS parser for HTML emails
1 parent 262f70f commit 35b067c

1 file changed

Lines changed: 53 additions & 20 deletions

File tree

  • circuit_maintenance_parser/parsers

circuit_maintenance_parser/parsers/aws.py

Lines changed: 53 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -62,29 +62,62 @@ def parse_text(self, text):
6262
"""
6363
data = {"circuits": []}
6464
impact = Impact.OUTAGE
65-
maintenace_id = ""
65+
maintenance_id = ""
6666
status = Status.CONFIRMED
67-
for line in text.splitlines():
68-
if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower():
69-
data["summary"] = line
70-
search = re.search(
71-
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
72-
line,
73-
)
74-
if search:
75-
data["start"] = self.dt2ts(parser.parse(search.group(1)))
76-
data["end"] = self.dt2ts(parser.parse(search.group(2)))
77-
maintenace_id += str(data["start"])
78-
maintenace_id += str(data["end"])
79-
if "may become unavailable" in line.lower():
80-
impact = Impact.OUTAGE
81-
elif "has been cancelled" in line.lower():
82-
status = Status.CANCELLED
83-
elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line):
84-
maintenace_id += line
67+
if re.search(r'<!doctype html>', text, re.IGNORECASE):
68+
soup = bs4.BeautifulSoup(text, 'html.parser')
69+
clean_string = soup.get_text()
70+
clean_string = re.sub('=20', '', clean_string)
71+
clean_list = clean_string.splitlines()
72+
cleaner_list = []
73+
for line in clean_list:
74+
newline = line.strip()
75+
if newline != "":
76+
cleaner_list.append(newline)
77+
sumstart = cleaner_list.index('Hello,')
78+
try:
79+
sumend = cleaner_list.index('[1] https://aws.amazon.com/support')
80+
except ValueError:
81+
sumend = len(cleaner_list)
82+
summary = ""
83+
for line in cleaner_list[sumstart:sumend]:
84+
summary+=f"{line}\n"
85+
if "may become unavailable" in summary.lower():
86+
impact = Impact.OUTAGE
87+
elif "has been cancelled" in summary.lower():
88+
status = Status.CANCELLED
89+
start_time = cleaner_list[cleaner_list.index('Start time')+1]
90+
end_time = cleaner_list[cleaner_list.index('End time')+1]
91+
data["start"] = self.dt2ts(parser.parse(start_time))
92+
data["end"] = self.dt2ts(parser.parse(end_time))
93+
data["summary"] = summary
94+
for line in summary.splitlines():
95+
line = line.strip()
96+
maintenance_id += line
8597
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
98+
maintenance_id += str(data["start"])
99+
maintenance_id += str(data["end"])
100+
else:
101+
for line in text.splitlines():
102+
if "planned maintenance" in line.lower() or "maintenance has been scheduled" in line.lower():
103+
data["summary"] = line
104+
search = re.search(
105+
r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})",
106+
line,
107+
)
108+
if search:
109+
data["start"] = self.dt2ts(parser.parse(search.group(1)))
110+
data["end"] = self.dt2ts(parser.parse(search.group(2)))
111+
maintenance_id += str(data["start"])
112+
maintenance_id += str(data["end"])
113+
if "may become unavailable" in line.lower():
114+
impact = Impact.OUTAGE
115+
elif "has been cancelled" in line.lower():
116+
status = Status.CANCELLED
117+
maintenance_id += line
118+
data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact))
86119
# No maintenance ID found in emails, so a hash value is being generated using the start,
87120
# end and IDs of all circuits in the notification.
88-
data["maintenance_id"] = hashlib.sha256(maintenace_id.encode("utf-8")).hexdigest() # nosec
121+
data["maintenance_id"] = hashlib.sha256(maintenance_id.encode("utf-8")).hexdigest() # nosec
89122
data["status"] = status
90123
return [data]

0 commit comments

Comments
 (0)