|
| 1 | +"""Circuit Maintenance Parser for FLAG Notifications. |
| 2 | +
|
| 3 | +Note: this is a fork of Globalcloudexchange parser. |
| 4 | +""" |
| 5 | + |
| 6 | +import re |
| 7 | +from datetime import datetime |
| 8 | +from typing import Any, Dict, List |
| 9 | + |
| 10 | +from bs4 import BeautifulSoup |
| 11 | +from bs4.element import ResultSet # type: ignore |
| 12 | + |
| 13 | +from circuit_maintenance_parser.output import Impact |
| 14 | +from circuit_maintenance_parser.parser import EmailSubjectParser, Html, Status |
| 15 | + |
| 16 | + |
class HtmlParserFlag1(Html):
    """Custom Parser for HTML portion of FLAG circuit maintenance notifications."""

    def parse_html(self, soup: BeautifulSoup) -> List[Dict]:
        """Parse a FLAG circuit maintenance email.

        Args:
            soup (BeautifulSoup): beautiful soup object containing the html portion of an email.

        Returns:
            List[Dict]: A single-element list holding the data dict with the circuit maintenance data.
        """
        data: Dict[str, Any] = {"circuits": []}
        self.parse_tables(soup.find_all("table", attrs={"border-collapse": "collapse"}), data)
        self.parse_paragraphs(soup.find_all("p"), data)

        return [data]

    def parse_tables(self, tables: ResultSet, data: Dict):
        """Parse table elements to find maintenance windows (start/end) and circuit ID's.

        A table is classified by its header row: a first cell of "Service ID" marks a circuits table and a
        first cell of "Window" marks a windows table. Rows seen before any header row are ignored.

        Args:
            tables (ResultSet): the table elements to inspect.
            data (Dict): the data dict, updated in place. Circuits are appended to data["circuits"];
                data["start"] and data["end"] are overwritten by every window row.

        Raises:
            ValueError: if a window row holds a date that does not match the expected format.
        """
        date_format = "%d-%b-%Y %H:%M"
        for table in tables:
            table_type = ""
            for row in table.find_all("tr"):
                cells = [col.text.strip() for col in row.find_all("td")]
                # Rows without any <td> (e.g. <th> header rows or spacer rows) carry no data.
                if not cells:
                    continue

                if cells[0] == "Service ID":
                    table_type = "circuits"
                    continue
                if cells[0] == "Window":
                    table_type = "windows"
                    continue

                # this table is listing all circuits
                if table_type == "circuits":
                    # A circuit row needs the service id and the impact description.
                    if len(cells) < 2:
                        continue
                    impact = Impact.OUTAGE
                    if "at risk" in cells[1].lower():
                        impact = Impact.REDUCED_REDUNDANCY

                    data["circuits"].append({"circuit_id": cells[0], "impact": impact})
                # this table is listing windows (note: for now, we will only use the last listed window)
                elif table_type == "windows":
                    # A window row needs the label, the start and the end.
                    if len(cells) < 3:
                        continue
                    data["start"] = self.dt2ts(datetime.strptime(cells[1], date_format))
                    data["end"] = self.dt2ts(datetime.strptime(cells[2], date_format))

    def parse_paragraphs(self, paragraphs: ResultSet, data: Dict):
        """Parse paragraph elements to find account and summary.

        The account is captured from the "Dear <account>," greeting. The summary is the first non-blank
        string that follows the greeting.

        Args:
            paragraphs (ResultSet): the paragraph elements to inspect.
            data (Dict): the data dict, updated in place with the "account" and "summary" keys.
        """
        for paragraph in paragraphs:
            for text in paragraph.strings:
                greeting = re.search(r"Dear (.*),", text)
                if greeting:
                    data["account"] = greeting.group(1).strip()
                    continue

                # after account has been set, the next non-blank string is the summary
                if "account" in data and "summary" not in data:
                    summary = text.strip()
                    # Whitespace-only strings (e.g. line breaks between elements) are not a summary.
                    if summary:
                        data["summary"] = summary
| 74 | + |
| 75 | + |
class SubjectParserFlag1(EmailSubjectParser):
    """Parse the subject of a FLAG circuit maintenance email. The subject contains the maintenance ID and status."""

    def parse_subject(self, subject: str) -> List[Dict]:
        """Parse the FLAG Email subject for maintenance ID and status.

        Args:
            subject (str): subject of email
                e.g. 'FLAG | PE2025102750538 | Planned Event | Rescheduled'.

        Returns:
            List[Dict]: A single-element list holding the data dict. "status" is always set;
                "maintenance_id" is set only when the subject starts with 'FLAG | <ID>'.
        """
        data: Dict[str, Any] = {}
        id_match = re.search(r"^FLAG \| ([A-Z0-9]+)\b", subject)
        if id_match:
            data["maintenance_id"] = id_match.group(1)

        # Keyword matching is case-insensitive; lowercase the subject once.
        lowered = subject.lower()
        if "completed" in lowered:
            data["status"] = Status.COMPLETED
        # "rescheduled" contains "scheduled", so it must be recognised as its own status first.
        elif "rescheduled" in lowered:
            data["status"] = Status.RE_SCHEDULED
        # Cancellation is checked before falling back to "Confirmed" so that a subject which also
        # carries a generic word such as "notice" or "scheduled" is still reported as cancelled.
        elif "cancelled" in lowered:
            data["status"] = Status.CANCELLED
        else:
            # Subjects mentioning "scheduled", "reminder" or "notice" are confirmations.
            # Some FLAG notifications don't clearly state a status in their subject.
            # From inspection of examples, it looks like "Confirmed" would be the most appropriate in this case.
            data["status"] = Status.CONFIRMED

        return [data]
0 commit comments