|
| 1 | +"""Cirion parser. |
| 2 | +
|
| 3 | +Based off Lumen parser as Cirion uses forked Lumen code-base. |
| 4 | +""" |
| 5 | + |
| 6 | +import logging |
| 7 | +import re |
| 8 | +from copy import deepcopy |
| 9 | +from typing import Dict |
| 10 | + |
| 11 | +import bs4 # type: ignore |
| 12 | +from bs4.element import ResultSet # type: ignore |
| 13 | +from dateutil import parser |
| 14 | + |
| 15 | +from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status |
| 16 | + |
| 17 | +# pylint: disable=too-many-nested-blocks, too-many-branches |
| 18 | + |
| 19 | + |
| 20 | +logger = logging.getLogger(__name__) |
| 21 | + |
| 22 | + |
class HtmlParserCirion1(Html):
    """Notifications Parser for Cirion notifications."""

    def parse_html(self, soup):
        """Execute parsing.

        Gathers the notification-wide fields from the ``<span>`` and
        ``<table>`` tags of ``soup`` and emits one maintenance dictionary per
        maintenance window found.

        Args:
            soup: Parsed HTML document exposing ``find_all``.

        Returns:
            A list of dictionaries, one per window. Each is a deep copy of the
            shared data with ``start`` and ``end`` set for that window. The
            list is empty when no window was found.
        """
        data = {}
        self.parse_spans(soup.find_all("span"), data)
        self.parse_tables(soup.find_all("table"), data)

        # "windows" is only an intermediate container: take it out before
        # copying so that it never leaks into the returned maintenances.
        windows = data.pop("windows", [])

        maintenances = []
        for start, end in windows:
            maintenance = deepcopy(data)
            maintenance["start"] = start
            maintenance["end"] = end
            maintenances.append(maintenance)
        return maintenances

    def parse_spans(self, spans: ResultSet, data: Dict):
        """Parse Span tag.

        Fills ``data`` in place with ``maintenance_id`` and ``status`` when a
        span carrying them is found.

        Note: Cirion maintenance email doesn't include an "easy" 1-line summary text, skipped for now.

        Args:
            spans: The ``<span>`` elements of the notification.
            data: Dictionary updated in place.
        """
        # Ordered (phrase, status) pairs; the first phrase found in a span wins.
        status_by_phrase = (
            ("this maintenance is scheduled", "CONFIRMED"),
            ("this maintenance is implement", "IN-PROCESS"),
            ("this maintenance is closed", "COMPLETED"),
        )

        for line in spans:
            if not isinstance(line, bs4.element.Tag):
                continue

            text = line.text.strip()

            # Find maintenance_id based on CHG[0-9] (7 digits) pattern.
            # The whole stripped span text is stored, not only the match.
            if re.search(r"CHG\d{7}", text):
                data["maintenance_id"] = text
                continue

            # Maintenance status below
            lowered = text.lower()
            for phrase, status in status_by_phrase:
                if phrase in lowered:
                    data["status"] = Status(status)
                    break

    def parse_tables(self, tables: ResultSet, data: Dict):  # pylint: disable=too-many-locals
        """Parse Table tag.

        Recognises two kinds of table by the text of their first cell(s):

        * a "Start" / "End" table, from which the first row expressed in
          Greenwich Mean Time is converted into a ``(start, end)`` timestamp
          pair appended to ``data["windows"]``;
        * a "Customer Name" table with 8 columns per row, from which
          ``data["account"]`` and the list ``data["circuits"]`` are built.

        Args:
            tables: The ``<table>`` elements of the notification.
            data: Dictionary updated in place; ``windows`` and ``circuits``
                are always (re)set by this method.
        """
        gmt_marker = "Greenwich Mean Time"
        gmt_suffix = "(Greenwich Mean Time)"
        window_columns = 2
        circuit_columns = 8
        # Values used in the table when an identifier is not provided.
        placeholders = ("_", "N/A")

        # Initialise multiple windows list that will be used in parse_html
        data["windows"] = []

        circuits = []
        for table in tables:
            cells = table.find_all("td")
            if not cells:
                continue

            header = cells[0].string

            if header == "Start" and len(cells) > 1 and cells[1].string == "End":
                # Stop one cell early so that cells[idx + 1] always exists,
                # even when the table has a dangling, unpaired cell.
                for idx in range(window_columns, len(cells) - 1, window_columns):
                    # ``.string`` is None for a cell with several children;
                    # treat such a cell as "no GMT timestamp here".
                    start_text = cells[idx].string or ""
                    end_text = cells[idx + 1].string or ""
                    if gmt_marker in start_text and gmt_marker in end_text:
                        start = parser.parse(start_text.split(gmt_suffix)[0])
                        end = parser.parse(end_text.split(gmt_suffix)[0])
                        data["windows"].append((self.dt2ts(start), self.dt2ts(end)))
                        # Only the first GMT row of a table is used.
                        break

            elif header == "Customer Name":
                if len(cells) % circuit_columns != 0:
                    # Without the expected 8-column layout the cell offsets
                    # below are meaningless, so skip the table instead of
                    # reading misaligned cells.
                    logger.error("Unexpected table format: %s", cells)
                    continue

                # Start after the header row and advance one row at a time.
                for idx in range(circuit_columns, len(cells), circuit_columns):
                    # The account is repeated on every row; the first one is
                    # kept on the assumption that all rows agree.
                    if "account" not in data:
                        data["account"] = cells[idx].string

                    data_circuit = {}

                    # The table can include "Circuit ID" or "Alt Circuit ID" as columns +1 and +2.
                    # Use the Alt Circuit ID if available (Cirion uses this primarily in their
                    # portal as the service ID), else the Circuit ID if available
                    circuit_id = cells[idx + 2].string
                    if circuit_id in placeholders:
                        circuit_id = cells[idx + 1].string
                    if circuit_id not in placeholders:
                        data_circuit["circuit_id"] = circuit_id

                    impact = cells[idx + 6].string or ""
                    if "outage" in impact.lower():
                        data_circuit["impact"] = Impact("OUTAGE")
                    circuits.append(CircuitImpact(**data_circuit))

        data["circuits"] = circuits
0 commit comments