22
33import logging
44import re
5- from typing import Dict , List
5+ from typing import Any , Dict , List
66
7+ from bs4 import BeautifulSoup
78from bs4 .element import ResultSet # type: ignore
89from dateutil import parser
910
10- from circuit_maintenance_parser .parser import Html , Impact , Status
11+ from circuit_maintenance_parser .parser import EmailSubjectParser , Html , Impact , Status
1112
1213logger = logging .getLogger (__name__ )
1314
1415
1516class HtmlParserVodafone1 (Html ):
1617 """Notifications Parser for Vodafone notifications."""
1718
18- def parse_html (self , soup : ResultSet ) -> List [Dict ]:
19+ def parse_html (self , soup : BeautifulSoup ) -> List [Dict ]:
1920 """Execute parsing."""
20- data : Dict [str ] = {"circuits" : []}
21+ data : Dict [str , Any ] = {"circuits" : []}
2122 self .parse_crq (soup , data )
2223 self .parse_tables (soup .find_all ("table" ), data )
2324 self .parse_bold (soup .find_all ("b" ), data )
@@ -46,11 +47,11 @@ def parse_tables(self, tables: ResultSet, data: Dict):
4647 elif "Services Affected" in col_mapping and col == col_mapping ["Services Affected" ]:
4748 cid = td_elem .text .strip ()
4849 elif "Service Impact" in col_mapping and col == col_mapping ["Service Impact" ]:
49- # not sure if other impact types exist, can be expanded of need-be
50+ # not sure if other impact types exist; can be expanded if need be. Default to DEGRADED.
5051 if "loss of service" in td_elem .text .lower ():
5152 impact = Impact ("OUTAGE" )
5253 else :
53- impact = Impact ("OUTAGE " )
54+ impact = Impact ("DEGRADED " )
5455 col += 1
5556
5657 # at the end of the table row, add circuits to list, if defined
@@ -66,19 +67,24 @@ def parse_bold(self, bolds: ResultSet, data: Dict):
6667 """
6768 window = 0
6869 for bold in bolds :
70+ text_lower = bold .text .lower ()
71+
6972 # find start/end date/time
70- if (
71- data ["status" ] == Status ("RE-SCHEDULED" ) and "new scheduled start" in bold .text .lower ()
72- ) or "scheduled start" in bold .text .lower ():
73+ # in case the window is re-scheduled, the original and new windows are listed; ignore original window
74+ if "original scheduled start" in text_lower :
75+ continue
76+
77+ if "scheduled start" in text_lower :
7378 window_next = bold .next_sibling
7479 while window_next :
7580 text = window_next .text .strip ()
7681 if text != "" :
7782 window = text
7883 break
7984 window_next = window_next .next_sibling
85+
8086 # find summary
81- elif "description" in bold . text . lower () :
87+ if "description" in text_lower :
8288 description_next = bold .next_sibling
8389 while description_next :
8490 text = description_next .text .strip ()
@@ -95,22 +101,33 @@ def parse_bold(self, bolds: ResultSet, data: Dict):
95101 def parse_crq (self , soup : ResultSet , data : Dict ):
96102 """Vodafone maintenance_id's are in the format of CRQ[0-9] with 12 digits.
97103
98- Before mentioning the CRQ, the status of the maintenance can be derived, for example:
99-
100104 Please be advised that the Planned Works have been Completed: CRQ000001312927
101105 """
102106 text = soup .get_text (separator = " " )
103- match = re .search (r"\b(.*)[\s:]+(CRQ\ d{12}) \b" , text )
107+ match = re .search (r"\bCRQ\ d{12}\b" , text )
104108 if match :
105- data ["maintenance_id" ] = match .group (2 )
106-
107- # derive status
108- if "postponed" in match .group (1 ).lower ():
109- data ["status" ] = Status ("CANCELLED" )
110- elif "completed" in match .group (1 ).lower ():
111- data ["status" ] = Status ("COMPLETED" )
112- elif "rescheduled" in match .group (1 ).lower ():
113- data ["status" ] = Status ("RE-SCHEDULED" )
114- # default status
115- else :
116- data ["status" ] = Status ("CONFIRMED" )
109+ data .setdefault ("maintenance_id" , match .group (0 ))
110+
111+
class SubjectParserVodafone1(EmailSubjectParser):
    """Derive the maintenance status and, if present, the CRQ id from an email subject."""

    def parse_subject(self, subject: str) -> List[Dict]:
        """Build the single-entry result list for a Vodafone subject line.

        The status is chosen by the first keyword group found in the
        lower-cased subject, checked in this order: "completed",
        "rescheduled", then "postponed"/"cancelled". When none of them
        occurs the status is CONFIRMED. A "maintenance_id" entry is added
        only when the subject contains ``CRQ`` followed by exactly twelve
        digits (matched case-sensitively on the unmodified subject).

        Args:
            subject: Raw email subject.

        Returns:
            A list holding one dict with "status" and, optionally,
            "maintenance_id".
        """
        lowered = subject.lower()

        # Order matters: the first group with a hit decides the status.
        keyword_groups = (
            (("completed",), "COMPLETED"),
            (("rescheduled",), "RE-SCHEDULED"),
            (("postponed", "cancelled"), "CANCELLED"),
        )
        status_name = "CONFIRMED"
        for keywords, candidate in keyword_groups:
            if any(keyword in lowered for keyword in keywords):
                status_name = candidate
                break

        result: Dict = {"status": Status(status_name)}

        found = re.search(r"\bCRQ\d{12}\b", subject)
        if found is not None:
            result["maintenance_id"] = found.group(0)

        return [result]
0 commit comments