Skip to content

Commit a5c25d1

Browse files
Add an email subject parser for status and maintenance_id, plus other small fixes that came up during PR review.
1 parent 0583d6c commit a5c25d1

9 files changed

Lines changed: 59 additions & 33 deletions

File tree

circuit_maintenance_parser/parsers/vodafone.py

Lines changed: 42 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,23 @@
22

33
import logging
44
import re
5-
from typing import Dict, List
5+
from typing import Any, Dict, List
66

7+
from bs4 import BeautifulSoup
78
from bs4.element import ResultSet # type: ignore
89
from dateutil import parser
910

10-
from circuit_maintenance_parser.parser import Html, Impact, Status
11+
from circuit_maintenance_parser.parser import EmailSubjectParser, Html, Impact, Status
1112

1213
logger = logging.getLogger(__name__)
1314

1415

1516
class HtmlParserVodafone1(Html):
1617
"""Notifications Parser for Vodafone notifications."""
1718

18-
def parse_html(self, soup: ResultSet) -> List[Dict]:
19+
def parse_html(self, soup: BeautifulSoup) -> List[Dict]:
1920
"""Execute parsing."""
20-
data: Dict[str] = {"circuits": []}
21+
data: Dict[str, Any] = {"circuits": []}
2122
self.parse_crq(soup, data)
2223
self.parse_tables(soup.find_all("table"), data)
2324
self.parse_bold(soup.find_all("b"), data)
@@ -46,11 +47,11 @@ def parse_tables(self, tables: ResultSet, data: Dict):
4647
elif "Services Affected" in col_mapping and col == col_mapping["Services Affected"]:
4748
cid = td_elem.text.strip()
4849
elif "Service Impact" in col_mapping and col == col_mapping["Service Impact"]:
49-
# not sure if other impact types exist, can be expanded of need-be
50+
# not sure whether other impact types exist; this can be expanded if need be. Default to DEGRADED.
5051
if "loss of service" in td_elem.text.lower():
5152
impact = Impact("OUTAGE")
5253
else:
53-
impact = Impact("OUTAGE")
54+
impact = Impact("DEGRADED")
5455
col += 1
5556

5657
# at the end of the table row, add circuits to list, if defined
@@ -66,19 +67,24 @@ def parse_bold(self, bolds: ResultSet, data: Dict):
6667
"""
6768
window = 0
6869
for bold in bolds:
70+
text_lower = bold.text.lower()
71+
6972
# find start/end date/time
70-
if (
71-
data["status"] == Status("RE-SCHEDULED") and "new scheduled start" in bold.text.lower()
72-
) or "scheduled start" in bold.text.lower():
73+
# in case the window is rescheduled, both the original and the new window are listed; ignore the original window
74+
if "original scheduled start" in text_lower:
75+
continue
76+
77+
if "scheduled start" in text_lower:
7378
window_next = bold.next_sibling
7479
while window_next:
7580
text = window_next.text.strip()
7681
if text != "":
7782
window = text
7883
break
7984
window_next = window_next.next_sibling
85+
8086
# find summary
81-
elif "description" in bold.text.lower():
87+
if "description" in text_lower:
8288
description_next = bold.next_sibling
8389
while description_next:
8490
text = description_next.text.strip()
@@ -95,22 +101,33 @@ def parse_bold(self, bolds: ResultSet, data: Dict):
95101
def parse_crq(self, soup: ResultSet, data: Dict):
96102
"""Vodafone maintenance_id's are in the format of CRQ[0-9] with 12 digits.
97103
98-
Before mentioning the CRQ, the status of the maintenance can be derived, for example:
99-
100104
Please be advised that the Planned Works have been Completed: CRQ000001312927
101105
"""
102106
text = soup.get_text(separator=" ")
103-
match = re.search(r"\b(.*)[\s:]+(CRQ\d{12})\b", text)
107+
match = re.search(r"\bCRQ\d{12}\b", text)
104108
if match:
105-
data["maintenance_id"] = match.group(2)
106-
107-
# derive status
108-
if "postponed" in match.group(1).lower():
109-
data["status"] = Status("CANCELLED")
110-
elif "completed" in match.group(1).lower():
111-
data["status"] = Status("COMPLETED")
112-
elif "rescheduled" in match.group(1).lower():
113-
data["status"] = Status("RE-SCHEDULED")
114-
# default status
115-
else:
116-
data["status"] = Status("CONFIRMED")
109+
data.setdefault("maintenance_id", match.group(0))
110+
111+
112+
class SubjectParserVodafone1(EmailSubjectParser):
113+
"""Parse status and (when present) the CRQ from the subject line."""
114+
115+
def parse_subject(self, subject: str) -> List[Dict]:
116+
"""Parse the email subject."""
117+
data: Dict = {}
118+
subject_lower = subject.lower()
119+
120+
if "completed" in subject_lower:
121+
data["status"] = Status("COMPLETED")
122+
elif "rescheduled" in subject_lower:
123+
data["status"] = Status("RE-SCHEDULED")
124+
elif "postponed" in subject_lower or "cancelled" in subject_lower:
125+
data["status"] = Status("CANCELLED")
126+
else:
127+
data["status"] = Status("CONFIRMED")
128+
129+
crq_match = re.search(r"\bCRQ\d{12}\b", subject)
130+
if crq_match:
131+
data["maintenance_id"] = crq_match.group(0)
132+
133+
return [data]

circuit_maintenance_parser/provider.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1, HtmlParserTelstra2
4545
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
4646
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
47-
from circuit_maintenance_parser.parsers.vodafone import HtmlParserVodafone1
47+
from circuit_maintenance_parser.parsers.vodafone import HtmlParserVodafone1, SubjectParserVodafone1
4848
from circuit_maintenance_parser.parsers.windstream import HtmlParserWindstream1
4949
from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1
5050
from circuit_maintenance_parser.processor import CombinedProcessor, GenericProcessor, SimpleProcessor
@@ -571,7 +571,7 @@ class Vodafone(GenericProvider):
571571

572572
_processors: List[GenericProcessor] = PrivateAttr(
573573
[
574-
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserVodafone1]),
574+
CombinedProcessor(data_parsers=[EmailDateParser, SubjectParserVodafone1, HtmlParserVodafone1]),
575575
]
576576
)
577577
_default_organizer = PrivateAttr("networkchangemanagement@vodafone.com")

tests/unit/data/vodafone/vodafone1_result.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
"end": 1778040000,
1111
"maintenance_id": "CRQ000001325565",
1212
"start": 1778011200,
13-
"status": "CONFIRMED",
1413
"summary": "3rd Party Essential Works to carry out maintenance to move their cable to new reroute Due to road construction works"
1514
}
1615
]

tests/unit/data/vodafone/vodafone2.eml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ From: Network Change <networkchangemanagement@vodafone.com>
8383
Reply-To: Network Change <networkchangemanagement@vodafone.com>
8484
To: <maintenance@example.com>
8585
Message-ID: <20012384.98430.1774448475992@GBVLS-AS360>
86-
Subject: Rescheduled 3rd Party Planned Works CRQ000001319054 Affecting I3
87-
NET BV Services
86+
Subject: Rescheduled 3rd Party Planned Works CRQ000001319054 Affecting ACME
87+
CORP Services
8888
Content-Type: multipart/alternative;
8989
boundary="----=_Part_98429_663222437.1774448475992"
9090
Return-Path: networkchangemanagement@vodafone.com

tests/unit/data/vodafone/vodafone2_result.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
"end": 1776038400,
1111
"maintenance_id": "CRQ000001319054",
1212
"start": 1775433600,
13-
"status": "RE-SCHEDULED",
1413
"summary": "Rescheduled 3rd Party Planned Works to perform cable repair work in their partner's network"
1514
}
1615
]

tests/unit/data/vodafone/vodafone3_result.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
"end": 1770676200,
1111
"maintenance_id": "CRQ000001312927",
1212
"start": 1770652860,
13-
"status": "COMPLETED",
1413
"summary": "Please be informed, MW has been scheduled for cut over works on FOC 96C SW-PR WESTERN BLOC at KM15.7 from TM Node SW on BBG001 Link 2 between Pantai Remis - Lekir (PR-LKR) sector"
1514
}
1615
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Completed Vodafone E2E Service Change Management Planned Works CRQ000001312927 Affecting ACME CORP Services
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[
2+
{
3+
"maintenance_id": "CRQ000001312927",
4+
"status": "COMPLETED"
5+
}
6+
]

tests/unit/test_parsers.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from circuit_maintenance_parser.parsers.telstra import HtmlParserTelstra1, HtmlParserTelstra2
3838
from circuit_maintenance_parser.parsers.turkcell import HtmlParserTurkcell1
3939
from circuit_maintenance_parser.parsers.verizon import HtmlParserVerizon1
40-
from circuit_maintenance_parser.parsers.vodafone import HtmlParserVodafone1
40+
from circuit_maintenance_parser.parsers.vodafone import HtmlParserVodafone1, SubjectParserVodafone1
4141
from circuit_maintenance_parser.parsers.windstream import HtmlParserWindstream1
4242
from circuit_maintenance_parser.parsers.zayo import HtmlParserZayo1, SubjectParserZayo1
4343

@@ -768,6 +768,11 @@ def default(self, o):
768768
Path(dir_path, "data", "vodafone", "vodafone3.eml"),
769769
Path(dir_path, "data", "vodafone", "vodafone3_result.json"),
770770
),
771+
(
772+
SubjectParserVodafone1,
773+
Path(dir_path, "data", "vodafone", "vodafone4_subject.eml"),
774+
Path(dir_path, "data", "vodafone", "vodafone4_subject_result.json"),
775+
),
771776
# Windstream
772777
(
773778
HtmlParserWindstream1,

0 commit comments

Comments
 (0)