Skip to content

Commit c0aefd4

Browse files
authored
Merge pull request #375 from AndriusV4/feature/improve-google-parser
Improvements to Google Parser
2 parents d6411f2 + 8db91c3 commit c0aefd4

22 files changed

+830
-17
lines changed

changes/375.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added Google subject parser with support for multiple status types and notifications without end times

circuit_maintenance_parser/parsers/google.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
from datetime import datetime
66

7-
from circuit_maintenance_parser.parser import CircuitImpact, Html, Impact, Status
7+
from circuit_maintenance_parser.parser import CircuitImpact, EmailSubjectParser, Html, Impact, Status
88

99
# pylint: disable=too-many-nested-blocks, too-many-branches
1010

@@ -18,7 +18,7 @@ def parse_html(self, soup):
1818
"""Execute parsing."""
1919
data = {}
2020
data["circuits"] = []
21-
data["status"] = Status.CONFIRMED
21+
end_time_explicit = False
2222

2323
for span in soup.find_all("span"):
2424
if span.string is None:
@@ -29,6 +29,7 @@ def parse_html(self, soup):
2929
elif span.string.strip() == "End Time:":
3030
dt_str = span.next_sibling.string.strip()
3131
data["end"] = self.dt2ts(datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S %z UTC"))
32+
end_time_explicit = True
3233
elif span.string.strip() == "Peer ASN:":
3334
data["account"] = span.parent.next_sibling.string.strip()
3435
elif span.string.strip() == "Google Neighbor Address(es):":
@@ -37,9 +38,42 @@ def parse_html(self, soup):
3738
cid = googleaddr + "-" + span.parent.next_sibling.string.strip()
3839
data["circuits"].append(CircuitImpact(circuit_id=cid, impact=Impact.OUTAGE))
3940

40-
summary = list(soup.find("div").find("div").strings)[-1].strip()
41-
match = re.search(r" - Reference (.*)$", summary)
42-
data["summary"] = summary
43-
data["maintenance_id"] = match[1]
41+
# Google sometimes sends notifications without an End Time specified
42+
if not end_time_explicit and data["start"]:
43+
# Since start and end times cannot be equal, manufacture an end time by adding 1 hour to the start time
44+
end_time_delta = 3600
45+
data["end"] = data["start"] + end_time_delta
46+
47+
return [data]
48+
49+
50+
class SubjectParserGoogle1(EmailSubjectParser):
    """Subject parser for Google maintenance notifications.

    Example subject format:
        "[Scheduled] Google Planned Network Maintenance Notification - Reference PCR/123456"
    """

    def parse_subject(self, subject):
        """Extract the status, maintenance ID and summary from the subject line.

        Args:
            subject: The e-mail subject string.

        Returns:
            A single-element list holding a dict. ``maintenance_id`` and
            ``status`` are set only when the subject contains both a
            ``[Tag]`` and a ``Reference <id>`` part; ``summary`` is set
            whenever a ``[Tag]`` followed by text is present. Keys whose
            pattern does not match are left out of the dict.
        """
        data = {}

        # Group 1: bracketed status tag (e.g. [Scheduled], [Completed], [Canceled])
        # Group 2: maintenance ID (e.g. PCR/123456)
        id_match = re.search(r"(\[\S+\]).*Reference\s+(\S+)", subject, re.IGNORECASE | re.DOTALL)
        if id_match:
            status_str = id_match.group(1).upper()
            # \S+ cannot contain whitespace, so no stripping is needed here.
            data["maintenance_id"] = id_match.group(2)
            if "COMPLETED" in status_str:
                data["status"] = Status.COMPLETED
            # Substring check handles both the "Cancelled" and "Canceled" spellings.
            elif "CANCEL" in status_str:
                data["status"] = Status.CANCELLED
            # "Scheduled" and any unrecognised tag fall back to confirmed.
            else:
                data["status"] = Status.CONFIRMED

        # Everything after the status tag is used as the summary.
        summary_match = re.search(r"\[\S+\]\s+(.*)", subject, re.IGNORECASE | re.DOTALL)
        if summary_match:
            # With DOTALL, ".*" also swallows trailing newlines/whitespace of the
            # raw subject; strip them so they do not end up in the summary.
            data["summary"] = summary_match.group(1).strip()

        return [data]

circuit_maintenance_parser/provider.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from circuit_maintenance_parser.parsers.crowncastle import HtmlParserCrownCastle1
2525
from circuit_maintenance_parser.parsers.equinix import HtmlParserEquinix, SubjectParserEquinix
2626
from circuit_maintenance_parser.parsers.globalcloudxchange import HtmlParserGcx1, SubjectParserGcx1
27-
from circuit_maintenance_parser.parsers.google import HtmlParserGoogle1
27+
from circuit_maintenance_parser.parsers.google import HtmlParserGoogle1, SubjectParserGoogle1
2828
from circuit_maintenance_parser.parsers.gtt import HtmlParserGTT1
2929
from circuit_maintenance_parser.parsers.hgc import HtmlParserHGC1, HtmlParserHGC2, SubjectParserHGC1
3030
from circuit_maintenance_parser.parsers.lumen import HtmlParserLumen1
@@ -374,7 +374,7 @@ class Google(GenericProvider):
374374

375375
_processors: List[GenericProcessor] = PrivateAttr(
376376
[
377-
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserGoogle1]),
377+
CombinedProcessor(data_parsers=[EmailDateParser, HtmlParserGoogle1, SubjectParserGoogle1]),
378378
]
379379
)
380380
_default_organizer = PrivateAttr("noc-noreply@google.com")

tests/unit/data/google/google1_parser_result.json renamed to tests/unit/data/google/google1_html_parser_result.json

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@
1212
}
1313
],
1414
"end": 1701839100,
15-
"maintenance_id": "PCR/123456",
16-
"start": 1701809700,
17-
"status": "CONFIRMED",
18-
"summary": "Network Maintenance Notification - Reference PCR/123456"
15+
"start": 1701809700
1916
}
2017
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[Scheduled] Google Planned Network Maintenance Notification - Reference PCR/123456
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[
2+
{
3+
"maintenance_id": "PCR/123456",
4+
"status": "CONFIRMED",
5+
"summary": "Google Planned Network Maintenance Notification - Reference PCR/123456"
6+
}
7+
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[Completed] Google Planned Network Maintenance Notification - Reference PCR/123456
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[
2+
{
3+
"maintenance_id": "PCR/123456",
4+
"status": "COMPLETED",
5+
"summary": "Google Planned Network Maintenance Notification - Reference PCR/123456"
6+
}
7+
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[Canceled] Google Planned Network Maintenance Notification - Reference PCR/123456
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
[
2+
{
3+
"maintenance_id": "PCR/123456",
4+
"status": "CANCELLED",
5+
"summary": "Google Planned Network Maintenance Notification - Reference PCR/123456"
6+
}
7+
]

0 commit comments

Comments
 (0)