Skip to content

Commit cb47678

Browse files
committed
Refactor provider to use OpenAI parser as a fallback and enhance tests for processor behavior
1 parent 759cad1 commit cb47678

2 files changed

Lines changed: 103 additions & 12 deletions

File tree

circuit_maintenance_parser/provider.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,6 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
130130
logger.debug("Skipping notification %s due filtering policy for %s.", data, self.__class__.__name__)
131131
return []
132132

133-
if os.getenv("PARSER_OPENAI_API_KEY"):
134-
self._processors.append(CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser]))
135-
136-
# Add subject to all html or text/* data_parts if not already present.
137-
self.add_subject_to_text(data)
138-
139133
for processor in self._processors:
140134
try:
141135
return processor.process(data, self.get_extended_data())
@@ -150,6 +144,22 @@ def get_maintenances(self, data: NotificationData) -> Iterable[Maintenance]:
150144
related_exceptions.append(exc)
151145
continue
152146

147+
# Use OpenAI parser as a last resort if all other processors failed.
148+
if os.getenv("PARSER_OPENAI_API_KEY"):
149+
self.add_subject_to_text(data)
150+
openai_processor = CombinedProcessor(data_parsers=[EmailDateParser, OpenAIParser])
151+
try:
152+
return openai_processor.process(data, self.get_extended_data())
153+
except ProcessorError as exc:
154+
process_error_message = (
155+
f"- Processor {openai_processor.__class__.__name__} from {provider_name} failed due to: %s\n"
156+
)
157+
logger.debug(process_error_message, traceback.format_exc())
158+
159+
related_exc = rgetattr(exc, "__cause__")
160+
error_message += process_error_message % related_exc
161+
related_exceptions.append(exc)
162+
153163
raise ProviderError(
154164
(f"Failed creating Maintenance notification for {provider_name}.\nDetails:\n{error_message}"),
155165
related_exceptions=related_exceptions,
@@ -165,7 +175,7 @@ def add_subject_to_text(self, data: NotificationData):
165175
if subject:
166176
new_data_parts = []
167177
for part in data.data_parts:
168-
if part.type.startswith("text/") or part.type.startswith("html"):
178+
if (part.type.startswith("text/") or part.type.startswith("html")) and part.type != "text/calendar":
169179
content_str = part.content.decode(errors="ignore")
170180
if subject not in content_str:
171181
# Append subject and update content

tests/unit/test_providers.py

Lines changed: 86 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -116,21 +116,78 @@ class ProviderWithIncludeFilter(GenericProvider):
116116
"provider_class",
117117
[GenericProvider, AquaComms],
118118
)
119-
def test_provider_gets_mlparser(provider_class):
120-
"""Test to check the any provider gets a default ML parser when ENV is activated."""
119+
def test_provider_falls_back_to_openai(provider_class):
120+
"""Test that OpenAI parser is used as last resort when all other processors fail."""
121121
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
122122
data = NotificationData.init_from_raw("text/plain", b"fake data")
123123
data.add_data_part("text/html", b"other data")
124124

125125
provider = provider_class()
126+
original_processor_count = len(provider._processors) # pylint: disable=protected-access
127+
128+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
129+
# All native processors fail, then OpenAI processor succeeds
130+
mock_processor.side_effect = [ProcessorError] * original_processor_count + [[{"a": "b"}]]
131+
provider.get_maintenances(data)
132+
# Native processors + 1 OpenAI call
133+
assert mock_processor.call_count == original_processor_count + 1
134+
135+
# OpenAI processor should NOT be appended to the provider's processor list
136+
assert len(provider._processors) == original_processor_count # pylint: disable=protected-access
137+
os.environ.pop("PARSER_OPENAI_API_KEY", None)
138+
139+
140+
def test_provider_does_not_use_openai_when_native_succeeds():
141+
"""Test that OpenAI parser is not invoked when a native processor succeeds."""
142+
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
143+
data = NotificationData.init_from_raw("text/plain", b"fake data")
144+
145+
provider = GenericProvider()
126146

127147
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
128148
mock_processor.return_value = [{"a": "b"}]
129149
provider.get_maintenances(data)
150+
# Only the native processor should be called
151+
assert mock_processor.call_count == 1
130152

131-
assert provider._processors[-1] == CombinedProcessor( # pylint: disable=protected-access
132-
data_parsers=[EmailDateParser, OpenAIParser]
133-
)
153+
os.environ.pop("PARSER_OPENAI_API_KEY", None)
154+
155+
156+
def test_provider_data_not_mutated_when_native_succeeds():
157+
"""Test that add_subject_to_text is not called when native processors succeed."""
158+
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
159+
data = NotificationData.init_from_raw("text/plain", b"fake data")
160+
data.add_data_part("email-header-subject", b"Test Subject")
161+
162+
original_content = data.data_parts[0].content
163+
provider = GenericProvider()
164+
165+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
166+
mock_processor.return_value = [{"a": "b"}]
167+
provider.get_maintenances(data)
168+
169+
# Data should not have been mutated since native processor succeeded
170+
assert data.data_parts[0].content == original_content
171+
os.environ.pop("PARSER_OPENAI_API_KEY", None)
172+
173+
174+
def test_provider_no_repeated_append_on_multiple_calls():
175+
"""Test that calling get_maintenances multiple times doesn't grow the processor list."""
176+
os.environ["PARSER_OPENAI_API_KEY"] = "some_api_key"
177+
data = NotificationData.init_from_raw("text/plain", b"fake data")
178+
179+
provider = GenericProvider()
180+
original_processor_count = len(provider._processors) # pylint: disable=protected-access
181+
182+
with patch("circuit_maintenance_parser.processor.GenericProcessor.process") as mock_processor:
183+
mock_processor.return_value = [{"a": "b"}]
184+
provider.get_maintenances(data)
185+
provider.get_maintenances(data)
186+
provider.get_maintenances(data)
187+
188+
# Processor list should never grow
189+
assert len(provider._processors) == original_processor_count # pylint: disable=protected-access
190+
os.environ.pop("PARSER_OPENAI_API_KEY", None)
134191

135192

136193
def test_add_subject_to_text_appends_subject_to_text_parts():
@@ -230,3 +287,27 @@ def test_add_subject_to_text_handles_decode_errors():
230287

231288
# This should not raise an exception due to errors="ignore" in decode()
232289
provider.add_subject_to_text(data)
290+
291+
292+
def test_add_subject_to_text_skips_text_calendar():
293+
"""Test that add_subject_to_text does not modify text/calendar parts."""
294+
provider = GenericProvider()
295+
296+
data = NotificationData()
297+
data.add_data_part("email-header-subject", b"Planned Work Notification")
298+
data.add_data_part(
299+
"text/calendar",
300+
b"BEGIN:VCALENDAR\r\nVERSION:2.0\r\nBEGIN:VEVENT\r\nEND:VEVENT\r\nEND:VCALENDAR",
301+
)
302+
data.add_data_part("text/plain", b"Some plain text content")
303+
304+
original_calendar_content = data.data_parts[1].content
305+
306+
provider.add_subject_to_text(data)
307+
308+
# text/calendar part should remain unchanged
309+
assert data.data_parts[1].content == original_calendar_content
310+
assert b"Planned Work Notification" not in data.data_parts[1].content
311+
312+
# text/plain part should have subject appended
313+
assert b"Planned Work Notification" in data.data_parts[2].content

0 commit comments

Comments
 (0)