Skip to content

Commit 8791d57

Browse files
EmilienM and lpiwowar
authored and committed
fix(parser): only keep final traceback for each failing test
The Tempest report parser was updated to extract only the final traceback when multiple tracebacks are present in a single log entry. Previously, the parser captured text starting at the first traceback encountered and running up to the end marker, so an entry containing several tracebacks could produce more input than our model can handle. For now, we keep only the last traceback found for each failing test.
1 parent a520d8e commit 8791d57

1 file changed

Lines changed: 17 additions & 10 deletions

File tree

  • src/rca_accelerator_chatbot

src/rca_accelerator_chatbot/api.py

Lines changed: 17 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -175,8 +175,9 @@ def _extract_test_name(test_name_part: str) -> str:
175175
return test_name
176176

177177

178-
async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]:
179-
"""Fetches and parses the Tempest HTML report to extract test names and tracebacks."""
178+
async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]: # pylint: disable=too-many-locals
179+
"""Fetches and parses the Tempest HTML report to extract test names
180+
and the last traceback for each failed test."""
180181
async with httpx.AsyncClient(verify=False, follow_redirects=True) as client:
181182
try:
182183
response = await client.get(url, auth=HTTPSPNEGOAuth(mutual_authentication=OPTIONAL))
@@ -202,14 +203,20 @@ async def fetch_and_parse_tempest_report(url: str) -> List[Dict[str, str]]:
202203
test_name_part = row_text[:traceback_start_index].strip()
203204
test_name = _extract_test_name(test_name_part)
204205

205-
traceback_text = row_text[traceback_start_index:]
206-
end_marker_index = traceback_text.find("}}}")
207-
if end_marker_index != -1:
208-
traceback_text = traceback_text[:end_marker_index].strip()
209-
else:
210-
traceback_text = traceback_text.strip()
211-
212-
results.append({"test_name": test_name, "traceback": traceback_text})
206+
tb_marker = "Traceback (most recent call last):"
207+
traceback_pattern = re.compile(
208+
# Match from one tb_marker to the next (non-greedy), or to end of string
209+
f"{re.escape(tb_marker)}.*?(?={re.escape(tb_marker)}|$)",
210+
re.DOTALL
211+
)
212+
213+
traceback_parts = traceback_pattern.findall(row_text[traceback_start_index:])
214+
if traceback_parts:
215+
last_traceback = traceback_parts[-1].strip()
216+
end_marker_index = last_traceback.find("}}}")
217+
if end_marker_index != -1:
218+
last_traceback = last_traceback[:end_marker_index].strip()
219+
results.append({"test_name": test_name, "traceback": last_traceback})
213220

214221
if not results:
215222
pass

0 commit comments

Comments
 (0)