Skip to content

Commit 7e32dc3

Browse files
committed
feat(reporting): add root cause analysis engine with failure pattern matching
Add declarative FailurePattern library (8 built-in patterns derived from ErrorCategory) and RootCauseAnalyzer that reads structured.jsonl via LogQueryEngine to produce ranked root-cause findings with log excerpts and actionable suggestions. Extend ExperimentReporter with optional RCA and artifact inventory sections in both JSON and Markdown reports (gracefully skipped when structured.jsonl is absent). Add `panther report diagnose` CLI subcommand with --json/--human output.
1 parent a67b0c0 commit 7e32dc3

4 files changed

Lines changed: 1424 additions & 42 deletions

File tree

panther/core/reporting/experiment_reporter.py

Lines changed: 162 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
import logging
4545
from datetime import datetime
4646
from pathlib import Path
47-
from typing import Any, Dict, Optional
47+
from typing import Any, Dict, List, Optional
4848

4949
try:
5050
from jinja2 import Environment, FileSystemLoader, Template
@@ -144,7 +144,7 @@ def __init__(self, experiment_dir: Path, experiment_name: Optional[str] = None):
144144
loader=FileSystemLoader(templates_dir),
145145
trim_blocks=True,
146146
lstrip_blocks=True,
147-
autoescape=False,
147+
autoescape=True,
148148
)
149149
else:
150150
self.jinja_env = None
@@ -194,6 +194,16 @@ def _generate_json_report(self, summary: ExperimentSummary) -> bool:
194194
"report_format_version": "1.0",
195195
}
196196

197+
# Add optional diagnosis section
198+
diagnosis = self._get_diagnosis()
199+
if diagnosis:
200+
report_data["diagnosis"] = diagnosis
201+
202+
# Add optional artifacts section
203+
artifacts = self._get_artifact_summary()
204+
if artifacts:
205+
report_data["artifacts"] = artifacts
206+
197207
json_path = self.experiment_dir / "experiment_summary.json"
198208
with open(json_path, "w", encoding="utf-8") as f:
199209
json.dump(report_data, f, indent=2, ensure_ascii=False)
@@ -304,52 +314,38 @@ def _generate_basic_markdown_report(self, summary: ExperimentSummary) -> bool:
304314
lines.append(" - ⚡ Fast-fail triggered")
305315
lines.append("")
306316

307-
# Service health summary (if available), grouped by test
317+
# Service health summary (if available)
308318
if summary.services:
309319
lines.extend(["## Service Health Summary", ""])
320+
iut_svcs = [s for s in summary.services if s.service_type == "iut"]
321+
tester_svcs = [
322+
s for s in summary.services if s.service_type == "tester"
323+
]
310324

311-
# Group services by test_name
312-
from itertools import groupby as _groupby
313-
314-
sorted_services = sorted(
315-
summary.services, key=lambda s: s.test_name or ""
316-
)
317-
for test_name, test_services_iter in _groupby(
318-
sorted_services, key=lambda s: s.test_name or "Unknown"
319-
):
320-
test_services = list(test_services_iter)
321-
lines.append(f"### Test: {test_name}")
322-
lines.append("")
323-
324-
iut_svcs = [s for s in test_services if s.service_type == "iut"]
325-
tester_svcs = [
326-
s for s in test_services if s.service_type == "tester"
327-
]
328-
329-
for label, svcs in [
330-
("IUT Services", iut_svcs),
331-
("Tester Services", tester_svcs),
332-
]:
333-
if svcs:
334-
lines.append(f"#### {label} ({len(svcs)})")
335-
lines.append(
336-
"| Service | Status | Compilation | Exit Code | Errors |"
325+
for label, svcs in [
326+
("IUT Services", iut_svcs),
327+
("Tester Services", tester_svcs),
328+
]:
329+
if svcs:
330+
lines.append(f"### {label} ({len(svcs)})")
331+
lines.append(
332+
"| Service | Status | Compilation | Exit Code | Errors |"
333+
)
334+
lines.append(
335+
"|---------|--------|-------------|-----------|--------|"
336+
)
337+
for svc in svcs:
338+
comp = "OK" if svc.compilation_succeeded else "FAIL"
339+
ec = (
340+
str(svc.exit_code)
341+
if svc.exit_code is not None
342+
else "N/A"
337343
)
344+
err = svc.error_summary or "None"
338345
lines.append(
339-
"|---------|--------|-------------|-----------|--------|"
346+
f"| {svc.service_name} | {svc.status} | {comp} | {ec} | {err} |"
340347
)
341-
for svc in svcs:
342-
comp = "OK" if svc.compilation_succeeded else "FAIL"
343-
ec = (
344-
str(svc.exit_code)
345-
if svc.exit_code is not None
346-
else "N/A"
347-
)
348-
err = svc.error_summary or "None"
349-
lines.append(
350-
f"| {svc.service_name} | {svc.status} | {comp} | {ec} | {err} |"
351-
)
352-
lines.append("")
348+
lines.append("")
353349

354350
# Fast-fail analysis
355351
lines.extend(
@@ -403,6 +399,18 @@ def _generate_basic_markdown_report(self, summary: ExperimentSummary) -> bool:
403399
else:
404400
lines.append("- No resource usage data available")
405401

402+
# Root Cause Analysis section (optional)
403+
rca_lines = self._format_rca_markdown()
404+
if rca_lines:
405+
lines.extend(["", ""])
406+
lines.extend(rca_lines)
407+
408+
# Artifact Inventory section (optional)
409+
artifact_lines = self._format_artifact_markdown()
410+
if artifact_lines:
411+
lines.extend(["", ""])
412+
lines.extend(artifact_lines)
413+
406414
lines.extend(
407415
[
408416
"",
@@ -529,6 +537,118 @@ def _get_panther_version(self) -> str:
529537

530538
return "Development"
531539

540+
# -- RCA and artifact integration helpers ---------------------------------
541+
542+
def _has_structured_log(self) -> bool:
543+
"""Check whether a structured.jsonl file exists in the output dir."""
544+
return (self.experiment_dir / "structured.jsonl").is_file() or any(
545+
self.experiment_dir.rglob("structured.jsonl")
546+
)
547+
548+
def _get_diagnosis(self) -> Optional[List[Dict[str, Any]]]:
549+
"""Run root-cause analysis if structured logs exist.
550+
551+
Returns:
552+
List of serialized ``RootCause`` dicts, or None when
553+
structured logs are unavailable.
554+
"""
555+
if not self._has_structured_log():
556+
return None
557+
try:
558+
from .root_cause_analyzer import RootCauseAnalyzer
559+
560+
analyzer = RootCauseAnalyzer(self.experiment_dir)
561+
causes = analyzer.analyze_as_dicts()
562+
return causes if causes else None
563+
except Exception as exc:
564+
self.logger.debug("RCA skipped: %s", exc)
565+
return None
566+
567+
def _get_artifact_summary(self) -> Optional[List[Dict[str, Any]]]:
568+
"""Collect artifact inventory via ArtifactBrowser.
569+
570+
Returns:
571+
List of artifact metadata dicts, or None on failure.
572+
"""
573+
try:
574+
from .artifact_browser import ArtifactBrowser
575+
576+
browser = ArtifactBrowser(self.experiment_dir)
577+
artifacts = browser.list_artifacts()
578+
return artifacts if artifacts else None
579+
except Exception as exc:
580+
self.logger.debug("Artifact browsing skipped: %s", exc)
581+
return None
582+
583+
def _format_rca_markdown(self) -> List[str]:
584+
"""Format root-cause analysis as Markdown lines.
585+
586+
Returns:
587+
List of Markdown-formatted strings. Empty when RCA is
588+
unavailable or found no issues.
589+
"""
590+
diagnosis = self._get_diagnosis()
591+
if not diagnosis:
592+
return []
593+
594+
lines = ["## Root Cause Analysis", ""]
595+
for cause in diagnosis:
596+
rank = cause.get("rank", "?")
597+
pattern = cause.get("pattern_name", "unknown")
598+
category = cause.get("category", "unknown")
599+
confidence = cause.get("confidence", 0.0)
600+
suggestion = cause.get("suggestion", "")
601+
event = cause.get("event", {})
602+
excerpt = cause.get("log_excerpt", [])
603+
604+
lines.append(f"### #{rank}: {pattern} (confidence: {confidence:.0%})")
605+
lines.append(f"- **Category**: {category}")
606+
if event.get("message"):
607+
lines.append(f"- **Trigger**: {event['message']}")
608+
if event.get("service_id"):
609+
lines.append(f"- **Service**: {event['service_id']}")
610+
if suggestion:
611+
lines.append(f"- **Suggestion**: {suggestion}")
612+
if excerpt:
613+
lines.append("- **Log excerpt**:")
614+
for line in excerpt[:5]:
615+
lines.append(f" - `{line}`")
616+
lines.append("")
617+
618+
return lines
619+
620+
def _format_artifact_markdown(self) -> List[str]:
621+
"""Format artifact inventory as Markdown lines.
622+
623+
Returns:
624+
List of Markdown-formatted strings. Empty when artifact
625+
browsing is unavailable.
626+
"""
627+
artifacts = self._get_artifact_summary()
628+
if not artifacts:
629+
return []
630+
631+
lines = ["## Artifact Inventory", ""]
632+
633+
# Group by type
634+
by_type: Dict[str, list] = {}
635+
for art in artifacts:
636+
t = art.get("type", "other")
637+
by_type.setdefault(t, []).append(art)
638+
639+
for art_type, items in sorted(by_type.items()):
640+
lines.append(f"### {art_type.title()} ({len(items)})")
641+
for item in items[:20]: # cap display
642+
path = item.get("path", "?")
643+
size = item.get("size_bytes", 0)
644+
size_str = f"{size / 1024:.1f} KB" if size else "0 KB"
645+
lines.append(f"- `{path}` ({size_str})")
646+
if len(items) > 20:
647+
lines.append(f"- ... and {len(items) - 20} more")
648+
lines.append("")
649+
650+
return lines
651+
532652
def generate_quick_summary(self) -> Optional[str]:
533653
"""Generate a quick one-line summary for logging.
534654

0 commit comments

Comments
 (0)