|
| 1 | +from typing import Any, Dict, List, Optional, Annotated, TypedDict |
| 2 | +from typing_extensions import TypedDict |
| 3 | +from langgraph.graph import StateGraph, END, START |
| 4 | +from langchain_core.runnables import RunnableConfig |
| 5 | +from report_llm import generate_report |
| 6 | +from datetime import datetime |
| 7 | +import os |
| 8 | + |
| 9 | +import json |
| 10 | + |
| 11 | + |
class MigrationState(TypedDict):
    """Shared state handed from node to node in the migration report graph."""

    # --- discovery -------------------------------------------------------
    # Root folder that is scanned for timestamped run directories.
    input_dir: str
    # Path of the most recent run directory, filled in by ``input_node``.
    latest_dir: str

    # --- data ------------------------------------------------------------
    # JSON payloads as loaded from disk, grouped under
    # "translation_results" and "evaluation".
    raw: Dict[str, Any]
    # ``raw`` after ``clean_raw`` dropped empty values and shortened strings.
    cleaned_raw: Dict[str, Any]

    # --- results ---------------------------------------------------------
    # Aggregated counters produced by ``count_node``.
    count: Dict[str, Any]
    # Machine-readable report; ``report_node`` sets it to ``count``.
    json_report: Dict[str, Any]
    # Value returned by ``generate_report`` (declared as a string here).
    md_report: str
| 21 | + |
def input_node(state: MigrationState) -> MigrationState:
    """Locate the most recent run under ``state["input_dir"]`` and load its JSON.

    Run directories are recognised by a name in the form
    ``%Y-%m-%dT%H-%M-%SZ``; the one with the latest timestamp wins. Inside it:

    * files directly in the run directory whose name contains
      ``translation_results.json`` are loaded into
      ``raw["translation_results"]``;
    * files one level down (inside sub-directories) whose name contains
      ``evaluation`` are loaded into ``raw["evaluation"]``.

    Directory listings are sorted so the order of the loaded entries is
    reproducible across platforms.

    Args:
        state: Graph state; only ``input_dir`` is read.

    Returns:
        The same state object with ``latest_dir`` and ``raw`` filled in.

    Raises:
        FileNotFoundError: ``input_dir`` does not exist.
        ValueError: ``input_dir`` contains no timestamp-named run directory.
    """
    input_path = state["input_dir"]
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"Output folder not found: {input_path}")

    # Collect (timestamp, path) for every sub-directory that looks like a run.
    output_dirs = []
    for name in sorted(os.listdir(input_path)):
        run_path = os.path.join(input_path, name)
        if not os.path.isdir(run_path):
            continue
        try:
            run_dt = datetime.strptime(name, "%Y-%m-%dT%H-%M-%SZ")
        except ValueError:
            # Unrelated directory (cache, VCS folder, ...): not a run, skip it
            # instead of aborting the whole scan.
            continue
        output_dirs.append((run_dt, run_path))

    if not output_dirs:
        # max() on an empty list would raise an unhelpful ValueError; keep the
        # exception type but say what is actually wrong.
        raise ValueError(
            f"No timestamped run directories found in: {input_path}"
        )

    _, latest = max(output_dirs, key=lambda item: item[0])
    state["latest_dir"] = latest

    # Load translation results and evaluation notes from the latest run.
    raw = {"translation_results": [], "evaluation": []}
    for name in sorted(os.listdir(latest)):
        entry = os.path.join(latest, name)
        if os.path.isdir(entry):
            for child_name in sorted(os.listdir(entry)):
                child = os.path.join(entry, child_name)
                # Only regular files can be parsed; a nested directory whose
                # name happens to contain "evaluation" is ignored.
                if "evaluation" in child_name.lower() and os.path.isfile(child):
                    with open(child, "r", encoding="utf-8") as f:
                        raw["evaluation"].append(json.load(f))
        elif "translation_results.json" in name.lower():
            with open(entry, "r", encoding="utf-8") as f:
                raw["translation_results"].append(json.load(f))

    state["raw"] = raw
    return state
| 54 | + |
def clean_raw(obj: Any) -> Any:
    """Return a pruned copy of a JSON-like value.

    * ``None`` stays ``None``.
    * Strings are stripped; blank ones become ``None`` and anything longer
      than 150 characters is cut to 150 characters plus a trailing "…".
    * Dicts and lists are processed recursively; members that come back as
      ``None``, ``{}``, ``[]`` or ``""`` are dropped, and a container left
      with nothing in it becomes ``None``.
    * Every other value (numbers, booleans, ...) is returned unchanged.
    """
    limit = 150  # maximum number of characters kept from a string
    empties = (None, {}, [], "")  # results that count as "nothing to keep"

    if obj is None:
        return None

    if isinstance(obj, str):
        text = obj.strip()
        if not text:
            return None
        if len(text) > limit:
            return text[:limit] + "…"
        return text

    if isinstance(obj, dict):
        kept_pairs = {
            key: pruned
            for key, value in obj.items()
            if (pruned := clean_raw(value)) not in empties
        }
        return kept_pairs if kept_pairs else None

    if isinstance(obj, list):
        kept_items = [
            pruned for pruned in map(clean_raw, obj) if pruned not in empties
        ]
        return kept_items if kept_items else None

    return obj
| 86 | + |
| 87 | + |
def clean_raw_node(state: MigrationState) -> MigrationState:
    """Graph node: store the pruned form of ``state["raw"]`` as ``cleaned_raw``.

    ``clean_raw`` drops empty values and shortens long strings; the incoming
    state mapping is updated in place and returned.
    """
    pruned = clean_raw(state["raw"])
    state["cleaned_raw"] = pruned
    return state
| 92 | + |
def count_node(state: MigrationState) -> MigrationState:
    """Count translated artifacts, errors, warnings and validation errors.

    Reads ``state["cleaned_raw"]`` and writes ``state["count"]`` with:

    * ``artifact_type``: per-type totals summed over every translation
      result's ``observability.artifact_counts``;
    * ``successes``: the sum of all those artifact counts;
    * ``migration_errors`` / ``migration_warnings``: sums of
      ``observability.total_errors`` / ``total_warnings`` (once per
      translation result);
    * ``validation_errors``: number of evaluation validation results whose
      ``syntax_valid`` flag is falsy (a missing flag counts as valid).

    Missing sections are treated as empty, so a run without data yields
    all-zero counters instead of raising.

    Args:
        state: Graph state; only ``cleaned_raw`` is read.

    Returns:
        The same state object with ``count`` filled in.
    """
    count = {
        "artifact_type": {},
        "migration_errors": 0,
        "migration_warnings": 0,
        "successes": 0,
        "validation_errors": 0,
    }

    # ``clean_raw`` returns None when everything was pruned, so the key can be
    # present with a None value; ``.get(key, {})`` alone would not cover that.
    cleaned = state.get("cleaned_raw") or {}

    for translation in cleaned.get("translation_results", []):
        # The observability section may have been pruned away entirely.
        observability = translation.get("observability", {})
        per_type = count["artifact_type"]
        for artifact_type, amount in observability.get("artifact_counts", {}).items():
            per_type[artifact_type] = per_type.get(artifact_type, 0) + amount
            count["successes"] += amount
        count["migration_errors"] += observability.get("total_errors", 0)
        count["migration_warnings"] += observability.get("total_warnings", 0)

    for evaluation in cleaned.get("evaluation", []):
        for result in evaluation.get("validation", {}).get("results", []):
            if not result.get("syntax_valid", True):
                count["validation_errors"] += 1

    state["count"] = count
    return state
| 111 | + |
def report_node(state: MigrationState) -> MigrationState:
    """Graph node: build the report through ``generate_report``.

    ``generate_report`` receives the cleaned data and the counters. The
    result is a new mapping: a copy of ``state`` with ``md_report`` set to
    the generator's return value and ``json_report`` set to the counters.
    The incoming mapping itself is left untouched.
    """
    generated = generate_report(state["cleaned_raw"], state["count"])
    updated = dict(state)
    updated["md_report"] = generated
    updated["json_report"] = state["count"]
    return updated
| 116 | + |
class MigrationReportGraph:
    """Linear report pipeline: input -> clean -> count -> report."""

    def __init__(self, run_id: Optional[str] = None):
        """
        Initialize migration report graph.

        Args:
            run_id: Unique identifier for this run. It is kept on the
                instance for callers; the graph itself does not use it.
        """
        self.run_id = run_id

        # Create the StateGraph
        self.graph = StateGraph(MigrationState)

        # Add nodes
        self.graph.add_node("input", input_node)
        self.graph.add_node("clean", clean_raw_node)
        self.graph.add_node("count", count_node)
        self.graph.add_node("report", report_node)

        # Wire the nodes into a single straight path
        self.graph.add_edge(START, "input")
        self.graph.add_edge("input", "clean")
        self.graph.add_edge("clean", "count")
        self.graph.add_edge("count", "report")
        self.graph.add_edge("report", END)

        # Compile the graph
        self.compiled_graph = self.graph.compile()

    def run(self, input_path: str) -> tuple[Any, Dict[str, Any], Any]:
        """Execute the graph for ``input_path`` and return its results.

        Args:
            input_path: Folder containing the timestamped run directories.

        Returns:
            A 3-tuple ``(report, json_report, latest_dir)``:

            * ``report`` - the markdown report. NOTE(review): the return
              shape of ``generate_report`` is not visible in this file. A
              non-string value is indexed with ``[0]`` as before; a plain
              string is returned whole (``MigrationState`` declares
              ``md_report: str``) rather than cut to its first character;
              an empty/missing report yields ``""`` instead of ``KeyError``.
            * ``json_report`` - the counters dict (``{}`` when missing).
            * ``latest_dir`` - the run directory that was processed.

        Raises:
            Any exception raised by a graph node propagates unchanged.
        """
        # Every key declared on MigrationState is present, with ``raw`` and
        # ``cleaned_raw`` as mappings to match their declared types.
        initial_state: MigrationState = {
            "input_dir": input_path,
            "latest_dir": None,
            "raw": {},
            "cleaned_raw": {},
            "count": None,
            "json_report": None,
            "md_report": None,
        }

        final_state = self.compiled_graph.invoke(initial_state)

        md_report = final_state.get("md_report")
        if not md_report:
            report = ""
        elif isinstance(md_report, str):
            report = md_report
        else:
            report = md_report[0]

        # Fallback values are unchanged so callers see what they saw before.
        json_report = final_state.get("json_report") or {}
        latest_dir = final_state.get("latest_dir") or {}
        return report, json_report, latest_dir
| 162 | + |
0 commit comments