Fail-fast simplifications in build-prompt.py and build-pr-body.py; drop the --copilot-log fallback

trask · trask · commit 20b9c31725ca · 2026-04-30T19:43:59.000-07:00
diff --git a/.github/scripts/flaky-test-fix/build-pr-body.py b/.github/scripts/flaky-test-fix/build-pr-body.py
@@ -2,16 +2,13 @@
 """Render the PR body for an automated flaky-test fix.
 
 Inputs:
-  --selected   selected.json from find-flaky-test.py (or build-override.py)
-  --copilot-log  copilot-output.jsonl produced by `copilot ... --output-format json`
-                 (optional)
+  --selected   selected.json from find-flaky-test.py
   --diagnosis  build/flaky-fix/diagnosis.md written by Copilot (optional)
   --output     path to write the PR body to
 
 The body always includes Develocity scan links, a per-day flake breakdown,
 and the captured failure stack. If a Copilot diagnosis file is present, it
-is included verbatim. Otherwise the script falls back to extracting the
-final assistant message from the Copilot JSONL log.
+is included verbatim.
 """
 
 from __future__ import annotations
@@ -23,26 +20,6 @@
 from pathlib import Path
 
 
-def _last_assistant_message(jsonl_path: Path) -> str:
-    """Return the last non-empty content of an ``assistant.message`` event."""
-    last = ""
-    with jsonl_path.open("r", encoding="utf-8", errors="replace") as f:
-        for line in f:
-            line = line.strip()
-            if not line:
-                continue
-            try:
-                d = json.loads(line)
-            except json.JSONDecodeError:
-                continue
-            if d.get("type") != "assistant.message":
-                continue
-            content = (d.get("data") or {}).get("content") or ""
-            if isinstance(content, str) and content.strip():
-                last = content
-    return last.strip()
-
-
 def _format_per_day(rows: list) -> list[str]:
     if not rows:
         return []
@@ -51,11 +28,10 @@ def _format_per_day(rows: list) -> list[str]:
            "| Day | flaky | failed | passed |",
            "| --- | ---: | ---: | ---: |"]
     for r in rows:
-        day = dt.datetime.fromtimestamp((r.get("start_ms") or 0) / 1000,
-                                        tz=dt.timezone.utc)
+        day = dt.datetime.fromtimestamp(r["start_ms"] / 1000, tz=dt.timezone.utc)
         out.append(
             f"| {day.strftime('%Y-%m-%d')} | "
-            f"{r.get('flaky', 0)} | {r.get('failed', 0)} | {r.get('passed', 0)} |"
+            f"{r['flaky']} | {r['failed']} | {r['passed']} |"
         )
     out.append("")
     return out
@@ -66,12 +42,9 @@ def _format_recent_scans(scans: list) -> list[str]:
         return []
     out = ["### Recent failed/flaky scans", ""]
     for s in scans[:5]:
-        url = s.get("scan_url", "")
-        outcome = s.get("outcome", "")
-        wu = s.get("work_unit", "")
-        bullet = f"- [{s.get('build_id', '')[:13]}]({url}) ({outcome}"
-        if wu:
-            bullet += f", `{wu}`"
+        bullet = f"- [{s['build_id'][:13]}]({s['scan_url']}) ({s['outcome']}"
+        if s["work_unit"]:
+            bullet += f", `{s['work_unit']}`"
         bullet += ")"
         out.append(bullet)
     out.append("")
@@ -81,56 +54,45 @@ def _format_recent_scans(scans: list) -> list[str]:
 def main() -> int:
     ap = argparse.ArgumentParser()
     ap.add_argument("--selected", type=Path, required=True)
-    ap.add_argument("--copilot-log", type=Path, default=None)
     ap.add_argument("--diagnosis", type=Path, default=None)
     ap.add_argument("--output", type=Path, required=True)
     args = ap.parse_args()
 
     selected = json.loads(args.selected.read_text(encoding="utf-8"))
-    cls = selected.get("class", "")
-    method = selected.get("method", "")
-    fq = selected.get("fully_qualified") or f"{cls}.{method}"
-    source_file = selected.get("source_file", "")
-    flaky_count = selected.get("flaky_count", 0)
-    container_flaky = selected.get("container_flaky_count", 0)
-    window_days = selected.get("window_days", 0)
-    sample_url = selected.get("sample_scan_url") or ""
-    sample_failure = (selected.get("sample_failure") or "").rstrip()
+    fq = selected["fully_qualified"]
+    source_file = selected["source_file"]
+    window_days = selected["window_days"]
+    sample_url = selected["sample_scan_url"]
+    sample_failure = selected["sample_failure"].rstrip()
 
     lines: list[str] = [
         f"Automated attempt at fixing flakiness in `{fq}`.",
         "",
         f"- Source: [`{source_file}`]({source_file})",
-        f"- Flaky executions in last {window_days}d (this test): **{flaky_count}**",
+        f"- Flaky executions in last {window_days}d (this test): "
+        f"**{selected['flaky_count']}**",
         f"- Flaky executions in last {window_days}d (test container): "
-        f"**{container_flaky}**",
+        f"**{selected['container_flaky_count']}**",
     ]
     if sample_url:
         lines.append(f"- Primary failed scan: {sample_url}")
     lines.append("")
 
-    lines += _format_recent_scans(selected.get("recent_flaky_scans") or [])
-    lines += _format_per_day(selected.get("per_day_breakdown") or [])
+    lines += _format_recent_scans(selected["recent_flaky_scans"])
+    lines += _format_per_day(selected["per_day_breakdown"])
 
     lines += ["### Sample failure (from Develocity)", "", "```"]
-    lines.append(sample_failure if sample_failure else "(no failure message captured)")
+    lines.append(sample_failure or "(no failure message captured)")
     lines += ["```", ""]
 
-    diagnosis_text = ""
     if args.diagnosis and args.diagnosis.exists():
-        diagnosis_text = args.diagnosis.read_text(
-            encoding="utf-8", errors="replace"
-        ).strip()
-    if not diagnosis_text and args.copilot_log and args.copilot_log.exists():
-        diagnosis_text = _last_assistant_message(args.copilot_log)
-
-    if diagnosis_text:
-        lines += ["## Copilot diagnosis", "", diagnosis_text, ""]
+        diagnosis_text = args.diagnosis.read_text(encoding="utf-8").strip()
+        if diagnosis_text:
+            lines += ["## Copilot diagnosis", "", diagnosis_text, ""]
 
     lines += [
         "---",
         "",
-        "Generated locally by `.github/scripts/flaky-test-fix/run-local.sh`. "
         "Review the diagnosis and the diff carefully before merging - "
         "automated fixes can mask flakiness instead of addressing the root cause.",
     ]
diff --git a/.github/scripts/flaky-test-fix/build-prompt.py b/.github/scripts/flaky-test-fix/build-prompt.py
@@ -1,13 +1,14 @@
 #!/usr/bin/env python3
 """Render the Copilot CLI prompt for fixing one flaky test.
 
-Inputs the `selected.json` produced by `find-flaky-test.py` (or
-`build-override.py`) and writes a plain-text prompt suitable for `copilot -p`.
+Inputs the `selected.json` produced by `find-flaky-test.py` and writes a
+plain-text prompt suitable for `copilot -p`.
 """
 
 from __future__ import annotations
 
 import argparse
+import datetime as dt
 import json
 import sys
 from pathlib import Path
@@ -73,18 +74,13 @@ def _format_recent_scans(scans: list) -> str:
         return ""
     lines = ["Other recent flaky/failed scans for this test:"]
     for s in scans[:5]:
-        bid = s.get("build_id", "")
-        url = s.get("scan_url", "")
-        outcome = s.get("outcome", "")
-        wu = s.get("work_unit", "")
-        excerpt = (s.get("failure_excerpt") or "").splitlines()
-        first = excerpt[0] if excerpt else ""
-        bullet = f"- {url} ({outcome}"
-        if wu:
-            bullet += f", {wu}"
+        bullet = f"- {s['scan_url']} ({s['outcome']}"
+        if s["work_unit"]:
+            bullet += f", {s['work_unit']}"
         bullet += ")"
-        if first:
-            bullet += f"\n    first line: `{first[:160]}`"
+        excerpt = s["failure_excerpt"].splitlines()
+        if excerpt:
+            bullet += f"\n    first line: `{excerpt[0][:160]}`"
         lines.append(bullet)
     lines.append("")
     return "\n".join(lines) + "\n"
@@ -93,16 +89,14 @@ def _format_recent_scans(scans: list) -> str:
 def _format_per_day(rows: list) -> str:
     if not rows:
         return ""
-    import datetime as _dt
     lines = ["Per-day outcome breakdown for this test:", "",
              "| Day (UTC) | flaky | failed | passed |",
              "| --- | ---: | ---: | ---: |"]
     for r in rows:
-        day = _dt.datetime.fromtimestamp((r.get("start_ms") or 0) / 1000,
-                                         tz=_dt.timezone.utc)
+        day = dt.datetime.fromtimestamp(r["start_ms"] / 1000, tz=dt.timezone.utc)
         lines.append(
             f"| {day.strftime('%Y-%m-%d')} | "
-            f"{r.get('flaky', 0)} | {r.get('failed', 0)} | {r.get('passed', 0)} |"
+            f"{r['flaky']} | {r['failed']} | {r['passed']} |"
         )
     lines.append("")
     return "\n".join(lines) + "\n"
@@ -115,23 +109,21 @@ def main() -> int:
     args = ap.parse_args()
 
     selected = json.loads(args.selected.read_text(encoding="utf-8"))
-    failure = selected.get("sample_failure") or "(no failure message captured)"
+    failure = (selected["sample_failure"] or "(no failure message captured)").strip()
     # Trim aggressively to keep the prompt within token limits.
-    failure = failure.strip()
     if len(failure) > 4000:
         failure = failure[:4000] + "\n... [truncated]"
 
     prompt = PROMPT_TEMPLATE.format(
-        fq=selected.get("fully_qualified")
-            or f"{selected.get('class', '')}.{selected.get('method', '')}",
-        source=selected.get("source_file", ""),
-        window_days=selected.get("window_days", 0),
-        flaky_count=selected.get("flaky_count", 0),
-        container_flaky_count=selected.get("container_flaky_count", 0),
-        scan_url=selected.get("sample_scan_url") or "(none)",
+        fq=selected["fully_qualified"],
+        source=selected["source_file"],
+        window_days=selected["window_days"],
+        flaky_count=selected["flaky_count"],
+        container_flaky_count=selected["container_flaky_count"],
+        scan_url=selected["sample_scan_url"] or "(none)",
         failure=failure,
-        recent_section=_format_recent_scans(selected.get("recent_flaky_scans") or []),
-        per_day_section=_format_per_day(selected.get("per_day_breakdown") or []),
+        recent_section=_format_recent_scans(selected["recent_flaky_scans"]),
+        per_day_section=_format_per_day(selected["per_day_breakdown"]),
     )
 
     args.output.parent.mkdir(parents=True, exist_ok=True)
diff --git a/.github/scripts/flaky-test-fix/run-local.sh b/.github/scripts/flaky-test-fix/run-local.sh
@@ -226,7 +226,6 @@ git push -u "$REMOTE" "$BRANCH"
 PR_BODY="$OUT_DIR/pr-body.md"
 python .github/scripts/flaky-test-fix/build-pr-body.py \
   --selected "$SELECTED" \
-  --copilot-log "$COPILOT_LOG" \
   --diagnosis "$DIAGNOSIS" \
   --output "$PR_BODY"
 
diff --git a/.github/workflows/flaky-test-fix.yml b/.github/workflows/flaky-test-fix.yml
@@ -220,7 +220,6 @@ jobs:
           mkdir -p "$(dirname "$body_file")"
           python .github/scripts/flaky-test-fix/build-pr-body.py \
             --selected "$COPILOT_ROOT/selected.json" \
-            --copilot-log "$COPILOT_ROOT/run/copilot-output.jsonl" \
             --diagnosis build/flaky-fix/diagnosis.md \
             --output "$body_file"
           if [[ -n "${ARTIFACT_URL:-}" ]]; then