|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""scripts/build_daily_visual_report.py -- SR-173 daily aggregator (#178). |
| 3 | +
|
| 4 | +PURPOSE |
| 5 | +======= |
| 6 | +Per-step HTML debug reports written by `survey.observability.visual_debug` |
| 7 | +land in: |
| 8 | +
|
| 9 | + <VISUAL_DEBUG_OUTPUT_DIR>/YYYY-MM-DD/step-<id>.html |
| 10 | +
|
| 11 | +This script crawls one day's directory and builds: |
| 12 | +
|
| 13 | + <VISUAL_DEBUG_OUTPUT_DIR>/YYYY-MM-DD/index.html |
| 14 | +
|
| 15 | +A grid-layout index with a thumbnail (the first <img data:image/jpeg...> |
| 16 | +extracted from each per-step file), a status pill (OK / FAIL inferred from |
| 17 | +the `class="pill bad|good"` attribute the renderer emits), and the click |
| 18 | +URL for keyboard-friendly drill-down. |
| 19 | +
|
| 20 | +Optional Vercel-Blob upload: with `--upload` and `BLOB_READ_WRITE_TOKEN` set, |
| 21 | +we upload the *index* and each step file to Vercel Blob and print the index's |
| 22 | +blob URL to stdout. We deliberately do NOT upload the original full-page PNGs |
| 23 | +-- the JPEGs inside the per-step HTML are the canonical artifact. |
| 24 | +
|
| 25 | +USAGE |
| 26 | +===== |
| 27 | + # Build today's index (no upload): |
| 28 | + python scripts/build_daily_visual_report.py |
| 29 | +
|
| 30 | + # Build a specific day: |
| 31 | + python scripts/build_daily_visual_report.py --date 2026-05-13 |
| 32 | +
|
| 33 | + # Build + upload to Vercel Blob: |
| 34 | + BLOB_READ_WRITE_TOKEN=vercel_blob_rw_xxx \\ |
| 35 | + python scripts/build_daily_visual_report.py --upload |
| 36 | +
|
| 37 | +EXIT CODES |
| 38 | +========== |
| 39 | + 0 -- index built (possibly uploaded) |
| 40 | + 1 -- no per-step files found for the requested date |
| 41 | + 2 -- upload was requested but BLOB_READ_WRITE_TOKEN missing |
| 42 | + 3 -- unexpected I/O error |
| 43 | +
|
| 44 | +BANNED METHODS -- NEVER USE (see AGENTS.md for full list) |
| 45 | +========================================================= |
| 46 | +- pkill -f "Google Chrome" - killall Google Chrome |
| 47 | +- webauto-nodriver - playstealth launch |
| 48 | +- skylight-cli click --element-index - cua-driver click (raw index) |
| 49 | +""" |
| 50 | + |
| 51 | +from __future__ import annotations |
| 52 | + |
| 53 | +import argparse |
| 54 | +import json |
| 55 | +import logging |
| 56 | +import os |
| 57 | +import re |
| 58 | +import sys |
| 59 | +from datetime import datetime, timezone |
| 60 | +from pathlib import Path |
| 61 | +from typing import Iterable |
| 62 | + |
# We deliberately do NOT import survey.* here -- the aggregator runs as a
# standalone cron job and we want zero import-time coupling to the runner.
logger = logging.getLogger("daily_visual_report")

# Regexes pre-compiled at module load -- the renderer's output format is
# stable (see `_HTML_TEMPLATE` in visual_debug.py). The first three are bytes
# patterns because step files are scanned as raw bytes.

# `src="..."` attribute holding a base64 JPEG data URI; group 1 is the whole
# URI. The payload after the prefix is capped at 200000 bytes, so a longer
# embedded image does not match.
_RE_THUMB = re.compile(rb'src="(data:image/jpeg;base64,[^"]{0,200000})"', re.I)
# Status pill: group 1 is "good" or "bad" (matched case-insensitively),
# group 2 is the label text that follows the opening tag.
_RE_PILL = re.compile(rb'class="pill (good|bad)">([A-Z]+)<', re.I)
# Plain pill (no good/bad modifier) whose text is 1-200 bytes with no "<";
# its content is used as the URL line of a card.
_RE_URL_PILL = re.compile(rb'<span class="pill">([^<]{1,200})</span>')
# Step id: everything between "step-" and the trailing ".html" of a file name.
_RE_STEP_ID = re.compile(r"step-(.+)\.html$")
| 73 | + |
| 74 | + |
# Index page template, rendered with str.format(). Placeholders: {date},
# {count}, {n_ok}, {n_fail}, {generated_at}, {cards}. Literal CSS/JS braces are
# doubled so that str.format() leaves them alone.
#
# The filter buttons are toggle buttons, not tabs: the nav carries no
# role="tablist" (that role requires role="tab" children) and every button
# exposes its state through aria-pressed, which the inline script keeps in
# sync with the "active" class.
_INDEX_TEMPLATE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Visual Debug Index -- {date}</title>
<style>
  :root {{ --bg:#0f1115; --panel:#181b22; --fg:#e7ebf0; --muted:#8a93a3;
          --ok:#1f7a3a; --fail:#b00020; }}
  * {{ box-sizing: border-box; }}
  body {{ margin:0; background:var(--bg); color:var(--fg); font-family:ui-sans-serif,system-ui;
         font-size:13px; }}
  header {{ padding:12px 16px; border-bottom:1px solid #2a2f3a; display:flex;
           gap:12px; align-items:baseline; }}
  header h1 {{ margin:0; font-size:14px; }}
  header .meta {{ color:var(--muted); }}
  nav.filters {{ padding:8px 16px; display:flex; gap:10px; }}
  nav.filters button {{
    background:var(--panel); color:var(--fg); border:1px solid #2a2f3a;
    padding:4px 10px; border-radius:14px; font:inherit; cursor:pointer;
  }}
  nav.filters button.active {{ background:#2a3142; border-color:#3a425a; }}
  .grid {{ display:grid; grid-template-columns:repeat(auto-fill, minmax(220px, 1fr));
          gap:10px; padding:12px; }}
  .card {{ background:var(--panel); border-radius:6px; overflow:hidden;
          border-top:4px solid var(--muted); text-decoration:none; color:inherit;
          display:flex; flex-direction:column; }}
  .card.good {{ border-top-color:var(--ok); }}
  .card.bad {{ border-top-color:var(--fail); }}
  .card img {{ display:block; width:100%; height:140px; object-fit:cover;
              background:#000; }}
  .card .body {{ padding:8px 10px; font-size:11px; }}
  .card .body .id {{ font-weight:600; }}
  .card .body .url {{ color:var(--muted); display:block; white-space:nowrap;
                     overflow:hidden; text-overflow:ellipsis; }}
</style>
</head>
<body>
<header>
  <h1>Visual Debug Index -- {date}</h1>
  <span class="meta">{count} steps -- generated {generated_at}</span>
</header>
<nav class="filters" aria-label="status filter">
  <button type="button" class="active" aria-pressed="true" data-filter="all">All ({count})</button>
  <button type="button" aria-pressed="false" data-filter="good">OK ({n_ok})</button>
  <button type="button" aria-pressed="false" data-filter="bad">FAIL ({n_fail})</button>
</nav>
<div class="grid" id="grid">
{cards}
</div>
<script>
(function () {{
  const grid = document.getElementById('grid');
  const buttons = document.querySelectorAll('nav.filters button');
  buttons.forEach(b => b.addEventListener('click', () => {{
    buttons.forEach(x => {{
      x.classList.remove('active');
      x.setAttribute('aria-pressed', 'false');
    }});
    b.classList.add('active');
    b.setAttribute('aria-pressed', 'true');
    const f = b.dataset.filter;
    grid.querySelectorAll('.card').forEach(c => {{
      c.style.display = (f === 'all' || c.classList.contains(f)) ? '' : 'none';
    }});
  }}));
}})();
</script>
</body>
</html>
"""

# One grid card, rendered with str.format(). Placeholders: {status} (used as a
# CSS class, "good" or "bad"), {href}, {thumb}, {step_id}, {url_pill}. Values
# are inserted verbatim, so the caller is responsible for HTML-escaping them.
_CARD_TEMPLATE = """  <a class="card {status}" href="{href}">
    <img alt="step thumbnail" src="{thumb}">
    <div class="body">
      <div class="id">{step_id}</div>
      <span class="url">{url_pill}</span>
    </div>
  </a>"""
| 150 | + |
| 151 | + |
def _extract_card_data(path: Path) -> dict[str, str] | None:
    """Pull thumbnail, status and url-pill out of one per-step HTML file.

    The file is scanned as raw bytes with the module-level ``_RE_*`` patterns.

    Args:
        path: Per-step HTML file to inspect.

    Returns:
        A dict with the keys ``status`` ("good" or "bad"), ``thumb`` (JPEG
        data URI), ``step_id``, ``url_pill`` and ``href`` (the file name), or
        ``None`` when the file cannot be read or contains no thumbnail.
        ``url_pill`` is the text exactly as it appears in the source HTML;
        ``step_id`` and ``href`` come from the file name and are NOT
        HTML-escaped.
    """
    try:
        data = path.read_bytes()
    except OSError as exc:
        # Best effort: one unreadable file must not abort the whole index,
        # but leave a trace so the gap in the grid is explainable.
        logger.warning("skipping unreadable step file %s: %s", path, exc)
        return None

    m_thumb = _RE_THUMB.search(data)
    if not m_thumb:
        # Renderer must have failed mid-write (or the embedded image exceeds
        # the pattern's size cap); ignore the file.
        logger.warning("skipping %s: no embedded JPEG thumbnail found", path)
        return None

    # A file without a status pill is counted as "good", as before.
    m_pill = _RE_PILL.search(data)
    status = m_pill.group(1).decode("ascii").lower() if m_pill else "good"

    # errors="replace": the patterns accept arbitrary bytes, and a single
    # non-UTF-8 byte must not raise UnicodeDecodeError and kill the build.
    m_url = _RE_URL_PILL.search(data)
    url_pill = m_url.group(1).decode("utf-8", errors="replace") if m_url else ""

    m_id = _RE_STEP_ID.search(path.name)
    step_id = m_id.group(1) if m_id else path.stem

    return {
        "status": status,
        "thumb": m_thumb.group(1).decode("utf-8", errors="replace"),
        "step_id": step_id,
        "url_pill": url_pill,
        "href": path.name,
    }
| 175 | + |
| 176 | + |
def _iter_step_files(day_dir: Path) -> Iterable[Path]:
    """Yield the ``step-*.html`` files directly inside ``day_dir`` in sorted path order."""
    step_files = list(day_dir.glob("step-*.html"))
    step_files.sort()
    yield from step_files
| 179 | + |
| 180 | + |
def build_index(day_dir: Path) -> tuple[Path, dict[str, int]]:
    """Build ``index.html`` inside ``day_dir`` and return ``(path, counts)``.

    Every file yielded by ``_iter_step_files`` is passed through
    ``_extract_card_data``; files for which no card data comes back are
    skipped. The page is first written to a temporary sibling file and then
    moved over ``index.html`` with ``os.replace``.

    Args:
        day_dir: Directory holding one day's per-step HTML files. Its name is
            shown as the date in the page title and header.

    Returns:
        The path of the written ``index.html`` and a counter dict with the
        keys ``"all"``, ``"good"`` and ``"bad"``.

    Raises:
        FileNotFoundError: If no per-step file produced a card.
        OSError: If writing or replacing the index file fails.
    """
    # Function-scope imports: `html` is not imported at module level.
    from html import escape
    from urllib.parse import quote

    cards: list[str] = []
    counts = {"all": 0, "good": 0, "bad": 0}
    for step_file in _iter_step_files(day_dir):
        if step_file.name == "index.html":
            # Defensive: never list the index inside itself.
            continue
        card = _extract_card_data(step_file)
        if not card:
            continue
        counts["all"] += 1
        counts[card["status"]] = counts.get(card["status"], 0) + 1

        # step_id and href are derived from the file name, which may contain
        # characters that are special in HTML (", <, &) or in URLs (#, ?,
        # spaces, ":"). url_pill and thumb were lifted verbatim from HTML
        # source and are left alone to avoid double-escaping.
        safe_card = dict(card)
        safe_card["step_id"] = escape(card["step_id"])
        safe_card["href"] = escape(quote(card["href"]))
        cards.append(_CARD_TEMPLATE.format(**safe_card))

    if counts["all"] == 0:
        raise FileNotFoundError(f"no per-step HTML files in {day_dir}")

    page = _INDEX_TEMPLATE.format(
        date=escape(day_dir.name),
        count=counts["all"],
        n_ok=counts.get("good", 0),
        n_fail=counts.get("bad", 0),
        generated_at=datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC"),
        cards="\n".join(cards),
    )
    out = day_dir / "index.html"
    tmp = out.with_suffix(".tmp")
    try:
        tmp.write_text(page, encoding="utf-8")
        os.replace(tmp, out)  # atomic
    except OSError:
        # Do not leave a half-written index.tmp behind; the original error
        # is what the caller needs to see, so cleanup failures are ignored.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
    return out, counts
| 211 | + |
| 212 | + |
| 213 | +# Vercel Blob upload (optional) |
| 214 | +# We use the public HTTP API documented at https://vercel.com/docs/storage/vercel-blob |
| 215 | +# Endpoint: `PUT https://blob.vercel-storage.com/<pathname>` with header |
| 216 | +# `authorization: Bearer $BLOB_READ_WRITE_TOKEN` |
| 217 | +# Response: JSON {url: "https://...", downloadUrl: "..."}. |
| 218 | +# We avoid the SDK to keep the script dep-free. |
| 219 | + |
def _upload_to_blob(file: Path, blob_path: str, token: str) -> str:
    """Upload one file to Vercel Blob and return the URL from the response.

    Sends ``PUT https://blob.vercel-storage.com/<blob_path>`` with the file's
    bytes as the body and a bearer ``authorization`` header, then returns the
    ``url`` field of the JSON response.

    NOTE(review): how long the returned URL stays valid, and who can read it,
    is decided by the Vercel Blob store configuration -- confirm before
    treating it as a secret or as a permanent link.

    Args:
        file: Local file to upload; it is always sent as ``text/html``.
        blob_path: Destination pathname inside the blob store, unencoded.
        token: Value of ``BLOB_READ_WRITE_TOKEN``.

    Returns:
        The ``url`` value reported by the API.

    Raises:
        OSError: If the file cannot be read or the HTTP request fails
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        ValueError: If the response body is not valid JSON.
        KeyError: If the response JSON has no ``url`` field.
    """
    import urllib.parse
    import urllib.request

    body = file.read_bytes()
    # Percent-encode the pathname ("/" is kept): a file name containing a
    # space, "#", "?" or non-ASCII text would otherwise yield an invalid or
    # truncated request URL.
    quoted_path = urllib.parse.quote(blob_path)
    req = urllib.request.Request(
        f"https://blob.vercel-storage.com/{quoted_path}",
        method="PUT",
        data=body,
        headers={
            "authorization": f"Bearer {token}",
            "x-content-type": "text/html; charset=utf-8",
        },
    )
    with urllib.request.urlopen(req, timeout=30) as resp:  # nosec: trusted endpoint
        payload = json.loads(resp.read())
    return payload["url"]
| 242 | + |
| 243 | + |
def upload_day(day_dir: Path, *, token: str) -> dict[str, str]:
    """Upload each ``*.html`` file in ``day_dir`` and map file name to blob URL.

    Files are uploaded in sorted path order under the blob prefix
    ``survey-debug-<day_dir name>/``.
    """
    prefix = f"survey-debug-{day_dir.name}"
    return {
        page.name: _upload_to_blob(page, f"{prefix}/{page.name}", token)
        for page in sorted(day_dir.glob("*.html"))
    }
| 251 | + |
| 252 | + |
| 253 | +# CLI |
def _parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command line (``argv``, or ``sys.argv[1:]`` when ``None``).

    Recognised options: ``--date`` (defaults to today's UTC date), ``--root``
    (defaults to ``$VISUAL_DEBUG_OUTPUT_DIR``, else ``./debug-reports`` under
    the current working directory) and the ``--upload`` flag.
    """
    today_utc = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    fallback_root = str(Path.cwd() / "debug-reports")
    default_root = os.environ.get("VISUAL_DEBUG_OUTPUT_DIR", fallback_root)

    parser = argparse.ArgumentParser(description="Build daily visual-debug index.")
    parser.add_argument(
        "--date",
        default=today_utc,
        help="UTC date in YYYY-MM-DD; defaults to today (UTC).",
    )
    parser.add_argument(
        "--root",
        default=default_root,
        help=(
            "Root directory containing per-day folders. "
            "Default: $VISUAL_DEBUG_OUTPUT_DIR or ./debug-reports."
        ),
    )
    parser.add_argument(
        "--upload",
        action="store_true",
        help="Upload index + step files to Vercel Blob. Requires $BLOB_READ_WRITE_TOKEN.",
    )
    return parser.parse_args(argv)
| 276 | + |
| 277 | + |
def main(argv: list[str] | None = None) -> int:
    """Build the index for one day, optionally upload it, and return an exit code.

    Args:
        argv: Command-line arguments; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 -- index built (and uploaded when ``--upload`` was given)
        1 -- day directory missing, or no per-step files found in it
        2 -- ``--upload`` requested but ``BLOB_READ_WRITE_TOKEN`` is unset
        3 -- I/O error while building the index or uploading
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
    args = _parse_args(argv)
    day_dir = Path(args.root) / args.date
    if not day_dir.is_dir():
        logger.error("day dir does not exist: %s", day_dir)
        return 1

    try:
        index_path, counts = build_index(day_dir)
    except FileNotFoundError as e:
        logger.error("%s", e)
        return 1
    except OSError as e:  # pragma: no cover -- defensive
        logger.exception("I/O error building index: %s", e)
        return 3

    logger.info(
        "built %s -- %d steps (ok=%d, fail=%d)",
        index_path,
        counts.get("all", 0),
        counts.get("good", 0),
        counts.get("bad", 0),
    )

    if not args.upload:
        return 0

    token = os.environ.get("BLOB_READ_WRITE_TOKEN")
    if not token:
        logger.error("--upload requested but BLOB_READ_WRITE_TOKEN is unset")
        return 2

    try:
        urls = upload_day(day_dir, token=token)
    except (OSError, ValueError, KeyError) as e:
        # Network/HTTP failures are OSError subclasses; a malformed or
        # incomplete API response surfaces as ValueError / KeyError. Report
        # them through the documented exit code instead of a traceback.
        logger.exception("upload to Vercel Blob failed: %s", e)
        return 3

    logger.info("uploaded %d files to Vercel Blob", len(urls))
    # Public URL of the index goes to stdout so cron jobs can capture it.
    if "index.html" in urls:
        print(urls["index.html"])
    return 0
| 314 | + |
| 315 | + |
# Script entry point: hand main()'s return value to the shell as exit status.
if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
0 commit comments