Skip to content

Commit 80639b2

Browse files
author
Rex
committed
feat(metrics): add onboard daily report script and docs
1 parent 095b970 commit 80639b2

6 files changed

Lines changed: 519 additions & 1 deletion

File tree

.github/workflows/ci.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ jobs:
2626
scripts.tests.test_verify_version_changelog \
2727
scripts.tests.test_verify_release_consistency \
2828
scripts.tests.test_provider_health_report \
29-
scripts.tests.test_package_release
29+
scripts.tests.test_package_release \
30+
scripts.tests.test_onboard_metrics_report
3031
3132
versioning-guard:
3233
name: versioning guard (PR)

docs-site/explanation/reliability-baseline.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,41 @@ Each line records timestamp, workspace, session id, outcome, and (for failures)
4141
Get-Content $HOME/.rexos/onboard-events.jsonl -Tail 20
4242
```
4343

44+
## Daily report script
45+
46+
LoopForge includes a daily aggregation script:
47+
48+
- `scripts/onboard_metrics_report.py`
49+
50+
Run it from repository root:
51+
52+
=== "macOS/Linux"
53+
```bash
54+
python3 scripts/onboard_metrics_report.py \
55+
--base-dir ~/.rexos \
56+
--out-dir .tmp/onboard-report \
57+
--days 7 \
58+
--window-hours 24
59+
60+
cat .tmp/onboard-report/onboard-report.md
61+
```
62+
63+
=== "Windows (PowerShell)"
64+
```powershell
65+
python scripts/onboard_metrics_report.py `
66+
--base-dir $HOME/.rexos `
67+
--out-dir .tmp/onboard-report `
68+
--days 7 `
69+
--window-hours 24
70+
71+
Get-Content .tmp/onboard-report/onboard-report.md
72+
```
73+
74+
Output files:
75+
76+
- `.tmp/onboard-report/onboard-report.json`
77+
- `.tmp/onboard-report/onboard-report.md`
78+
4479
## Suggested initial targets
4580

4681
- First-task success rate >= 70%

docs-site/zh-CN/explanation/reliability-baseline.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,41 @@ LoopForge 会把首任务结果写入 `~/.rexos/onboard-metrics.json`:
4141
Get-Content $HOME/.rexos/onboard-events.jsonl -Tail 20
4242
```
4343

44+
## 日报汇总脚本
45+
46+
LoopForge 内置了一个 onboarding 指标日报脚本:
47+
48+
- `scripts/onboard_metrics_report.py`
49+
50+
在仓库根目录执行:
51+
52+
=== "macOS/Linux"
53+
```bash
54+
python3 scripts/onboard_metrics_report.py \
55+
--base-dir ~/.rexos \
56+
--out-dir .tmp/onboard-report \
57+
--days 7 \
58+
--window-hours 24
59+
60+
cat .tmp/onboard-report/onboard-report.md
61+
```
62+
63+
=== "Windows (PowerShell)"
64+
```powershell
65+
python scripts/onboard_metrics_report.py `
66+
--base-dir $HOME/.rexos `
67+
--out-dir .tmp/onboard-report `
68+
--days 7 `
69+
--window-hours 24
70+
71+
Get-Content .tmp/onboard-report/onboard-report.md
72+
```
73+
74+
输出文件:
75+
76+
- `.tmp/onboard-report/onboard-report.json`
77+
- `.tmp/onboard-report/onboard-report.md`
78+
4479
## 初始目标建议
4580

4681
- 首任务成功率 >= 70%

scripts/onboard_metrics_report.py

Lines changed: 279 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,279 @@
1+
#!/usr/bin/env python3
2+
from __future__ import annotations
3+
4+
import argparse
5+
import datetime as dt
6+
import json
7+
from pathlib import Path
8+
9+
10+
def _iso_now(now_ms: int | None = None) -> str:
11+
if now_ms is None:
12+
now = dt.datetime.now(dt.timezone.utc)
13+
else:
14+
now = dt.datetime.fromtimestamp(now_ms / 1000, tz=dt.timezone.utc)
15+
return now.replace(microsecond=0).isoformat()
16+
17+
18+
def _safe_rate(numerator: int, denominator: int) -> str:
19+
if denominator <= 0:
20+
return "0.00%"
21+
return f"{(numerator / denominator) * 100:.2f}%"
22+
23+
24+
def load_metrics(base_dir: Path) -> dict[str, object]:
    """Load the cumulative counters stored in ``onboard-metrics.json``.

    Args:
        base_dir: Directory expected to contain ``onboard-metrics.json``.

    Returns:
        The parsed JSON object. Zeroed defaults are returned when the file is
        missing, cannot be read or decoded, is not valid JSON, or holds a
        top-level value that is not a JSON object.
    """
    defaults: dict[str, object] = {
        "attempted_first_task": 0,
        "first_task_success": 0,
        "first_task_failed": 0,
        "failure_by_category": {},
        "updated_at_ms": 0,
    }
    path = base_dir / "onboard-metrics.json"
    try:
        loaded = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: bad JSON or bad UTF-8.
        # RecursionError: pathologically nested JSON. All are best-effort.
        return defaults
    # Callers use ``.get`` on the result, so a list/number/string payload must
    # not leak through.
    if not isinstance(loaded, dict):
        return defaults
    return loaded
44+
45+
46+
def load_events(base_dir: Path) -> list[dict[str, object]]:
    """Load the per-attempt events stored in ``onboard-events.jsonl``.

    Args:
        base_dir: Directory expected to contain ``onboard-events.jsonl``.

    Returns:
        One dict per line that parses as a JSON object, in file order. Blank
        lines, unparsable lines and non-object values are skipped. A missing
        file yields an empty list.
    """
    path = base_dir / "onboard-events.jsonl"
    rows: list[dict[str, object]] = []
    try:
        # Iterate the file instead of using ``str.splitlines()``: the latter
        # also splits on characters such as U+2028 that may appear unescaped
        # inside a JSON string, which would tear a valid record in two.
        # ``errors="replace"`` keeps one undecodable byte from aborting the
        # whole report.
        with path.open(encoding="utf-8", errors="replace") as handle:
            for raw in handle:
                line = raw.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except (ValueError, RecursionError):
                    continue
                if isinstance(obj, dict):
                    rows.append(obj)
    except FileNotFoundError:
        return []
    return rows
63+
64+
65+
def _date_from_ms(ts_ms: int) -> str:
66+
return dt.datetime.fromtimestamp(ts_ms / 1000, tz=dt.timezone.utc).date().isoformat()
67+
68+
69+
def summarize_daily(events: list[dict[str, object]], days: int, now_ms: int) -> list[dict[str, object]]:
    """Bucket events into per-day counters for the last *days* UTC days.

    Args:
        events: Event dicts. Only ``ts_ms`` (int milliseconds) and ``outcome``
            are read.
        days: Number of consecutive UTC dates to report, ending on the date
            of *now_ms*.
        now_ms: Reference time in milliseconds since the Unix epoch.

    Returns:
        One row per date in ascending order, each with ``date``,
        ``attempted``, ``success``, ``failed`` and ``success_rate``. Dates
        without events are present with zero counts.
    """
    now_date = dt.datetime.fromtimestamp(now_ms / 1000, tz=dt.timezone.utc).date()
    start_date = now_date - dt.timedelta(days=days - 1)

    # Pre-create every slot so empty days still appear in the trend table.
    slots: dict[str, dict[str, object]] = {}
    for offset in range(days):
        day = (start_date + dt.timedelta(days=offset)).isoformat()
        slots[day] = {
            "date": day,
            "attempted": 0,
            "success": 0,
            "failed": 0,
            "success_rate": "0.00%",
        }

    for row in events:
        ts_ms = row.get("ts_ms")
        if not isinstance(ts_ms, int):
            continue
        try:
            day = _date_from_ms(ts_ms)
        except (OverflowError, OSError, ValueError):
            # A corrupt timestamp outside the representable range must not
            # abort the whole report; treat it like any other unusable row.
            continue
        slot = slots.get(day)
        if slot is None:
            continue
        slot["attempted"] = int(slot["attempted"]) + 1
        outcome = str(row.get("outcome", "")).strip().lower()
        # Anything that is not an explicit success counts as a failure.
        key = "success" if outcome == "success" else "failed"
        slot[key] = int(slot[key]) + 1

    out: list[dict[str, object]] = []
    for day in sorted(slots):
        row = slots[day]
        row["success_rate"] = _safe_rate(int(row["success"]), int(row["attempted"]))
        out.append(row)
    return out
107+
108+
109+
def summarize_recent_window(
    events: list[dict[str, object]], window_hours: int, now_ms: int
) -> dict[str, object]:
    """Summarize events whose timestamp is within the trailing window.

    Args:
        events: Event dicts. ``ts_ms`` (int milliseconds), ``outcome`` and
            ``failure_category`` are read.
        window_hours: Window length in hours, counted back from *now_ms*.
        now_ms: Reference time in milliseconds since the Unix epoch.

    Returns:
        A dict with ``window_hours``, ``attempted``, ``success``, ``failed``,
        ``success_rate`` and ``failure_by_category`` (ordered by descending
        count, then by name).
    """
    cutoff_ms = now_ms - window_hours * 60 * 60 * 1000

    attempted = 0
    success = 0
    by_category: dict[str, int] = {}
    for row in events:
        ts_ms = row.get("ts_ms")
        if not isinstance(ts_ms, int) or ts_ms < cutoff_ms:
            continue
        attempted += 1
        # Normalize exactly like the daily summary (strip + lower) so both
        # sections of the report classify the same event the same way.
        outcome = str(row.get("outcome", "")).strip().lower()
        if outcome == "success":
            success += 1
            continue
        # A JSON null category must fall back to "unknown" rather than being
        # stringified to the literal "None".
        raw_category = row.get("failure_category")
        category = "" if raw_category is None else str(raw_category).strip()
        category = category or "unknown"
        by_category[category] = by_category.get(category, 0) + 1

    ordered = sorted(by_category.items(), key=lambda kv: (-kv[1], kv[0]))
    return {
        "window_hours": window_hours,
        "attempted": attempted,
        "success": success,
        "failed": attempted - success,
        "success_rate": _safe_rate(success, attempted),
        "failure_by_category": dict(ordered),
    }
138+
139+
140+
def build_report(base_dir: Path, days: int, window_hours: int, now_ms: int | None = None) -> dict[str, object]:
    """Assemble the full report from the metrics snapshot and the event log.

    Args:
        base_dir: Directory holding the metrics and events files.
        days: Number of UTC days for the daily trend.
        window_hours: Size of the trailing window in hours.
        now_ms: Reference time in milliseconds since the Unix epoch. The
            current time is used when ``None``.

    Returns:
        A dict with ``generated_at``, ``base_dir``, ``metrics_snapshot``,
        ``recent_window``, ``daily`` and ``event_count``.
    """
    # Compare against None explicitly so an intentional ``now_ms=0`` is kept.
    if now_ms is None:
        now_ms = int(dt.datetime.now(dt.timezone.utc).timestamp() * 1000)

    metrics = load_metrics(base_dir)
    if not isinstance(metrics, dict):
        # Defensive: the snapshot is read with ``.get`` below.
        metrics = {}
    events = load_events(base_dir)

    def _count(key: str) -> int:
        # Missing, null or malformed counters are reported as 0 instead of
        # aborting the report.
        try:
            return int(metrics.get(key, 0) or 0)
        except (TypeError, ValueError, OverflowError):
            return 0

    attempted_total = _count("attempted_first_task")
    success_total = _count("first_task_success")
    failed_total = _count("first_task_failed")

    return {
        "generated_at": _iso_now(now_ms),
        "base_dir": str(base_dir),
        "metrics_snapshot": {
            "attempted_first_task": attempted_total,
            "first_task_success": success_total,
            "first_task_failed": failed_total,
            "success_rate": _safe_rate(success_total, attempted_total),
            "failure_by_category": metrics.get("failure_by_category", {}),
            "updated_at_ms": _count("updated_at_ms"),
        },
        "recent_window": summarize_recent_window(events, window_hours=window_hours, now_ms=now_ms),
        "daily": summarize_daily(events, days=days, now_ms=now_ms),
        "event_count": len(events),
    }
165+
166+
167+
def render_markdown(report: dict[str, object]) -> str:
    """Render a report dict as a Markdown document.

    Args:
        report: Mapping with the keys ``generated_at``, ``base_dir``,
            ``event_count``, ``metrics_snapshot``, ``recent_window`` and
            ``daily``. Missing keys fall back to empty/zero placeholders.

    Returns:
        The Markdown text, ending with a single newline.
    """

    def _cell(value: object) -> str:
        # Free-form text (e.g. a failure category taken from the event log)
        # must not break the table: a raw pipe would open a new column and a
        # line break would end the row.
        return str(value).replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    snapshot = report.get("metrics_snapshot")
    if not isinstance(snapshot, dict):
        snapshot = {}
    recent = report.get("recent_window")
    if not isinstance(recent, dict):
        recent = {}

    lines: list[str] = [
        "# Onboard Metrics Report",
        "",
        f"- Generated: {report.get('generated_at', '')}",
        f"- Base dir: `{report.get('base_dir', '')}`",
        f"- Events loaded: {report.get('event_count', 0)}",
        "",
        "## Metrics Snapshot",
        "",
        f"- Attempted: {snapshot.get('attempted_first_task', 0)}",
        f"- Success: {snapshot.get('first_task_success', 0)}",
        f"- Failed: {snapshot.get('first_task_failed', 0)}",
        f"- Success rate: {snapshot.get('success_rate', '0.00%')}",
        "",
        f"## Recent Window (Last {recent.get('window_hours', 24)}h)",
        "",
        f"- Attempted: {recent.get('attempted', 0)}",
        f"- Success: {recent.get('success', 0)}",
        f"- Failed: {recent.get('failed', 0)}",
        f"- Success rate: {recent.get('success_rate', '0.00%')}",
        "",
        "### Recent Failure Categories",
        "",
        "| Category | Count |",
        "|---|---:|",
    ]

    failure_by_category = recent.get("failure_by_category", {})
    if isinstance(failure_by_category, dict) and failure_by_category:
        for category, count in failure_by_category.items():
            lines.append(f"| {_cell(category)} | {int(count)} |")
    else:
        lines.append("| (none) | 0 |")
    lines.append("")

    lines.extend(
        [
            "## Daily Trend",
            "",
            "| Date (UTC) | Attempted | Success | Failed | Success rate |",
            "|---|---:|---:|---:|---:|",
        ]
    )
    daily = report.get("daily", [])
    if isinstance(daily, list) and daily:
        for row in daily:
            if not isinstance(row, dict):
                continue
            lines.append(
                "| {date} | {attempted} | {success} | {failed} | {rate} |".format(
                    date=_cell(row.get("date", "")),
                    attempted=int(row.get("attempted", 0)),
                    success=int(row.get("success", 0)),
                    failed=int(row.get("failed", 0)),
                    rate=_cell(row.get("success_rate", "0.00%")),
                )
            )
    else:
        lines.append("| (no data) | 0 | 0 | 0 | 0.00% |")
    # The trailing empty element makes the joined text end with a newline.
    lines.append("")

    return "\n".join(lines)
230+
231+
232+
def main(argv: list[str]) -> int:
    """Parse CLI arguments, build the report and write the JSON/Markdown files.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        Process exit status; ``0`` once both files have been written.
    """
    parser = argparse.ArgumentParser(
        description="Generate onboarding success/failure daily report from ~/.rexos metrics/events"
    )
    parser.add_argument(
        "--base-dir",
        default=str(Path.home() / ".rexos"),
        help="LoopForge data dir (default: ~/.rexos)",
    )
    parser.add_argument(
        "--out-dir",
        default=".tmp/onboard-report",
        help="Output directory for report files (default: .tmp/onboard-report)",
    )
    parser.add_argument(
        "--days",
        type=int,
        default=7,
        help="Number of UTC days in daily trend table (default: 7)",
    )
    parser.add_argument(
        "--window-hours",
        type=int,
        default=24,
        help="Rolling window size in hours (default: 24)",
    )
    args = parser.parse_args(argv)

    # Clamp to at least one day / one hour so the summaries are never empty.
    days = max(1, int(args.days))
    window_hours = max(1, int(args.window_hours))
    base_dir = Path(args.base_dir).expanduser().resolve()
    # Expand "~" here as well: it is not always expanded by the shell (quoted
    # arguments, PowerShell), and would otherwise create a literal "~" directory.
    out_dir = Path(args.out_dir).expanduser().resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    report = build_report(base_dir=base_dir, days=days, window_hours=window_hours)

    json_path = out_dir / "onboard-report.json"
    md_path = out_dir / "onboard-report.md"
    json_path.write_text(json.dumps(report, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    md_path.write_text(render_markdown(report) + "\n", encoding="utf-8")

    print(f"wrote: {json_path}")
    print(f"wrote: {md_path}")
    return 0
276+
277+
278+
if __name__ == "__main__":
    # Script entry point: pass the CLI arguments (without the program name)
    # to main() and exit with the status it returns.
    import sys

    raise SystemExit(main(sys.argv[1:]))

scripts/tests/test_ci_workflows.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ def test_ci_runs_versioning_script_tests(self):
1414
self.assertIn("scripts.tests.test_verify_release_consistency", ci)
1515
self.assertIn("scripts.tests.test_provider_health_report", ci)
1616
self.assertIn("scripts.tests.test_package_release", ci)
17+
self.assertIn("scripts.tests.test_onboard_metrics_report", ci)
1718

1819
def test_provider_nightly_workflow_generates_health_artifacts(self):
1920
workflow = (

0 commit comments

Comments
 (0)