Skip to content

Commit e472d02

Browse files
[ci] Add process_metrics.py script for benchmark processing and performance history updates
1 parent a8928ba commit e472d02

1 file changed

Lines changed: 173 additions & 0 deletions

File tree

scripts/process_metrics.py

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Process benchmark JSON metrics: print a Markdown summary to stdout and, when
4+
--history is supplied, append one JSONL row per configuration to the history file.
5+
6+
Workflow usage (single step, one script call):
7+
8+
python3 scripts/process_metrics.py \\
9+
--metrics metrics-standard.json:standard \\
10+
--metrics metrics-prefill-decode.json:prefill-decode \\
11+
--backend ptx \\
12+
--model Llama-3.2-1B-Instruct \\
13+
--quantization F16 \\
14+
--commit $GITHUB_SHA --branch main \\
15+
--run-id $GITHUB_RUN_ID --run-attempt 1 \\
16+
--workflow "GPULlama3 Build & Run" \\
17+
--history docs/perf-history.jsonl \\
18+
>> $GITHUB_STEP_SUMMARY
19+
"""
20+
21+
import argparse
22+
import json
23+
import os
24+
import sys
25+
from datetime import datetime, timezone
26+
from pathlib import Path
27+
28+
# Keys that a metrics JSON object is expected to provide. The list order is
# significant only for the order in which missing keys get reported.
REQUIRED_FIELDS = [
    # overall and load timings
    "total_duration",
    "load_duration",
    # prompt_eval_* pair
    "prompt_eval_count",
    "prompt_eval_duration",
    # eval_* pair
    "eval_count",
    "eval_duration",
    # totals and rates
    "total_count",
    "prompt_eval_rate",
    "eval_rate",
    "total_rate",
]
34+
35+
36+
def parse_args(argv=None):
    """Build the command-line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            zero-argument callers behave exactly as before.

    Returns:
        argparse.Namespace with ``metrics`` (list of ``PATH:CONFIGURATION``
        strings, at least one) and the optional string attributes ``history``,
        ``backend``, ``model``, ``quantization``, ``commit``, ``branch``,
        ``run_id``, ``run_number``, ``run_attempt`` and ``workflow`` (each
        None when the flag is not supplied).

    Raises:
        SystemExit: Raised by argparse (status 2) on invalid arguments, e.g.
            when ``--metrics`` is missing.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line breaks intact in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--metrics",
        action="append",
        required=True,
        metavar="PATH:CONFIGURATION",
        help="Metrics JSON file and its configuration label "
             "(e.g. metrics-standard.json:standard). Repeatable.",
    )

    # These options are only grouped for --help; argparse does not enforce
    # that they are supplied together.
    history = parser.add_argument_group(
        "history (all required together when --history is given)"
    )
    history.add_argument("--history", default=None,
                         help="JSONL history file to append rows to")
    history.add_argument("--backend", default=None)
    history.add_argument("--model", default=None,
                         help="Model name, e.g. Llama-3.2-1B-Instruct")
    history.add_argument("--quantization", default=None,
                         help="Quantization, e.g. F16 or Q8_0")
    history.add_argument("--commit", default=None)
    history.add_argument("--branch", default=None)
    history.add_argument("--run-id", default=None, dest="run_id")
    history.add_argument("--run-number", default=None, dest="run_number")
    history.add_argument("--run-attempt", default=None, dest="run_attempt")
    history.add_argument("--workflow", default=None)
    return parser.parse_args(argv)
59+
60+
61+
def load_metrics(path):
    """Load one metrics JSON file and check that it has the required fields.

    Args:
        path: Filesystem path of the metrics JSON file.

    Returns:
        The parsed JSON object as a dict, or None when the file does not
        exist (a warning is printed to stderr so the caller can skip it).

    Raises:
        SystemExit: With status 1 when the file cannot be read, is not valid
            JSON, is not a JSON object, or lacks any key in REQUIRED_FIELDS.
    """
    # Open directly instead of testing os.path.exists() first: that avoids a
    # check-then-use race and lets other I/O failures (e.g. the path being a
    # directory) be reported instead of surfacing as a raw traceback.
    try:
        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"WARNING: metrics file not found, skipping: {path}", file=sys.stderr)
        return None
    except OSError as e:
        print(f"ERROR: failed to read {path}: {e}", file=sys.stderr)
        sys.exit(1)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"ERROR: failed to parse {path}: {e}", file=sys.stderr)
        sys.exit(1)

    # A scalar or array at the top level would break the key checks below
    # (``k not in 42`` raises TypeError), so demand a JSON object up front.
    if not isinstance(data, dict):
        print(f"ERROR: {path} must contain a JSON object, got {type(data).__name__}",
              file=sys.stderr)
        sys.exit(1)

    missing = [k for k in REQUIRED_FIELDS if k not in data]
    if missing:
        print(f"ERROR: {path} missing required fields: {', '.join(missing)}", file=sys.stderr)
        sys.exit(1)
    return data
76+
77+
78+
def build_summary(rows):
    """Render metric rows as a Markdown table.

    Args:
        rows: Iterable of dicts, each providing ``configuration``,
            ``eval_rate``, ``prompt_eval_rate``, ``total_rate``,
            ``eval_count``, ``prompt_eval_count`` and ``total_duration_ms``.

    Returns:
        The table as a single string: a header line, an alignment line, one
        line per row, and a trailing newline. Rates are formatted with two
        decimals and the duration with none.
    """
    header = (
        "| configuration | eval tok/s | prompt tok/s | total tok/s"
        " | eval tokens | prompt tokens | total ms |"
    )
    alignment = "|---|---:|---:|---:|---:|---:|---:|"

    def render(row):
        # One table line: cells joined by " | " inside outer pipe delimiters.
        cells = (
            f"{row['configuration']}",
            f"{row['eval_rate']:.2f}",
            f"{row['prompt_eval_rate']:.2f}",
            f"{row['total_rate']:.2f}",
            f"{row['eval_count']}",
            f"{row['prompt_eval_count']}",
            f"{row['total_duration_ms']:.0f}",
        )
        return "| " + " | ".join(cells) + " |"

    table = [header, alignment]
    table.extend(render(row) for row in rows)
    return "\n".join(table) + "\n"
95+
96+
97+
def build_history_row(m, configuration, args):
    """Assemble one history record for a single configuration.

    Args:
        m: Mapping of metric values; the ten metric keys copied below must
            all be present.
        configuration: Label stored under the ``configuration`` key.
        args: Object exposing ``commit``, ``branch``, ``run_id``,
            ``run_number``, ``run_attempt``, ``workflow``, ``backend``,
            ``model`` and ``quantization`` attributes.

    Returns:
        A dict holding a UTC ISO-8601 timestamp, the run metadata taken from
        ``args`` (``short_commit`` is the first 8 characters of the commit,
        and a falsy ``run_number`` becomes an empty string), the
        configuration label and the metric values, in that key order.
    """
    sha = args.commit
    row = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "commit": sha,
        "short_commit": sha[:8],
        "branch": args.branch,
        "run_id": args.run_id,
        "run_number": args.run_number if args.run_number else "",
    }

    # Remaining run metadata is copied verbatim under the attribute's name.
    for attr in ("run_attempt", "workflow", "backend", "model", "quantization"):
        row[attr] = getattr(args, attr)
    row["configuration"] = configuration

    # Metric values follow, in the order they appear in the history file.
    metric_keys = (
        "eval_rate",
        "prompt_eval_rate",
        "total_rate",
        "eval_count",
        "prompt_eval_count",
        "total_count",
        "total_duration",
        "load_duration",
        "prompt_eval_duration",
        "eval_duration",
    )
    for key in metric_keys:
        row[key] = m[key]
    return row
122+
123+
124+
def main():
    """Script entry point: summarize metrics and optionally record history.

    Steps:
      1. Parse the command line; when ``--history`` is given, require the
         accompanying run-metadata options to be non-empty.
      2. For each ``--metrics PATH:CONFIGURATION`` spec, load the file.
         Files that do not exist are skipped (load_metrics returns None).
      3. Write the Markdown summary table to stdout if any file was loaded.
      4. Append one JSON line per loaded configuration to the history file,
         creating its parent directory if needed.

    Raises:
        SystemExit: With status 1 on incomplete history options or a
            malformed ``--metrics`` spec (missing separator, empty path or
            empty configuration label).
    """
    args = parse_args()

    history_requirements = [
        args.backend, args.model, args.quantization,
        args.commit, args.branch, args.run_id,
        args.run_attempt, args.workflow,
    ]
    if args.history and not all(history_requirements):
        print("ERROR: --history requires --backend, --model, --quantization, "
              "--commit, --branch, --run-id, --run-attempt, --workflow", file=sys.stderr)
        sys.exit(1)

    summary_rows = []
    history_rows = []

    for spec in args.metrics:
        # Split on the first ':' only; everything after it is the label.
        path, separator, configuration = spec.partition(":")
        # An empty path or label ("file.json:" / ":label") would otherwise
        # slip through and yield rows with a blank configuration.
        if not (separator and path and configuration):
            print(f"ERROR: --metrics expects 'path:configuration', got '{spec}'", file=sys.stderr)
            sys.exit(1)

        m = load_metrics(path)
        if m is None:
            continue

        summary_rows.append({
            "configuration": configuration,
            "eval_rate": m.get("eval_rate", 0),
            "prompt_eval_rate": m.get("prompt_eval_rate", 0),
            "total_rate": m.get("total_rate", 0),
            "eval_count": m.get("eval_count", 0),
            "prompt_eval_count": m.get("prompt_eval_count", 0),
            # Scaled for the "total ms" column (presumably ns -> ms; confirm
            # against the benchmark that produces the metrics files).
            "total_duration_ms": m.get("total_duration", 0) / 1_000_000,
        })

        if args.history:
            history_rows.append(build_history_row(m, configuration, args))

    if summary_rows:
        sys.stdout.write(build_summary(summary_rows))

    if history_rows:
        history = Path(args.history)
        history.parent.mkdir(parents=True, exist_ok=True)
        with open(history, "a", encoding="utf-8") as f:
            f.writelines(json.dumps(row) + "\n" for row in history_rows)
        # Status goes to stderr so stdout carries only the Markdown summary.
        print(f"Appended {len(history_rows)} row(s) to {history}", file=sys.stderr)
170+
171+
172+
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)