Skip to content

Commit 0fb9042

Browse files
committed
DPL MCP: add percentile plots for trains running on the grid
1 parent 74f50ca commit 0fb9042

1 file changed

Lines changed: 86 additions & 0 deletions

File tree

Framework/Core/scripts/hyperloop-server/hyperloop_server.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,92 @@ async def validate_train_composition(train_ids: list[int]) -> str:
501501
return "\n".join(out)
502502

503503

504+
def _percentiles(vals: list[float], ps=(0, 5, 10, 25, 50, 75, 90, 95, 100)) -> dict:
505+
"""Nearest-rank percentiles of a value list (no numpy in the server env)."""
506+
s = sorted(vals)
507+
n = len(s)
508+
out = {}
509+
for p in ps:
510+
if n == 1:
511+
out[p] = s[0]
512+
continue
513+
k = (n - 1) * (p / 100.0)
514+
lo, hi = int(k), min(int(k) + 1, n - 1)
515+
out[p] = s[lo] + (s[hi] - s[lo]) * (k - lo)
516+
return out
517+
518+
519+
@mcp.tool()
async def grid_job_bands(train_ids: list[int], check_composition: bool = True) -> str:
    """Per-JOB grid throughput distribution (percentile bands) across trains over time.

    For each train, fetches its per-run grid results (train.jsp jobResults) and
    builds percentile bands over the *individual jobs'* throughput_per_core — the
    distribution behind the grid-statistics "jobs per CPU time" histogram — NOT
    the single train-average throughput, which collapses that spread to one
    number. Use this to watch a job-performance distribution shift over time
    (e.g. an optimization landing) rather than chasing a noisy mean.

    By default runs validate_train_composition first and keeps only the trains
    that share the reference composition (set check_composition=False to skip the
    guard and band every train as given). Returns a per-train percentile table
    (p0/p10/p50/p90/p100 KB/s/core, job count) ordered by date, plus a fenced
    ```jsonl block (one {date,train,n,tpc:[...]} per train) ready to feed a
    band/fan-chart plotting script. Trains whose fetch failed, or that have no
    job with a positive throughput_per_core, are listed explicitly instead of
    being silently left out.
    """
    if check_composition and len(train_ids) > 1:
        # Only the reference and the matching trains are needed here; the other
        # two values returned by the helper are intentionally unused.
        _groups, ref, matched, _failed = await _match_compositions(train_ids)
        if ref is None:
            return "Could not resolve composition for any train: " + \
                ", ".join(map(str, train_ids))
        dropped = [t for t in train_ids if t not in matched]
        keep = matched
    else:
        keep, dropped = list(train_ids), []

    async def fetch(tid: int):
        """Return (tid, date, tpc, error) for one train; error is None on success."""
        try:
            t = await _get("trains/train.jsp", {"train_id": tid})
            t = t[0] if isinstance(t, list) else t
            jr = t.get("jobResults") or []
            # Keep only jobs that report a positive throughput.
            tpc = [j["throughput_per_core"] for j in jr
                   if (j.get("throughput_per_core") or 0) > 0]
            created = t.get("created")
            # NOTE(review): `created` is treated as epoch milliseconds — confirm.
            date = (datetime.datetime.fromtimestamp(
                created / 1000, datetime.timezone.utc).strftime("%Y-%m-%d")
                if created else "?")
            return tid, date, tpc, None
        except Exception as e:
            # Best effort per train: one failure must not abort the whole
            # report, but it is carried forward so it can be shown to the user.
            return tid, None, [], f"{type(e).__name__}: {e}"

    rows = await asyncio.gather(*(fetch(t) for t in keep))
    good = [(tid, d, tpc) for tid, d, tpc, err in rows if err is None and tpc]
    errors = [(tid, err) for tid, _d, _tpc, err in rows if err is not None]
    empty = [tid for tid, _d, tpc, err in rows if err is None and not tpc]
    good.sort(key=lambda r: (r[1], r[0]))

    diagnostics = []
    if errors:
        diagnostics.append(
            "Fetch failed: " + "; ".join(f"{tid} ({err})" for tid, err in errors))
    if empty:
        diagnostics.append(
            "No per-job throughput: " + ", ".join(map(str, empty)))

    if not good:
        msg = "No usable per-job throughput for: " + ", ".join(map(str, keep))
        return "\n".join([msg, *diagnostics])

    out = ["Per-job grid throughput bands (KB/s/core), over individual jobs "
           "(not train average):\n"]
    if dropped:
        out.append(f"Dropped (composition mismatch): {', '.join(map(str, dropped))}\n")
    if diagnostics:
        out.append("\n".join(diagnostics) + "\n")
    out.append(f"{'date':<11}{'train':>8}{'jobs':>6}"
               f"{'p0':>8}{'p10':>8}{'p50':>8}{'p90':>8}{'p100':>8}")
    out.append("-" * 65)

    table_ps = (0, 10, 50, 90, 100)  # the percentiles shown in the table
    jsonl = []
    for tid, date, tpc in good:
        pc = _percentiles(tpc, table_ps)
        # Table values are divided by 1e3 for the KB/s/core columns; the JSONL
        # block below carries the raw per-job values unchanged.
        cells = "".join(f"{pc[p] / 1e3:>8.0f}" for p in table_ps)
        out.append(f"{date:<11}{tid:>8}{len(tpc):>6}{cells}")
        jsonl.append(json.dumps({"date": date, "train": tid,
                                 "n": len(tpc), "tpc": tpc}))
    out.append("\nData (write to a .jsonl and feed the band plot):")
    out.append("```jsonl")
    out.extend(jsonl)
    out.append("```")
    return "\n".join(out)
588+
589+
504590
# ---------------------------------------------------------------------------
505591
# Analysis / wagon browsing
506592
#

0 commit comments

Comments
 (0)