Skip to content

Commit 0d038c3

Browse files
committed
Summarize Microplex release readiness by tier
1 parent 96ed348 commit 0d038c3

2 files changed

Lines changed: 230 additions & 15 deletions

File tree

src/microplex_us/pipelines/dashboard.py

Lines changed: 159 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def build_dashboard_payload(
116116
"actual_l0_objective_runs": actual_l0_runs,
117117
"materialized_policyengine_l0_scores": materialized_l0_scores,
118118
"mp300k_artifact_gate_reports": artifact_gate_reports,
119+
"release_readiness": build_release_readiness(artifact_gate_reports),
119120
"run_contracts": run_contracts,
120121
"active_logs": active_logs,
121122
"tmux_sessions": tmux_sessions,
@@ -220,6 +221,15 @@ def collect_mp300k_artifact_gate_reports(
220221
runtime = _gate_report_gate(gates, "runtime")
221222
ecps = _gate_report_gate(gates, "ecps_comparison")
222223
compatibility_metrics = compatibility.get("metrics", {})
224+
candidate_loss = _gate_metric(
225+
ecps,
226+
"candidate_enhanced_cps_native_loss",
227+
)
228+
baseline_loss = _gate_metric(
229+
ecps,
230+
"baseline_enhanced_cps_native_loss",
231+
)
232+
n_targets_kept = _gate_metric(ecps, "n_targets_kept")
223233
reports.append(
224234
{
225235
"artifact_path": str(path),
@@ -259,15 +269,17 @@ def collect_mp300k_artifact_gate_reports(
259269
"runtime_status": runtime.get("status"),
260270
"runtime_ratio": _gate_metric(runtime, "runtime_ratio"),
261271
"ecps_comparison_status": ecps.get("status"),
262-
"candidate_loss": _gate_metric(
263-
ecps,
264-
"candidate_enhanced_cps_native_loss",
265-
),
266-
"baseline_loss": _gate_metric(
267-
ecps,
268-
"baseline_enhanced_cps_native_loss",
269-
),
272+
"candidate_loss": candidate_loss,
273+
"baseline_loss": baseline_loss,
270274
"loss_delta": _gate_metric(ecps, "enhanced_cps_native_loss_delta"),
275+
"n_targets_kept": n_targets_kept,
276+
"metric_runtime": _infer_metric_runtime(
277+
path,
278+
{
279+
"baseline_enhanced_cps_native_loss": baseline_loss,
280+
"n_targets_kept": n_targets_kept,
281+
},
282+
),
271283
}
272284
)
273285
return sorted(
@@ -281,6 +293,145 @@ def collect_mp300k_artifact_gate_reports(
281293
)
282294

283295

296+
def build_release_readiness(
    artifact_gate_reports: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Roll artifact gate reports up into one readiness row per tier.

    A tier is a ``(product, metric_runtime)`` pair; a report missing either
    field is filed under ``"unknown"`` for that field.  Each tier row carries
    counts of passed/failed/incomplete artifacts, the lowest-loss passing
    artifact (if any), the lowest-loss artifact overall, and the blockers
    that keep the tier from shipping.

    Args:
        artifact_gate_reports: Flattened gate-report rows; each row is read
            through ``.get`` so missing keys are tolerated.

    Returns:
        One summary dict per tier, with ``release_ready`` tiers first and
        ties broken by product and then metric runtime.
    """

    def _rows_with_status(rows, wanted):
        # Keep only the rows whose gate summary status equals ``wanted``.
        return [row for row in rows if row.get("status") == wanted]

    def _display_order(entry):
        # ``False`` sorts before ``True``, so release-ready tiers come first.
        return (
            entry.get("status") != "release_ready",
            entry.get("product") or "",
            entry.get("metric_runtime") or "",
        )

    tiers: dict[tuple[str, str], list[dict[str, Any]]] = {}
    for gate_report in artifact_gate_reports:
        tier_key = (
            str(gate_report.get("product") or "unknown"),
            str(gate_report.get("metric_runtime") or "unknown"),
        )
        if tier_key not in tiers:
            tiers[tier_key] = []
        tiers[tier_key].append(gate_report)

    summaries: list[dict[str, Any]] = []
    for (product, metric_runtime), members in tiers.items():
        passed = _rows_with_status(members, "passed")
        blocked = _rows_with_status(members, "failed")
        unfinished = _rows_with_status(members, "incomplete")

        # Best fit ignores gate status; best passing only looks at passed rows.
        top_fit = min(members, key=_release_artifact_sort_key)
        if passed:
            top_passing = min(passed, key=_release_artifact_sort_key)
            top_passing_summary = _release_artifact_summary(top_passing)
            # A tier with a shippable artifact reports no tier-level blockers.
            tier_blockers = []
        else:
            top_passing = None
            top_passing_summary = None
            tier_blockers = _group_release_blockers(members)

        summaries.append(
            {
                "product": product,
                "metric_runtime": metric_runtime,
                "target_surface": _target_surface_label(metric_runtime),
                "status": _release_status(passed, blocked, unfinished),
                "artifact_count": len(members),
                "passed_artifact_count": len(passed),
                "failed_artifact_count": len(blocked),
                "incomplete_artifact_count": len(unfinished),
                "best_passing_artifact": top_passing_summary,
                "best_fit_artifact": _release_artifact_summary(top_fit),
                "best_fit_is_release_ready": top_fit.get("status") == "passed",
                "best_fit_release_blockers": _release_blockers(top_fit),
                "release_blockers": tier_blockers,
                "fit_loss_gap_to_best_passing": _fit_loss_gap(
                    top_passing,
                    top_fit,
                ),
            }
        )

    summaries.sort(key=_display_order)
    return summaries
349+
350+
351+
def _release_artifact_sort_key(row: dict[str, Any]) -> tuple[bool, float, str]:
352+
return (
353+
row.get("candidate_loss") is None,
354+
row.get("candidate_loss") or float("inf"),
355+
row.get("artifact_path") or "",
356+
)
357+
358+
359+
def _release_status(
360+
passing: list[dict[str, Any]],
361+
failed: list[dict[str, Any]],
362+
incomplete: list[dict[str, Any]],
363+
) -> str:
364+
if passing:
365+
return "release_ready"
366+
if failed:
367+
return "blocked"
368+
if incomplete:
369+
return "incomplete"
370+
return "unmeasured"
371+
372+
373+
def _target_surface_label(metric_runtime: str) -> str:
374+
if metric_runtime == "latest_policyengine_us":
375+
return "latest-us-data targets"
376+
if metric_runtime == "legacy_or_patched_runtime":
377+
return "legacy/patched targets"
378+
return "unknown targets"
379+
380+
381+
def _release_artifact_summary(row: dict[str, Any]) -> dict[str, Any]:
382+
return {
383+
"artifact_id": row.get("artifact_id"),
384+
"artifact_path": row.get("artifact_path"),
385+
"artifact_dir": row.get("artifact_dir"),
386+
"status": row.get("status"),
387+
"candidate_dataset_path": row.get("candidate_dataset_path"),
388+
"candidate_loss": row.get("candidate_loss"),
389+
"baseline_loss": row.get("baseline_loss"),
390+
"loss_delta": row.get("loss_delta"),
391+
"n_targets_kept": row.get("n_targets_kept"),
392+
"candidate_households": row.get("candidate_households"),
393+
"candidate_persons": row.get("candidate_persons"),
394+
"compatibility_status": row.get("compatibility_status"),
395+
"artifact_size_status": row.get("artifact_size_status"),
396+
"artifact_size_ratio": row.get("artifact_size_ratio"),
397+
"runtime_status": row.get("runtime_status"),
398+
"runtime_ratio": row.get("runtime_ratio"),
399+
"ecps_comparison_status": row.get("ecps_comparison_status"),
400+
"failed_required_gates": row.get("failed_required_gates") or [],
401+
"unmeasured_required_gates": row.get("unmeasured_required_gates") or [],
402+
}
403+
404+
405+
def _release_blockers(row: dict[str, Any]) -> list[str]:
406+
return sorted(
407+
{
408+
str(name)
409+
for name in (row.get("failed_required_gates") or [])
410+
+ (row.get("unmeasured_required_gates") or [])
411+
}
412+
)
413+
414+
415+
def _group_release_blockers(reports: list[dict[str, Any]]) -> list[str]:
416+
blockers: set[str] = set()
417+
for report in reports:
418+
blockers.update(_release_blockers(report))
419+
return sorted(blockers)
420+
421+
422+
def _fit_loss_gap(
423+
best_passing: dict[str, Any] | None,
424+
best_fit: dict[str, Any],
425+
) -> float | None:
426+
if best_passing is None:
427+
return None
428+
passing_loss = _number_or_none(best_passing.get("candidate_loss"))
429+
fit_loss = _number_or_none(best_fit.get("candidate_loss"))
430+
if passing_loss is None or fit_loss is None:
431+
return None
432+
return passing_loss - fit_loss
433+
434+
284435
def _gate_report_gate(gates: dict[str, Any], name: str) -> dict[str, Any]:
285436
gate = gates.get(name)
286437
return gate if isinstance(gate, dict) else {}

tests/pipelines/test_dashboard.py

Lines changed: 71 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,55 @@ def test_dashboard_payload_reads_mp300k_artifact_gate_reports(tmp_path):
469469
}
470470
)
471471
)
472+
blocked_dir = artifacts / "mp120k_better_fit_blocked"
473+
blocked_dir.mkdir(parents=True)
474+
(blocked_dir / "mp300k_artifact_gates.json").write_text(
475+
json.dumps(
476+
{
477+
"artifact_id": "mp120k_better_fit_blocked",
478+
"product": "mp-120k",
479+
"period": 2024,
480+
"summary": {
481+
"status": "failed",
482+
"passing_required_gate_count": 5,
483+
"failed_required_gate_count": 1,
484+
"unmeasured_required_gate_count": 0,
485+
"failed_required_gates": ["runtime"],
486+
"unmeasured_required_gates": [],
487+
},
488+
"candidate_dataset": {
489+
"path": "/tmp/better_fit_candidate.h5",
490+
"size_bytes": 150_658_539,
491+
},
492+
"gates": {
493+
"compatibility": {
494+
"status": "pass",
495+
"metrics": {
496+
"household_count": 120_000,
497+
"person_count": 261_177,
498+
},
499+
},
500+
"artifact_size": {
501+
"status": "pass",
502+
"metrics": {"artifact_size_ratio": 1.36},
503+
},
504+
"runtime": {
505+
"status": "fail",
506+
"metrics": {"runtime_ratio": 1.31},
507+
},
508+
"ecps_comparison": {
509+
"status": "pass",
510+
"metrics": {
511+
"candidate_enhanced_cps_native_loss": 0.0836,
512+
"baseline_enhanced_cps_native_loss": 0.1664,
513+
"enhanced_cps_native_loss_delta": -0.0828,
514+
"n_targets_kept": 2818,
515+
},
516+
},
517+
},
518+
}
519+
)
520+
)
472521

473522
payload = build_dashboard_payload(
474523
artifact_root=artifacts,
@@ -477,10 +526,25 @@ def test_dashboard_payload_reads_mp300k_artifact_gate_reports(tmp_path):
477526
)
478527

479528
reports = payload["run_board"]["mp300k_artifact_gate_reports"]
480-
assert len(reports) == 1
481-
assert reports[0]["status"] == "passed"
482-
assert reports[0]["product"] == "mp-120k"
483-
assert reports[0]["candidate_households"] == 120_000
484-
assert reports[0]["artifact_size_ratio"] == 1.36
485-
assert reports[0]["runtime_ratio"] == 1.19
486-
assert reports[0]["candidate_loss"] == 0.0936
529+
assert len(reports) == 2
530+
passed_report = next(row for row in reports if row["status"] == "passed")
531+
assert passed_report["product"] == "mp-120k"
532+
assert passed_report["candidate_households"] == 120_000
533+
assert passed_report["artifact_size_ratio"] == 1.36
534+
assert passed_report["runtime_ratio"] == 1.19
535+
assert passed_report["candidate_loss"] == 0.0936
536+
537+
readiness = payload["run_board"]["release_readiness"]
538+
assert len(readiness) == 1
539+
assert readiness[0]["product"] == "mp-120k"
540+
assert readiness[0]["metric_runtime"] == "latest_policyengine_us"
541+
assert readiness[0]["status"] == "release_ready"
542+
assert readiness[0]["passed_artifact_count"] == 1
543+
assert readiness[0]["failed_artifact_count"] == 1
544+
assert readiness[0]["best_passing_artifact"]["artifact_id"] == "mp120k_release"
545+
assert (
546+
readiness[0]["best_fit_artifact"]["artifact_id"] == "mp120k_better_fit_blocked"
547+
)
548+
assert readiness[0]["best_fit_is_release_ready"] is False
549+
assert readiness[0]["best_fit_release_blockers"] == ["runtime"]
550+
assert readiness[0]["fit_loss_gap_to_best_passing"] == pytest.approx(0.01)

0 commit comments

Comments
 (0)