Skip to content

Commit 7a961fb

Browse files
authored
Fill release readiness from score smoke artifacts (#37)
1 parent fa316a0 commit 7a961fb

2 files changed

Lines changed: 193 additions & 4 deletions

File tree

src/microplex_us/pipelines/dashboard.py

Lines changed: 139 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,13 @@ def build_dashboard_payload(
9494
actual_l0_runs = collect_actual_l0_objective_runs(artifact_root)
9595
materialized_l0_scores = collect_materialized_policyengine_l0_scores(artifact_root)
9696
artifact_gate_reports = collect_mp300k_artifact_gate_reports(artifact_root)
97+
release_gate_reports = [
98+
*artifact_gate_reports,
99+
*_release_gate_reports_from_score_runs(
100+
score_runs,
101+
artifact_gate_reports,
102+
),
103+
]
97104
run_contracts = collect_run_contracts(artifact_root)
98105
active_logs = collect_recent_log_summaries(artifact_root)
99106
tmux_sessions = collect_tmux_sessions() if include_tmux else []
@@ -116,7 +123,7 @@ def build_dashboard_payload(
116123
"actual_l0_objective_runs": actual_l0_runs,
117124
"materialized_policyengine_l0_scores": materialized_l0_scores,
118125
"mp300k_artifact_gate_reports": artifact_gate_reports,
119-
"release_readiness": build_release_readiness(artifact_gate_reports),
126+
"release_readiness": build_release_readiness(release_gate_reports),
120127
"run_contracts": run_contracts,
121128
"active_logs": active_logs,
122129
"tmux_sessions": tmux_sessions,
@@ -293,6 +300,114 @@ def collect_mp300k_artifact_gate_reports(
293300
)
294301

295302

303+
def _release_gate_reports_from_score_runs(
    score_runs: list[dict[str, Any]],
    artifact_gate_reports: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Synthesize release-readiness rows from score runs that carry smoke data.

    A score run contributes a row only when all of the following hold:

    * it has a non-empty ``artifact_dir`` that no entry of
      ``artifact_gate_reports`` already covers (real gate reports win),
    * its ``release_smoke`` value is a dict, and
    * it has a truthy ``record_count_tier`` (used as the row's ``product``).

    Three gates are derived from the score run: ``artifact_size`` and
    ``runtime`` from the smoke summary, ``ecps_comparison`` from
    ``candidate_beats_baseline``. A gate passes only when its value is exactly
    ``True``, fails only when it is exactly ``False``, and is otherwise
    reported as unmeasured. ``full_gate_report`` is always listed as
    unmeasured, so rows produced here never reach the ``"passed"`` status.

    Args:
        score_runs: Score-run dicts, read via ``.get`` only.
        artifact_gate_reports: Gate-report rows already collected; only their
            ``artifact_dir`` values are consulted.

    Returns:
        One gate-report-shaped dict per qualifying score run, in input order,
        each tagged with ``source_kind == "score_release_smoke"``.
    """
    dirs_with_gate_report = {
        str(report.get("artifact_dir"))
        for report in artifact_gate_reports
        if report.get("artifact_dir")
    }

    rows: list[dict[str, Any]] = []
    for run in score_runs:
        artifact_dir = str(run.get("artifact_dir") or "")
        if not artifact_dir or artifact_dir in dirs_with_gate_report:
            continue
        smoke = run.get("release_smoke")
        if not isinstance(smoke, dict):
            continue
        product = run.get("record_count_tier")
        if not product:
            continue

        size_ok = smoke.get("passes_file_size_ratio_2x")
        runtime_ok = smoke.get("passes_runtime_ratio_1_25x")
        beats_baseline = run.get("candidate_beats_baseline")
        gate_checks = (
            ("artifact_size", size_ok),
            ("runtime", runtime_ok),
            ("ecps_comparison", beats_baseline),
        )

        # Identity checks on purpose: only real booleans count as measured.
        failed = [name for name, outcome in gate_checks if outcome is False]
        unmeasured = [
            "full_gate_report",
            *(
                name
                for name, outcome in gate_checks
                if outcome is not True and outcome is not False
            ),
        ]
        # Same value as 4 - len(failed) - len(unmeasured): the full gate
        # report is always unmeasured, so only the three derived gates can pass.
        passing_count = sum(1 for _, outcome in gate_checks if outcome is True)

        row: dict[str, Any] = {
            "artifact_path": smoke.get("artifact_path") or run.get("artifact_path"),
            "artifact_dir": artifact_dir,
            "artifact_id": Path(artifact_dir).name,
            "product": product,
            "period": run.get("period"),
            "status": _release_smoke_gate_status(failed, unmeasured),
            "passing_required_gate_count": passing_count,
            "failed_required_gate_count": len(failed),
            "unmeasured_required_gate_count": len(unmeasured),
            "failed_required_gates": failed,
            "unmeasured_required_gates": unmeasured,
            "candidate_dataset_path": run.get("candidate_dataset"),
            "candidate_size_bytes": smoke.get("candidate_file_size_bytes"),
            "candidate_households": smoke.get("candidate_households"),
            "candidate_persons": None,
            "compatibility_status": "smoke_only",
            "artifact_size_status": _gate_bool_status(size_ok),
            "artifact_size_ratio": smoke.get("file_size_ratio"),
            "runtime_status": _gate_bool_status(runtime_ok),
            "runtime_ratio": smoke.get("median_runtime_ratio"),
            "ecps_comparison_status": _gate_bool_status(beats_baseline),
            "candidate_loss": run.get("candidate_loss"),
            "baseline_loss": run.get("baseline_loss"),
            "loss_delta": run.get("loss_delta"),
            "n_targets_kept": run.get("n_targets_kept"),
            "metric_runtime": run.get("metric_runtime"),
            "source_kind": "score_release_smoke",
        }
        rows.append(row)
    return rows
390+
391+
392+
def _release_smoke_gate_status(
393+
failed_required_gates: list[str],
394+
unmeasured_required_gates: list[str],
395+
) -> str:
396+
if failed_required_gates:
397+
return "failed"
398+
if unmeasured_required_gates:
399+
return "incomplete"
400+
return "passed"
401+
402+
403+
def _gate_bool_status(value: Any) -> str | None:
404+
if value is True:
405+
return "pass"
406+
if value is False:
407+
return "fail"
408+
return None
409+
410+
296411
def build_release_readiness(
297412
artifact_gate_reports: list[dict[str, Any]],
298413
) -> list[dict[str, Any]]:
@@ -1627,8 +1742,10 @@ def _score_entries_from_payload(path: Path, payload: Any) -> list[dict[str, Any]
16271742
"loss_delta": _number_or_none(
16281743
summary.get("enhanced_cps_native_loss_delta")
16291744
),
1630-
"candidate_beats_baseline": bool(
1631-
summary.get("candidate_beats_baseline")
1745+
"candidate_beats_baseline": _candidate_beats_baseline(
1746+
summary,
1747+
candidate_loss,
1748+
baseline_loss,
16321749
),
16331750
"candidate_unweighted_msre": _number_or_none(
16341751
summary.get("candidate_unweighted_msre")
@@ -1700,6 +1817,25 @@ def _release_smoke_summary(artifact_dir: Path) -> dict[str, Any] | None:
17001817
}
17011818

17021819

1820+
def _candidate_beats_baseline(
1821+
summary: dict[str, Any],
1822+
candidate_loss: float,
1823+
baseline_loss: float,
1824+
) -> bool:
1825+
raw_value = summary.get("candidate_beats_baseline")
1826+
if isinstance(raw_value, bool):
1827+
return raw_value
1828+
if raw_value is None:
1829+
return candidate_loss < baseline_loss
1830+
if isinstance(raw_value, str):
1831+
lowered = raw_value.strip().lower()
1832+
if lowered in {"true", "1", "yes"}:
1833+
return True
1834+
if lowered in {"false", "0", "no"}:
1835+
return False
1836+
return bool(raw_value)
1837+
1838+
17031839
def _summarize_unified_diagnostics(path: Path) -> dict[str, Any] | None:
17041840
try:
17051841
with path.open(newline="") as file:

tests/pipelines/test_dashboard.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ def test_dashboard_payload_reads_release_smoke_for_record_tiers(tmp_path):
203203
"period": 2024,
204204
"summary": {
205205
"baseline_enhanced_cps_native_loss": 0.1664,
206-
"candidate_beats_baseline": True,
207206
"candidate_enhanced_cps_native_loss": 0.0936,
208207
"enhanced_cps_native_loss_delta": -0.0728,
209208
"n_targets_kept": 2818,
@@ -254,6 +253,7 @@ def test_dashboard_payload_reads_release_smoke_for_record_tiers(tmp_path):
254253
assert score_run["release_smoke"]["median_runtime_ratio"] == 1.19
255254
assert score_run["release_smoke"]["passes_file_size_ratio_2x"] is True
256255
assert score_run["release_smoke"]["passes_runtime_ratio_1_25x"] is True
256+
assert score_run["candidate_beats_baseline"] is True
257257
current_best = next(
258258
row
259259
for row in payload["run_board"]["comparison_matrix"]
@@ -264,6 +264,21 @@ def test_dashboard_payload_reads_release_smoke_for_record_tiers(tmp_path):
264264
assertions = payload["run_board"]["assertions"]
265265
assert assertions["microplex_current_best_has_release_smoke"] is True
266266
assert assertions["microplex_current_best_release_smoke_passes"] is True
267+
readiness = payload["run_board"]["release_readiness"]
268+
assert len(readiness) == 1
269+
assert readiness[0]["product"] == "mp-120k"
270+
assert readiness[0]["metric_runtime"] == "latest_policyengine_us"
271+
assert readiness[0]["status"] == "incomplete"
272+
assert readiness[0]["best_passing_artifact"] is None
273+
assert readiness[0]["release_blockers"] == ["full_gate_report"]
274+
assert readiness[0]["best_fit_artifact"]["artifact_id"] == (
275+
"mp120k_latest_us_data_refit"
276+
)
277+
assert readiness[0]["best_fit_artifact"]["compatibility_status"] == (
278+
"smoke_only"
279+
)
280+
assert readiness[0]["best_fit_artifact"]["candidate_households"] == 120_000
281+
assert readiness[0]["best_fit_release_blockers"] == ["full_gate_report"]
267282

268283

269284
def test_dashboard_payload_wires_materialized_pe_l0_score_jsons(tmp_path):
@@ -469,6 +484,41 @@ def test_dashboard_payload_reads_mp300k_artifact_gate_reports(tmp_path):
469484
}
470485
)
471486
)
487+
(gate_dir / "scores.json").write_text(
488+
json.dumps(
489+
[
490+
{
491+
"summary": {
492+
"baseline_enhanced_cps_native_loss": 0.1664,
493+
"candidate_enhanced_cps_native_loss": 0.0936,
494+
"n_targets_kept": 2818,
495+
},
496+
"broad_loss": {
497+
"candidate_dataset": str(gate_dir / "pe_l0_candidate.h5"),
498+
"baseline_dataset": "enhanced_cps_2024.h5",
499+
},
500+
}
501+
]
502+
)
503+
)
504+
(gate_dir / "runtime_smoke_loader.json").write_text(
505+
json.dumps(
506+
{
507+
"file_size_ratio": 1.36,
508+
"median_runtime_ratio": 1.19,
509+
"candidate": {
510+
"file_size_bytes": 150_658_539,
511+
"households": 120_000,
512+
"median_elapsed_seconds": 0.137,
513+
},
514+
"baseline": {
515+
"file_size_bytes": 110_717_166,
516+
"households": 41_314,
517+
"median_elapsed_seconds": 0.115,
518+
},
519+
}
520+
)
521+
)
472522
blocked_dir = artifacts / "mp120k_better_fit_blocked"
473523
blocked_dir.mkdir(parents=True)
474524
(blocked_dir / "mp300k_artifact_gates.json").write_text(
@@ -542,6 +592,9 @@ def test_dashboard_payload_reads_mp300k_artifact_gate_reports(tmp_path):
542592
assert readiness[0]["passed_artifact_count"] == 1
543593
assert readiness[0]["failed_artifact_count"] == 1
544594
assert readiness[0]["best_passing_artifact"]["artifact_id"] == "mp120k_release"
595+
assert readiness[0]["best_passing_artifact"]["artifact_path"].endswith(
596+
"mp300k_artifact_gates.json"
597+
)
545598
assert (
546599
readiness[0]["best_fit_artifact"]["artifact_id"] == "mp120k_better_fit_blocked"
547600
)

0 commit comments

Comments
 (0)