Skip to content

Commit 5bc831a

Browse files
[cross-repo from workflow#395] server + workflow + sdk-python: make replay verification a first-class platform contract (#22)
1 parent 78bc242 commit 5bc831a

4 files changed

Lines changed: 107 additions & 3 deletions

File tree

src/durable_workflow/history_bundle_verify.py

Lines changed: 12 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -700,8 +700,19 @@ def _finding(
700700
severity: str,
701701
message: str,
702702
context: Mapping[str, Any] | None = None,
703+
path: str | None = None,
703704
) -> dict[str, Any]:
704-
finding: dict[str, Any] = {"rule": rule, "severity": severity, "message": message}
705+
if path is None and context is not None:
706+
context_path = context.get("path")
707+
if isinstance(context_path, str) and context_path:
708+
path = context_path
709+
710+
finding: dict[str, Any] = {
711+
"rule": rule,
712+
"severity": severity,
713+
"message": message,
714+
"path": path,
715+
}
705716
if context:
706717
finding["context"] = dict(context)
707718
return finding

src/durable_workflow/replay_verify.py

Lines changed: 70 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,8 @@ class GoldenHistoryReport:
191191
fixture_schema: str = FIXTURE_SCHEMA
192192
cases: list[CaseReport] = field(default_factory=list)
193193
missing_families: list[str] = field(default_factory=list)
194+
required_families: list[str] = field(default_factory=list)
195+
covered_families: list[str] = field(default_factory=list)
194196
summary: dict[str, int] = field(default_factory=dict)
195197

196198
@property
@@ -212,13 +214,25 @@ def verdict(self) -> str:
212214
def promotion_decision(self) -> str:
213215
return promotion_decision_for(self.verdict)
214216

217+
@property
218+
def evidence(self) -> dict[str, Any]:
219+
return {
220+
"fixture_count": int(self.summary.get("fixtures", 0)),
221+
"case_count": int(self.summary.get("cases", 0)),
222+
"required_families": list(self.required_families),
223+
"covered_families": list(self.covered_families),
224+
"missing_family_count": len(self.missing_families),
225+
"missing_families": list(self.missing_families),
226+
}
227+
215228
def to_dict(self) -> dict[str, Any]:
216229
return {
217230
"schema": self.schema,
218231
"schema_version": self.schema_version,
219232
"status": self.status,
220233
"verdict": self.verdict,
221234
"promotion_decision": self.promotion_decision,
235+
"evidence": self.evidence,
222236
"fixture_schema": self.fixture_schema,
223237
"summary": dict(self.summary),
224238
"missing_families": list(self.missing_families),
@@ -234,6 +248,7 @@ class BundleEntry:
234248
verdict: str
235249
promotion_decision: str
236250
integrity: Mapping[str, Any] | None = None
251+
evidence: Mapping[str, Any] | None = None
237252
reason: str | None = None
238253

239254
def to_dict(self) -> dict[str, Any]:
@@ -242,6 +257,7 @@ def to_dict(self) -> dict[str, Any]:
242257
"verdict": self.verdict,
243258
"promotion_decision": self.promotion_decision,
244259
"reason": self.reason,
260+
"evidence": dict(self.evidence) if self.evidence is not None else None,
245261
"integrity": dict(self.integrity) if self.integrity is not None else None,
246262
}
247263

@@ -263,12 +279,34 @@ class SimulationReport:
263279
missing_bundles: list[str] = field(default_factory=list)
264280
error: str | None = None
265281

282+
@property
283+
def evidence(self) -> dict[str, Any]:
284+
bundle_count = int(self.summary.get("total", len(self.bundles)))
285+
integrity_checked = 0
286+
for entry in self.bundles:
287+
evidence = entry.evidence or {}
288+
if evidence.get("integrity_checked") is True:
289+
integrity_checked += 1
290+
291+
return {
292+
"bundle_count": bundle_count,
293+
"missing_bundle_count": len(self.missing_bundles),
294+
"integrity_checked_count": integrity_checked,
295+
"replay_checked_count": 0,
296+
"replay_skipped": True,
297+
"strict_warnings": any(
298+
(entry.evidence or {}).get("strict_warnings") is True
299+
for entry in self.bundles
300+
),
301+
}
302+
266303
def to_dict(self) -> dict[str, Any]:
267304
payload: dict[str, Any] = {
268305
"schema": self.schema,
269306
"schema_version": self.schema_version,
270307
"verdict": self.verdict,
271308
"promotion_decision": self.promotion_decision,
309+
"evidence": self.evidence,
272310
"summary": dict(self.summary),
273311
"bundles": [entry.to_dict() for entry in self.bundles],
274312
"missing_bundles": list(self.missing_bundles),
@@ -325,10 +363,16 @@ def verify_golden_history(
325363
caller decides whether to gate promotion on them.
326364
"""
327365

366+
required = sorted(set(required_families))
328367
fixtures = sorted(Path(fixture_dir).glob("*.json"))
329368

330369
if not fixtures:
331-
report = GoldenHistoryReport(status=STATUS_FAILED)
370+
report = GoldenHistoryReport(
371+
status=STATUS_FAILED,
372+
missing_families=list(required),
373+
required_families=list(required),
374+
covered_families=[],
375+
)
332376
report.summary = {
333377
"fixtures": 0,
334378
"cases": 0,
@@ -386,7 +430,7 @@ def verify_golden_history(
386430
)
387431
)
388432

389-
missing = sorted(set(required_families) - covered_families)
433+
missing = sorted(set(required) - covered_families)
390434
summary = _summarize(cases)
391435
summary["fixtures"] = len(fixtures)
392436

@@ -401,6 +445,8 @@ def verify_golden_history(
401445
status=overall,
402446
cases=cases,
403447
missing_families=missing,
448+
required_families=list(required),
449+
covered_families=sorted(covered_families),
404450
summary=summary,
405451
)
406452

@@ -695,6 +741,15 @@ def simulate_bundles(
695741
path=str(path),
696742
verdict=VERDICT_FAILED,
697743
promotion_decision=PROMOTION_BLOCK_AND_INVESTIGATE,
744+
evidence={
745+
"integrity_checked": False,
746+
"integrity_status": None,
747+
"integrity_finding_count": 0,
748+
"replay_checked": False,
749+
"replay_status": None,
750+
"replay_skipped": True,
751+
"strict_warnings": strict_warnings,
752+
},
698753
reason=f"bundle_unreadable: {exc}",
699754
)
700755
bundles.append(entry)
@@ -713,6 +768,19 @@ def simulate_bundles(
713768
verdict=verdict,
714769
promotion_decision=decision,
715770
integrity=integrity,
771+
evidence={
772+
"integrity_checked": True,
773+
"integrity_status": integrity.get("status"),
774+
"integrity_finding_count": int(
775+
(integrity.get("summary") or {}).get(
776+
"findings", len(integrity.get("findings") or [])
777+
)
778+
),
779+
"replay_checked": False,
780+
"replay_status": None,
781+
"replay_skipped": True,
782+
"strict_warnings": strict_warnings,
783+
},
716784
)
717785
)
718786
verdicts.append(verdict)

tests/test_history_bundle_verify.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -262,6 +262,12 @@ def test_writer_schema_fingerprint_mismatch_in_payload_manifest() -> None:
262262

263263
report = verify_bundle(bundle, signing_key=signing_key)
264264
assert "payload_manifest.writer_schema_fingerprint_mismatch" in _rule_names(report)
265+
finding = next(
266+
finding
267+
for finding in report["findings"]
268+
if finding["rule"] == "payload_manifest.writer_schema_fingerprint_mismatch"
269+
)
270+
assert finding["path"] == "payloads.output.data"
265271
assert report["status"] == STATUS_FAILED
266272

267273

@@ -295,6 +301,12 @@ def test_payload_marked_available_but_missing_is_failed() -> None:
295301
report = verify_bundle(bundle, signing_key=signing_key)
296302

297303
assert "payload_manifest.payload_missing" in _rule_names(report)
304+
finding = next(
305+
finding
306+
for finding in report["findings"]
307+
if finding["rule"] == "payload_manifest.payload_missing"
308+
)
309+
assert finding["path"] == "payloads.arguments.data"
298310
assert report["status"] == STATUS_FAILED
299311

300312

tests/test_replay_verify.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,9 @@ def test_verify_golden_history_replays_clean_fixture(tmp_path: Path) -> None:
136136
"failed": 0,
137137
}
138138
assert report.missing_families == []
139+
assert report.evidence["required_families"] == ["activity"]
140+
assert report.evidence["covered_families"] == ["activity"]
141+
assert report.evidence["missing_family_count"] == 0
139142
assert report.cases[0].status == STATUS_REPLAYED
140143
assert report.cases[0].family == "activity"
141144

@@ -237,6 +240,8 @@ def test_report_to_dict_uses_published_schema(tmp_path: Path) -> None:
237240
payload = report.to_dict()
238241
assert payload["schema"] == REPORT_SCHEMA
239242
assert payload["schema_version"] == REPORT_SCHEMA_VERSION
243+
assert payload["evidence"]["fixture_count"] == 1
244+
assert payload["evidence"]["case_count"] == 0
240245

241246

242247
def _greet_workflows() -> list[type]:
@@ -386,6 +391,7 @@ def test_golden_history_report_to_dict_includes_promotion_decision(tmp_path: Pat
386391
payload = report.to_dict()
387392
assert payload["verdict"] == VERDICT_OK
388393
assert payload["promotion_decision"] == PROMOTION_SAFE_TO_PROMOTE
394+
assert payload["evidence"]["missing_family_count"] == 0
389395
assert payload["cases"][0]["promotion_decision"] == PROMOTION_SAFE_TO_PROMOTE
390396

391397

@@ -461,9 +467,15 @@ def test_simulate_bundles_aggregates_per_bundle_verdicts(tmp_path: Path) -> None
461467
assert payload["promotion_decision"] == PROMOTION_BLOCK_AND_INVESTIGATE
462468
assert payload["summary"]["total"] == 2
463469
assert payload["summary"][VERDICT_FAILED] == 2
470+
assert payload["evidence"]["bundle_count"] == 2
471+
assert payload["evidence"]["missing_bundle_count"] == 0
472+
assert payload["evidence"]["integrity_checked_count"] == 2
473+
assert payload["evidence"]["replay_checked_count"] == 0
474+
assert payload["evidence"]["replay_skipped"] is True
464475
for entry in payload["bundles"]:
465476
assert entry["verdict"] == VERDICT_FAILED
466477
assert entry["promotion_decision"] == PROMOTION_BLOCK_AND_INVESTIGATE
478+
assert entry["evidence"]["integrity_checked"] is True
467479

468480

469481
def test_simulate_bundles_cli(tmp_path: Path) -> None:
@@ -486,6 +498,7 @@ def test_simulate_bundles_cli(tmp_path: Path) -> None:
486498
assert payload["schema"] == SIMULATION_REPORT_SCHEMA
487499
assert payload["verdict"] == VERDICT_FAILED
488500
assert payload["promotion_decision"] == PROMOTION_BLOCK_AND_INVESTIGATE
501+
assert payload["evidence"]["bundle_count"] == 1
489502

490503

491504
def test_cli_requires_workflows_when_not_simulating(tmp_path: Path) -> None:

0 commit comments

Comments
 (0)