|
| 1 | +"""Generate deterministic evidence index for committed artifacts.""" |
| 2 | + |
| 3 | +from __future__ import annotations |
| 4 | + |
| 5 | +import json |
| 6 | +from pathlib import Path |
| 7 | +from typing import Any |
| 8 | + |
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Fixture manifest read by _manifest_families().
MANIFEST_PATH = REPO_ROOT.joinpath("fixtures", "manifest.json")
# Default destination for the generated evidence index.
OUTPUT_PATH = REPO_ROOT.joinpath("artifacts", "evidence_index.json")
| 12 | + |
def _artifact_spec(
    path: str,
    generator: str,
    evidence_category: str,
    evidence_role: str,
    *,
    artifact_format: str = "json",
    evidence_bearing: bool = True,
    visualization_only: bool = False,
) -> dict[str, Any]:
    """Build one artifact spec mapping.

    The defaults describe an evidence-bearing JSON artifact; rows that differ
    override them explicitly.
    """
    return {
        "path": path,
        "format": artifact_format,
        "generator": generator,
        "evidence_category": evidence_category,
        "evidence_role": evidence_role,
        "evidence_bearing": evidence_bearing,
        "visualization_only": visualization_only,
    }


# Static catalogue of the artifacts the index may describe, listed in path order.
# Paths and generator scripts are repo-relative POSIX strings.
ARTIFACT_SPECS: tuple[dict[str, Any], ...] = (
    _artifact_spec(
        "artifacts/capability_boundary_replay_results.json",
        "scripts/generate_capability_boundary_replay_artifact.py",
        "capability_boundary_replay",
        "capability boundary drift replay evidence",
    ),
    _artifact_spec(
        "artifacts/graph_diff_results.json",
        "scripts/generate_graph_diff_artifact.py",
        "graph_diff",
        "relational replay graph evidence",
    ),
    _artifact_spec(
        "artifacts/mcp_trace_replay_results.json",
        "scripts/generate_mcp_trace_replay_artifact.py",
        "mcp_trace_replay",
        "single-family MCP trace replay evidence",
    ),
    # The only non-JSON artifact: a rendering, not evidence in its own right.
    _artifact_spec(
        "artifacts/multi_family_admissibility_curves.svg",
        "scripts/render_multi_family_admissibility_svg.py",
        "multi_family_admissibility_visualization",
        "visualization of admissibility outcomes",
        artifact_format="svg",
        evidence_bearing=False,
        visualization_only=True,
    ),
    _artifact_spec(
        "artifacts/multi_family_admissibility_results.json",
        "scripts/generate_multi_family_admissibility_artifact.py",
        "multi_family_admissibility",
        "cross-family admissibility evidence",
    ),
    _artifact_spec(
        "artifacts/replay_semantic_integrity_results.json",
        "scripts/generate_replay_semantic_integrity_artifact.py",
        "replay_semantic_integrity",
        "semantic replay integrity evidence",
    ),
    _artifact_spec(
        "artifacts/tool_ordering_replay_results.json",
        "scripts/generate_tool_ordering_replay_artifact.py",
        "tool_ordering_replay",
        "tool-order replay drift evidence",
    ),
)
| 78 | + |
| 79 | + |
def _load_json(path: Path) -> dict[str, Any]:
    """Read ``path`` as UTF-8 text and return the parsed JSON value.

    No shape check is performed: the annotation says ``dict``, but whatever
    top-level JSON value the file holds is returned as parsed.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
| 82 | + |
| 83 | + |
def _manifest_families() -> set[str]:
    """Return the distinct ``family`` values of the manifest fixtures, as strings.

    Reads ``MANIFEST_PATH`` and iterates its top-level ``"fixtures"`` entry;
    each item must provide a ``"family"`` key, otherwise ``KeyError`` is raised.
    """
    fixtures = _load_json(MANIFEST_PATH)["fixtures"]
    names: set[str] = set()
    for fixture in fixtures:
        names.add(str(fixture["family"]))
    return names
| 87 | + |
| 88 | + |
def _extract_fixture_families(payload: dict[str, Any]) -> list[str]:
    """Collect the fixture family names mentioned by an artifact payload.

    Two places are consulted:

    * ``payload["families"]`` when it is a list: every dict item whose
      ``"family"`` value is a string contributes that string;
    * ``payload["family"]`` when it is itself a string.

    Anything of another shape is ignored. The result is de-duplicated and
    returned in sorted order.
    """
    listed = payload.get("families")
    single = payload.get("family")

    found: set[str] = {
        item["family"]
        for item in (listed if isinstance(listed, list) else [])
        if isinstance(item, dict) and isinstance(item.get("family"), str)
    }
    if isinstance(single, str):
        found.add(single)
    return sorted(found)
| 98 | + |
| 99 | + |
def _build_artifact_entry(spec: dict[str, Any], manifest_families: set[str]) -> dict[str, Any] | None:
    """Describe one artifact for the evidence index.

    Args:
        spec: One item of ``ARTIFACT_SPECS``.
        manifest_families: Family names taken from the fixture manifest.

    Returns:
        ``None`` when the artifact file does not exist under ``REPO_ROOT``;
        otherwise the index entry. Non-JSON artifacts keep the default values
        below. JSON artifacts have the family list, top-level keys and
        evaluation flags filled in from the file's content.
    """
    location = REPO_ROOT / spec["path"]
    if not location.exists():
        return None

    # Key order here is the key order of the serialized entry.
    entry: dict[str, Any] = {
        "path": spec["path"],
        "format": spec["format"],
        "generator": spec["generator"],
        "evidence_category": spec["evidence_category"],
        "evidence_role": spec["evidence_role"],
        "fixture_families": [],
        "top_level_keys": [],
        "deterministic_evaluation": True,
        "llm_judges": "none",
        "external_apis": "none",
        "manifest_aligned": False,
        "evidence_bearing": spec["evidence_bearing"],
        "visualization_only": spec["visualization_only"],
    }
    if spec["format"] != "json":
        return entry

    payload = _load_json(location)
    families = _extract_fixture_families(payload)
    # update() overwrites existing keys in place, so the key order is unchanged.
    entry.update(
        {
            "fixture_families": families,
            "top_level_keys": sorted(payload),
            "deterministic_evaluation": payload.get("evaluation_mode", "deterministic") == "deterministic",
            "llm_judges": payload.get("llm_judges", "none"),
            "external_apis": payload.get("external_apis", "none"),
            # Aligned only when the artifact names families and they equal the manifest set.
            "manifest_aligned": bool(families) and set(families) == manifest_families,
        }
    )
    return entry
| 133 | + |
| 134 | + |
def generate_evidence_index(output_path: Path = OUTPUT_PATH) -> Path:
    """Write the evidence index JSON file and return the path written.

    Each spec in ``ARTIFACT_SPECS`` is visited in path order. Specs whose
    artifact file is missing are left out. The index carries fixed header
    fields, the artifact entries and a ``global_summary`` of counts.

    Args:
        output_path: Destination file; its parent directories are created
            when absent.

    Returns:
        ``output_path``, unchanged.
    """
    manifest_families = _manifest_families()

    artifacts: list[dict[str, Any]] = []
    for spec in sorted(ARTIFACT_SPECS, key=lambda item: item["path"]):
        entry = _build_artifact_entry(spec, manifest_families)
        if entry is not None:
            artifacts.append(entry)

    def count_where(predicate: Any) -> int:
        """Count the artifact entries for which ``predicate`` is truthy."""
        return sum(1 for item in artifacts if predicate(item))

    global_summary = {
        "artifact_count": len(artifacts),
        "json_artifact_count": count_where(lambda item: item["format"] == "json"),
        "svg_artifact_count": count_where(lambda item: item["format"] == "svg"),
        "evidence_bearing_count": count_where(lambda item: item["evidence_bearing"]),
        "visualization_only_count": count_where(lambda item: item["visualization_only"]),
        "deterministic_artifact_count": count_where(lambda item: item["deterministic_evaluation"]),
        "llm_free_artifact_count": count_where(lambda item: item["llm_judges"] == "none"),
        "external_api_free_artifact_count": count_where(lambda item: item["external_apis"] == "none"),
    }

    # Keys are serialized in insertion order (no sort_keys), so this order is the file's order.
    index = {
        "artifact_id": "evidence_index_v1",
        "generated_by": "EvidenceIndexGenerator",
        "version": "1.0",
        "evaluation_mode": "deterministic",
        "llm_judges": "none",
        "external_apis": "none",
        "artifacts": artifacts,
        "global_summary": global_summary,
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # The file ends with a single trailing newline.
    output_path.write_text(f"{json.dumps(index, indent=2)}\n", encoding="utf-8")
    return output_path
| 167 | + |
| 168 | + |
if __name__ == "__main__":
    # Regenerate the index at its default location and report where it went,
    # as a repo-relative POSIX path.
    written_path = generate_evidence_index()
    print(written_path.relative_to(REPO_ROOT).as_posix())
0 commit comments