Skip to content

Commit fc17b46

Browse files
authored
Add artifact evidence index
Add deterministic evidence index generator, committed artifact index, and tests. Scope: artifact metadata only; no runtime, fixture, validator, workflow, taxonomy, LLM, embedding, fuzzy, or external API changes.
1 parent ff63e92 commit fc17b46

3 files changed

Lines changed: 473 additions & 0 deletions

File tree

artifacts/evidence_index.json

Lines changed: 200 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,200 @@
1+
{
2+
"artifact_id": "evidence_index_v1",
3+
"generated_by": "EvidenceIndexGenerator",
4+
"version": "1.0",
5+
"evaluation_mode": "deterministic",
6+
"llm_judges": "none",
7+
"external_apis": "none",
8+
"artifacts": [
9+
{
10+
"path": "artifacts/capability_boundary_replay_results.json",
11+
"format": "json",
12+
"generator": "scripts/generate_capability_boundary_replay_artifact.py",
13+
"evidence_category": "capability_boundary_replay",
14+
"evidence_role": "capability boundary drift replay evidence",
15+
"fixture_families": [
16+
"coding_workflow_pr_review",
17+
"cross_domain_operational_dependency_workflow",
18+
"incident_response_page_triage",
19+
"mcp_trace_replay"
20+
],
21+
"top_level_keys": [
22+
"artifact_id",
23+
"evaluation_mode",
24+
"external_apis",
25+
"families",
26+
"generated_by",
27+
"global_summary",
28+
"llm_judges",
29+
"version"
30+
],
31+
"deterministic_evaluation": true,
32+
"llm_judges": "none",
33+
"external_apis": "none",
34+
"manifest_aligned": true,
35+
"evidence_bearing": true,
36+
"visualization_only": false
37+
},
38+
{
39+
"path": "artifacts/graph_diff_results.json",
40+
"format": "json",
41+
"generator": "scripts/generate_graph_diff_artifact.py",
42+
"evidence_category": "graph_diff",
43+
"evidence_role": "relational replay graph evidence",
44+
"fixture_families": [
45+
"coding_workflow_pr_review",
46+
"cross_domain_operational_dependency_workflow",
47+
"incident_response_page_triage",
48+
"mcp_trace_replay"
49+
],
50+
"top_level_keys": [
51+
"artifact_id",
52+
"evaluation_mode",
53+
"external_apis",
54+
"families",
55+
"generated_by",
56+
"global_summary",
57+
"llm_judges",
58+
"version"
59+
],
60+
"deterministic_evaluation": true,
61+
"llm_judges": "none",
62+
"external_apis": "none",
63+
"manifest_aligned": true,
64+
"evidence_bearing": true,
65+
"visualization_only": false
66+
},
67+
{
68+
"path": "artifacts/mcp_trace_replay_results.json",
69+
"format": "json",
70+
"generator": "scripts/generate_mcp_trace_replay_artifact.py",
71+
"evidence_category": "mcp_trace_replay",
72+
"evidence_role": "single-family MCP trace replay evidence",
73+
"fixture_families": [
74+
"mcp_trace_replay"
75+
],
76+
"top_level_keys": [
77+
"artifact_id",
78+
"family",
79+
"fixtures",
80+
"generated_by",
81+
"summary",
82+
"version"
83+
],
84+
"deterministic_evaluation": true,
85+
"llm_judges": "none",
86+
"external_apis": "none",
87+
"manifest_aligned": false,
88+
"evidence_bearing": true,
89+
"visualization_only": false
90+
},
91+
{
92+
"path": "artifacts/multi_family_admissibility_curves.svg",
93+
"format": "svg",
94+
"generator": "scripts/render_multi_family_admissibility_svg.py",
95+
"evidence_category": "multi_family_admissibility_visualization",
96+
"evidence_role": "visualization of admissibility outcomes",
97+
"fixture_families": [],
98+
"top_level_keys": [],
99+
"deterministic_evaluation": true,
100+
"llm_judges": "none",
101+
"external_apis": "none",
102+
"manifest_aligned": false,
103+
"evidence_bearing": false,
104+
"visualization_only": true
105+
},
106+
{
107+
"path": "artifacts/multi_family_admissibility_results.json",
108+
"format": "json",
109+
"generator": "scripts/generate_multi_family_admissibility_artifact.py",
110+
"evidence_category": "multi_family_admissibility",
111+
"evidence_role": "cross-family admissibility evidence",
112+
"fixture_families": [
113+
"coding_workflow_pr_review",
114+
"cross_domain_operational_dependency_workflow",
115+
"incident_response_page_triage",
116+
"mcp_trace_replay"
117+
],
118+
"top_level_keys": [
119+
"artifact_id",
120+
"families",
121+
"generated_by",
122+
"version"
123+
],
124+
"deterministic_evaluation": true,
125+
"llm_judges": "none",
126+
"external_apis": "none",
127+
"manifest_aligned": true,
128+
"evidence_bearing": true,
129+
"visualization_only": false
130+
},
131+
{
132+
"path": "artifacts/replay_semantic_integrity_results.json",
133+
"format": "json",
134+
"generator": "scripts/generate_replay_semantic_integrity_artifact.py",
135+
"evidence_category": "replay_semantic_integrity",
136+
"evidence_role": "semantic replay integrity evidence",
137+
"fixture_families": [
138+
"coding_workflow_pr_review",
139+
"cross_domain_operational_dependency_workflow",
140+
"incident_response_page_triage",
141+
"mcp_trace_replay"
142+
],
143+
"top_level_keys": [
144+
"artifact_id",
145+
"evaluation_mode",
146+
"external_apis",
147+
"families",
148+
"generated_by",
149+
"global_summary",
150+
"llm_judges",
151+
"version"
152+
],
153+
"deterministic_evaluation": true,
154+
"llm_judges": "none",
155+
"external_apis": "none",
156+
"manifest_aligned": true,
157+
"evidence_bearing": true,
158+
"visualization_only": false
159+
},
160+
{
161+
"path": "artifacts/tool_ordering_replay_results.json",
162+
"format": "json",
163+
"generator": "scripts/generate_tool_ordering_replay_artifact.py",
164+
"evidence_category": "tool_ordering_replay",
165+
"evidence_role": "tool-order replay drift evidence",
166+
"fixture_families": [
167+
"coding_workflow_pr_review",
168+
"cross_domain_operational_dependency_workflow",
169+
"incident_response_page_triage",
170+
"mcp_trace_replay"
171+
],
172+
"top_level_keys": [
173+
"artifact_id",
174+
"evaluation_mode",
175+
"external_apis",
176+
"families",
177+
"generated_by",
178+
"global_summary",
179+
"llm_judges",
180+
"version"
181+
],
182+
"deterministic_evaluation": true,
183+
"llm_judges": "none",
184+
"external_apis": "none",
185+
"manifest_aligned": true,
186+
"evidence_bearing": true,
187+
"visualization_only": false
188+
}
189+
],
190+
"global_summary": {
191+
"artifact_count": 7,
192+
"json_artifact_count": 6,
193+
"svg_artifact_count": 1,
194+
"evidence_bearing_count": 6,
195+
"visualization_only_count": 1,
196+
"deterministic_artifact_count": 7,
197+
"llm_free_artifact_count": 7,
198+
"external_api_free_artifact_count": 7
199+
}
200+
}

scripts/generate_evidence_index.py

Lines changed: 171 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,171 @@
1+
"""Generate deterministic evidence index for committed artifacts."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from pathlib import Path
7+
from typing import Any
8+
9+
# Repository root: two levels above this file's resolved location, i.e. the
# parent of the directory that contains the script.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Fixture manifest consulted for the set of known fixture families.
MANIFEST_PATH = REPO_ROOT / "fixtures" / "manifest.json"
# Default destination of the generated evidence index.
OUTPUT_PATH = REPO_ROOT / "artifacts" / "evidence_index.json"
12+
13+
# Static catalogue of the artifacts to index. Each spec carries the fields
# that are declared by hand (path, format, generator, category, role and the
# evidence/visualization flags); paths are relative to the repository root.
ARTIFACT_SPECS: tuple[dict[str, Any], ...] = (
    {
        "path": "artifacts/capability_boundary_replay_results.json",
        "format": "json",
        "generator": "scripts/generate_capability_boundary_replay_artifact.py",
        "evidence_category": "capability_boundary_replay",
        "evidence_role": "capability boundary drift replay evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
    {
        "path": "artifacts/graph_diff_results.json",
        "format": "json",
        "generator": "scripts/generate_graph_diff_artifact.py",
        "evidence_category": "graph_diff",
        "evidence_role": "relational replay graph evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
    {
        "path": "artifacts/mcp_trace_replay_results.json",
        "format": "json",
        "generator": "scripts/generate_mcp_trace_replay_artifact.py",
        "evidence_category": "mcp_trace_replay",
        "evidence_role": "single-family MCP trace replay evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
    {
        "path": "artifacts/multi_family_admissibility_curves.svg",
        "format": "svg",
        "generator": "scripts/render_multi_family_admissibility_svg.py",
        "evidence_category": "multi_family_admissibility_visualization",
        "evidence_role": "visualization of admissibility outcomes",
        "evidence_bearing": False,
        "visualization_only": True,
    },
    {
        "path": "artifacts/multi_family_admissibility_results.json",
        "format": "json",
        "generator": "scripts/generate_multi_family_admissibility_artifact.py",
        "evidence_category": "multi_family_admissibility",
        "evidence_role": "cross-family admissibility evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
    {
        "path": "artifacts/replay_semantic_integrity_results.json",
        "format": "json",
        "generator": "scripts/generate_replay_semantic_integrity_artifact.py",
        "evidence_category": "replay_semantic_integrity",
        "evidence_role": "semantic replay integrity evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
    {
        "path": "artifacts/tool_ordering_replay_results.json",
        "format": "json",
        "generator": "scripts/generate_tool_ordering_replay_artifact.py",
        "evidence_category": "tool_ordering_replay",
        "evidence_role": "tool-order replay drift evidence",
        "evidence_bearing": True,
        "visualization_only": False,
    },
)
78+
79+
80+
def _load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 text and return the parsed JSON document.

    No shape check is performed here: the return annotation reflects how
    callers use the result (as a JSON object), but the value is whatever
    ``json.loads`` produces for the file's contents.
    """
    return json.loads(path.read_text(encoding="utf-8"))
82+
83+
84+
def _manifest_families() -> set[str]:
    """Return the distinct family names listed in the fixture manifest.

    Reads ``MANIFEST_PATH`` and collects the ``"family"`` value (coerced to
    ``str``) of every item under the manifest's ``"fixtures"`` key.

    Raises:
        KeyError: if the manifest has no ``"fixtures"`` key or a fixture has
            no ``"family"`` key.
    """
    manifest = _load_json(MANIFEST_PATH)
    return {str(fixture["family"]) for fixture in manifest["fixtures"]}
87+
88+
89+
def _extract_fixture_families(payload: dict[str, Any]) -> list[str]:
    """Collect the fixture family names declared by an artifact payload.

    Two layouts are recognised and may both be present:

    * ``payload["families"]`` is a list whose dict items carry a string
      ``"family"`` value; items of any other shape are ignored.
    * ``payload["family"]`` is a single string.

    Returns:
        The distinct family names in sorted order; an empty list when the
        payload declares none.
    """
    families: set[str] = set()

    declared = payload.get("families")
    if isinstance(declared, list):
        for item in declared:
            if isinstance(item, dict) and isinstance(item.get("family"), str):
                families.add(item["family"])

    single = payload.get("family")
    if isinstance(single, str):
        families.add(single)

    return sorted(families)
98+
99+
100+
def _build_artifact_entry(spec: dict[str, Any], manifest_families: set[str]) -> dict[str, Any] | None:
    """Build the index entry for one artifact spec.

    Args:
        spec: One item of ``ARTIFACT_SPECS``; its ``"path"`` is resolved
            against ``REPO_ROOT``.
        manifest_families: Family names from the fixture manifest, used to
            decide ``"manifest_aligned"``.

    Returns:
        The entry dict, or ``None`` when the artifact file does not exist
        (missing artifacts are left out of the index rather than reported).
    """
    artifact_path = REPO_ROOT / spec["path"]
    if not artifact_path.exists():
        return None

    # Defaults apply to artifacts that are not inspected (non-JSON formats).
    # Key order is significant: it is the order the keys are serialized in.
    entry: dict[str, Any] = {
        "path": spec["path"],
        "format": spec["format"],
        "generator": spec["generator"],
        "evidence_category": spec["evidence_category"],
        "evidence_role": spec["evidence_role"],
        "fixture_families": [],
        "top_level_keys": [],
        "deterministic_evaluation": True,
        "llm_judges": "none",
        "external_apis": "none",
        "manifest_aligned": False,
        "evidence_bearing": spec["evidence_bearing"],
        "visualization_only": spec["visualization_only"],
    }

    if spec["format"] == "json":
        payload = _load_json(artifact_path)
        families = _extract_fixture_families(payload)
        entry["fixture_families"] = families
        entry["top_level_keys"] = sorted(payload.keys())
        # A payload that does not state these fields is treated as
        # deterministic, LLM-free and external-API-free.
        entry["deterministic_evaluation"] = payload.get("evaluation_mode", "deterministic") == "deterministic"
        entry["llm_judges"] = payload.get("llm_judges", "none")
        entry["external_apis"] = payload.get("external_apis", "none")
        # Aligned only when the artifact covers exactly the manifest's
        # families; an artifact declaring no families stays unaligned.
        if families:
            entry["manifest_aligned"] = set(families) == manifest_families

    return entry
133+
134+
135+
def generate_evidence_index(output_path: Path = OUTPUT_PATH) -> Path:
    """Build the evidence index and write it to *output_path* as JSON.

    Specs are processed in ascending ``"path"`` order; specs whose artifact
    file is missing contribute no entry. Summary counts are computed from the
    entries that were actually built.

    Args:
        output_path: Destination file; parent directories are created if
            needed. Defaults to ``OUTPUT_PATH``.

    Returns:
        ``output_path``, after the file has been written.
    """
    manifest_families = _manifest_families()

    # Sort by path so the output does not depend on declaration order.
    candidates = (
        _build_artifact_entry(spec, manifest_families)
        for spec in sorted(ARTIFACT_SPECS, key=lambda item: item["path"])
    )
    artifacts = [entry for entry in candidates if entry is not None]

    index = {
        "artifact_id": "evidence_index_v1",
        "generated_by": "EvidenceIndexGenerator",
        "version": "1.0",
        "evaluation_mode": "deterministic",
        "llm_judges": "none",
        "external_apis": "none",
        "artifacts": artifacts,
        "global_summary": {
            "artifact_count": len(artifacts),
            "json_artifact_count": sum(1 for item in artifacts if item["format"] == "json"),
            "svg_artifact_count": sum(1 for item in artifacts if item["format"] == "svg"),
            "evidence_bearing_count": sum(1 for item in artifacts if item["evidence_bearing"]),
            "visualization_only_count": sum(1 for item in artifacts if item["visualization_only"]),
            "deterministic_artifact_count": sum(1 for item in artifacts if item["deterministic_evaluation"]),
            "llm_free_artifact_count": sum(1 for item in artifacts if item["llm_judges"] == "none"),
            "external_api_free_artifact_count": sum(1 for item in artifacts if item["external_apis"] == "none"),
        },
    }

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Two-space indent plus a trailing newline, written as UTF-8.
    output_path.write_text(json.dumps(index, indent=2) + "\n", encoding="utf-8")
    return output_path
167+
168+
169+
if __name__ == "__main__":
    # Regenerate the index at its default location and print the written
    # file's path relative to the repository root, with forward slashes.
    path = generate_evidence_index()
    print(path.relative_to(REPO_ROOT).as_posix())

0 commit comments

Comments
 (0)