Skip to content

Commit 8fd9cb7

Browse files
committed
feat: ADR 0032 M2-T002 — evidence-event taxonomy (mapper-only, additive)
Adds RunEventType members + bidirectional mapper entries for the 19 audit event types the evidence bundle reads (model_route, git_sandbox_started/resolved, skill_activated, skill_lifecycle_transition, test_run, undo_applied, provenance_collected, tool_call_started/approved/denied/pending_approval, tool_use, tool_error, approval_requested/granted/denied, run_cancelled, run_pending_approval). Event list verified by grep of run_evidence.py. Effect: the M2-T001 reader now surfaces these events from the audit JSONL (previously skipped as legacy) — the M6 fold's only prerequisite (§16). Mapper/reader only; no emit site changed, nothing newly emitted, so the audit stream is byte-unchanged (no double-write; §15). Reader test changes are an intended behavior update, NOT masking: assertions that evidence events return None were removed (now false) and replaced with assertions that they are surfaced; a new positive test was added; legacy-skip examples switched to genuinely-unmapped events. Net coverage is stronger.

Constraint: additive taxonomy/mapper only; zero emit changes; audit output byte-unchanged
Tested: lifecycle 23, run_evidence+summary+receipt 40, smoke 200, acceptance 646/646, full mypy clean 1009 files, ruff clean
Confidence: high
Roadmap-Status: unchanged
Allow-test-weakening: reader tests updated to intended M2-T002 behavior (evidence events now surfaced); removed assertions were newly-false, stronger positive assertions added
1 parent d1791c0 commit 8fd9cb7

2 files changed

Lines changed: 92 additions & 31 deletions

File tree

teaagent/runner/_events.py

Lines changed: 47 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,33 @@ class RunEventType(str, Enum):
4848
RUN_COMPLETED = 'run_completed'
4949
RUN_FAILED = 'run_failed'
5050

51-
# Planned (M1+): PLAN_RESOLVED, DECISION_RECEIVED, TOOL_CALL_APPROVED,
52-
# TOOL_CALL_DENIED, CONTEXT_COMPACTED, BUDGET_CHECKPOINT, ITERATION_COMPLETED,
53-
# FINAL_VALIDATION, RUN_PENDING_APPROVAL, RUN_CANCELLED, RECEIPT_EMITTED,
54-
# SESSION_START, SESSION_END, SKILL_LOAD, MODEL_ROUTE, GIT_SANDBOX_STARTED,
55-
# GIT_SANDBOX_RESOLVED, UNDO_PERFORMED, PRE_TOOL_USE, POST_TOOL_USE, PRE_COMPACT
51+
# M2 evidence-event taxonomy (ADR 0032 §16): the event types the evidence
52+
# bundle reads, so the reader can surface them from the audit JSONL.
53+
# Mapping/reader only — emit-site migration is deferred (see §16).
54+
MODEL_ROUTE = 'model_route'
55+
GIT_SANDBOX_STARTED = 'git_sandbox_started'
56+
GIT_SANDBOX_RESOLVED = 'git_sandbox_resolved'
57+
SKILL_ACTIVATED = 'skill_activated'
58+
SKILL_LIFECYCLE_TRANSITION = 'skill_lifecycle_transition'
59+
TEST_RUN = 'test_run'
60+
UNDO_APPLIED = 'undo_applied'
61+
PROVENANCE_COLLECTED = 'provenance_collected'
62+
TOOL_CALL_STARTED = 'tool_call_started'
63+
TOOL_USE = 'tool_use'
64+
TOOL_CALL_APPROVED = 'tool_call_approved'
65+
TOOL_CALL_DENIED = 'tool_call_denied'
66+
TOOL_CALL_PENDING_APPROVAL = 'tool_call_pending_approval'
67+
TOOL_ERROR = 'tool_error'
68+
APPROVAL_REQUESTED = 'approval_requested'
69+
APPROVAL_GRANTED = 'approval_granted'
70+
APPROVAL_DENIED = 'approval_denied'
71+
RUN_CANCELLED = 'run_cancelled'
72+
RUN_PENDING_APPROVAL = 'run_pending_approval'
73+
74+
# Planned (later phases): PLAN_RESOLVED, DECISION_RECEIVED,
75+
# CONTEXT_COMPACTED, BUDGET_CHECKPOINT, ITERATION_COMPLETED,
76+
# FINAL_VALIDATION, RECEIPT_EMITTED, SESSION_START, SESSION_END,
77+
# PRE_TOOL_USE, POST_TOOL_USE, PRE_COMPACT
5678

5779

5880
@dataclass(frozen=True)
@@ -85,6 +107,26 @@ class RunEvent:
85107
RunEventType.TOOL_CALL_FAILED: 'tool_call_failed',
86108
RunEventType.RUN_COMPLETED: 'run_completed',
87109
RunEventType.RUN_FAILED: 'run_failed',
110+
# M2 evidence-event taxonomy (§16) — mapping only; not yet spine-emitted.
111+
RunEventType.MODEL_ROUTE: 'model_route',
112+
RunEventType.GIT_SANDBOX_STARTED: 'git_sandbox_started',
113+
RunEventType.GIT_SANDBOX_RESOLVED: 'git_sandbox_resolved',
114+
RunEventType.SKILL_ACTIVATED: 'skill_activated',
115+
RunEventType.SKILL_LIFECYCLE_TRANSITION: 'skill_lifecycle_transition',
116+
RunEventType.TEST_RUN: 'test_run',
117+
RunEventType.UNDO_APPLIED: 'undo_applied',
118+
RunEventType.PROVENANCE_COLLECTED: 'provenance_collected',
119+
RunEventType.TOOL_CALL_STARTED: 'tool_call_started',
120+
RunEventType.TOOL_USE: 'tool_use',
121+
RunEventType.TOOL_CALL_APPROVED: 'tool_call_approved',
122+
RunEventType.TOOL_CALL_DENIED: 'tool_call_denied',
123+
RunEventType.TOOL_CALL_PENDING_APPROVAL: 'tool_call_pending_approval',
124+
RunEventType.TOOL_ERROR: 'tool_error',
125+
RunEventType.APPROVAL_REQUESTED: 'approval_requested',
126+
RunEventType.APPROVAL_GRANTED: 'approval_granted',
127+
RunEventType.APPROVAL_DENIED: 'approval_denied',
128+
RunEventType.RUN_CANCELLED: 'run_cancelled',
129+
RunEventType.RUN_PENDING_APPROVAL: 'run_pending_approval',
88130
}
89131

90132

tests/lifecycle/test_event_stream_reader.py

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,29 @@ def test_audit_event_to_run_event_type_m0_events() -> None:
5353

5454

5555
def test_audit_event_to_run_event_type_legacy_events_return_none() -> None:
56-
"""Test that unmapped legacy audit event types return None."""
57-
# Legacy event types that are not in M0 should return None, not raise
58-
assert audit_event_to_run_event_type('tool_call_started') is None
59-
assert audit_event_to_run_event_type('model_route') is None
60-
assert audit_event_to_run_event_type('git_sandbox_started') is None
56+
"""Still-unmapped audit event types return None, not raise."""
57+
# Genuinely unmapped (planned for later phases, no taxonomy entry yet).
58+
assert audit_event_to_run_event_type('plan_resolved') is None
59+
assert audit_event_to_run_event_type('context_compacted') is None
60+
assert audit_event_to_run_event_type('session_start') is None
6161
assert audit_event_to_run_event_type('unknown_event_type') is None
6262

6363

64+
def test_audit_event_to_run_event_type_evidence_events_now_mapped() -> None:
65+
"""M2-T002 (§16): evidence events the bundle reads are now typed/surfaced."""
66+
assert audit_event_to_run_event_type('tool_call_started') == (
67+
RunEventType.TOOL_CALL_STARTED
68+
)
69+
assert audit_event_to_run_event_type('model_route') == RunEventType.MODEL_ROUTE
70+
assert audit_event_to_run_event_type('git_sandbox_started') == (
71+
RunEventType.GIT_SANDBOX_STARTED
72+
)
73+
assert audit_event_to_run_event_type('approval_granted') == (
74+
RunEventType.APPROVAL_GRANTED
75+
)
76+
assert audit_event_to_run_event_type('undo_applied') == RunEventType.UNDO_APPLIED
77+
78+
6479
def test_read_run_events_from_audit_mapped_events_only() -> None:
6580
"""Test that read_run_events_from_audit filters to only mapped event types."""
6681
# Handcrafted list of audit entries: mix of M0 and legacy events
@@ -95,11 +110,18 @@ def test_read_run_events_from_audit_mapped_events_only() -> None:
95110
},
96111
{
97112
'event_id': 'ev-5',
98-
'event_type': 'model_route', # Legacy: not in M0
113+
'event_type': 'model_route', # evidence event — now mapped (§16)
99114
'run_id': 'run-test',
100115
'payload': {'resolved_model': 'claude-opus'},
101116
'created_at': '2026-06-13T10:00:04+00:00',
102117
},
118+
{
119+
'event_id': 'ev-5b',
120+
'event_type': 'plan_resolved', # still unmapped legacy → skipped
121+
'run_id': 'run-test',
122+
'payload': {'plan': 'x'},
123+
'created_at': '2026-06-13T10:00:04.5+00:00',
124+
},
103125
{
104126
'event_id': 'ev-6',
105127
'event_type': 'run_completed', # M0 event
@@ -111,26 +133,23 @@ def test_read_run_events_from_audit_mapped_events_only() -> None:
111133

112134
events = read_run_events_from_audit(entries)
113135

114-
# Only mapped events should appear: run_started, tool_call_requested, tool_call_completed, run_completed
115-
# (tool_call_started and model_route are skipped)
116-
assert len(events) == 4
117-
118-
# Check event types in order
119-
assert events[0].type == RunEventType.RUN_STARTED
120-
assert events[1].type == RunEventType.TOOL_CALL_REQUESTED
121-
assert events[2].type == RunEventType.TOOL_CALL_COMPLETED
122-
assert events[3].type == RunEventType.RUN_COMPLETED
136+
# All taxonomy events surface (M0 + M2 evidence: tool_call_started and
137+
# model_route now mapped per §16). Only still-unmapped plan_resolved skips.
138+
assert [e.type for e in events] == [
139+
RunEventType.RUN_STARTED,
140+
RunEventType.TOOL_CALL_STARTED,
141+
RunEventType.TOOL_CALL_REQUESTED,
142+
RunEventType.TOOL_CALL_COMPLETED,
143+
RunEventType.MODEL_ROUTE,
144+
RunEventType.RUN_COMPLETED,
145+
]
123146

124-
# Check sequence numbers are monotonic starting from 1
125-
assert events[0].seq == 1
126-
assert events[1].seq == 2
127-
assert events[2].seq == 3
128-
assert events[3].seq == 4
147+
# Monotonic seq over surfaced events only, 1..N.
148+
assert [e.seq for e in events] == [1, 2, 3, 4, 5, 6]
129149

130-
# Check run_id and payload preservation
150+
# run_id and payload preserved.
131151
assert all(e.run_id == 'run-test' for e in events)
132152
assert events[0].payload.get('task') == 'test task'
133-
assert events[1].payload.get('call_id') == 'call-1'
134153

135154

136155
def test_read_run_events_from_audit_preserves_redaction() -> None:
@@ -184,9 +203,9 @@ def test_read_run_events_from_jsonl_file(tmp_path: Path) -> None:
184203
},
185204
{
186205
'event_id': 'ev-2',
187-
'event_type': 'tool_call_started', # Legacy, will be skipped
206+
'event_type': 'plan_resolved', # still-unmapped legacy → skipped
188207
'run_id': 'run-jsonl-test',
189-
'payload': {'tool_name': 'test_tool'},
208+
'payload': {'plan': 'x'},
190209
'created_at': '2026-06-13T10:00:01+00:00',
191210
},
192211
{
@@ -216,8 +235,8 @@ def test_read_run_events_from_jsonl_file(tmp_path: Path) -> None:
216235

217236
events = read_run_events_from_jsonl(jsonl_file)
218237

219-
# Should read 3 events (run_started, tool_call_requested, run_completed)
220-
# tool_call_started is skipped as legacy
238+
# Should read 3 events (run_started, tool_call_requested, run_completed);
239+
# the still-unmapped plan_resolved is skipped.
221240
assert len(events) == 3
222241

223242
assert events[0].type == RunEventType.RUN_STARTED

0 commit comments

Comments
 (0)