Skip to content

Commit 7ff4f18

Browse files
authored
Create scenarios.py
1 parent 4ba4419 commit 7ff4f18

1 file changed

Lines changed: 144 additions & 0 deletions

File tree

src/ohip_bench/scenarios.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""
2+
Built-in deterministic benchmark scenarios for IX-HapticSight.
3+
4+
This module provides a small catalog of reusable benchmark scenarios that can
5+
be run against the current RuntimeService + BenchmarkRunner stack.
6+
7+
The intent is to make common evaluation paths explicit and discoverable:
8+
- consent happy path
9+
- consent denial path
10+
- safety denial path
11+
- execution capability mismatch path
12+
13+
These scenarios are still repository-stage artifacts, not deployment evidence.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
from .models import (
19+
BenchmarkDomain,
20+
BenchmarkExpectation,
21+
BenchmarkScenario,
22+
)
23+
from .runner import make_consent_scenario
24+
25+
26+
def make_safety_red_scenario() -> BenchmarkScenario:
    """
    Build the deterministic "RED safety level" denial scenario.

    The inputs describe an idle session whose safety level is RED, a
    shoulder support-contact request that requires contact and fresh
    consent, an explicit consent grant scoped to that contact, a nudge
    toward the target pose, and a start pose.

    Expected result:
    - status DENIED
    - not executable
    - fault reason "session_safety_red"
    - empty execution status
    """
    # Session snapshot: idle on both state axes, safety level RED.
    session = {
        "session_id": "sess-1",
        "subject_id": "person-1",
        "interaction_state": "IDLE",
        "execution_state": "IDLE",
        "safety_level": "RED",
        "consent_valid": False,
        "consent_fresh": False,
    }

    # The contact request under evaluation.
    request = {
        "request_id": "req-1",
        "interaction_kind": "SUPPORT_CONTACT",
        "source": "BENCHMARK",
        "target_name": "shoulder",
        "requested_scope": "shoulder_contact",
        "requires_contact": True,
        "requires_consent_freshness": True,
    }

    # Explicit consent covering the requested scope.
    consent = {
        "grant_explicit": True,
        "scopes": ["shoulder_contact"],
        "source": "benchmark",
    }

    nudge = {
        "level": "GREEN",
        "target": {
            "frame": "W",
            "xyz": [0.42, -0.18, 1.36],
            "rpy": [0.0, 0.0, 1.57],
        },
        "normal": [0.0, 0.8, 0.6],
        "rationale": "benchmark shoulder support",
        "priority": 0.9,
        "expires_in_ms": 500,
    }

    start_pose = {
        "frame": "W",
        "xyz": [0.10, 0.00, 1.00],
        "rpy": [0.0, 0.0, 0.0],
    }

    return BenchmarkScenario(
        scenario_id="safety-red-001",
        title="RED safety level blocks support contact",
        domain=BenchmarkDomain.SAFETY,
        description=(
            "A contact request should be denied when the session starts "
            "in RED safety state even if explicit consent exists."
        ),
        # Section order is kept stable: session, request, consent, nudge, pose.
        inputs={
            "session": session,
            "request": request,
            "consent": consent,
            "nudge": nudge,
            "start_pose": start_pose,
        },
        expectation=BenchmarkExpectation(
            expected_status="DENIED",
            expected_executable=False,
            expected_fault_reason="session_safety_red",
            expected_execution_status="",
        ),
        tags=("safety", "red", "contact", "benchmark"),
    )
93+
94+
95+
def make_consent_catalog() -> list[BenchmarkScenario]:
    """
    Return the standard consent-path scenarios, approval case first.

    Both entries are built with ``make_consent_scenario``: one with
    explicit consent that expects an APPROVED, executable result with an
    ACCEPTED execution status, and one without explicit consent that
    expects a DENIED, non-executable result with the
    "consent_missing_or_invalid" fault reason.
    """
    approved = make_consent_scenario(
        scenario_id="consent-approved-001",
        title="Explicit consent allows support contact",
        explicit_consent=True,
        expected_status="APPROVED",
        expected_executable=True,
        expected_execution_status="ACCEPTED",
    )
    denied = make_consent_scenario(
        scenario_id="consent-denied-001",
        title="Missing consent blocks support contact",
        explicit_consent=False,
        expected_status="DENIED",
        expected_executable=False,
        expected_fault_reason="consent_missing_or_invalid",
    )
    return [approved, denied]
117+
118+
119+
def make_core_catalog() -> list[BenchmarkScenario]:
    """
    Assemble the core benchmark catalog for the current repository stage.

    The result is a new list holding the consent-path scenarios followed
    by the RED-safety scenario. The catalog is deliberately small and
    deterministic; further scenarios can be appended later as the runtime,
    replay, and logging layers mature.
    """
    # Consent scenarios come first, then the safety denial scenario.
    return [*make_consent_catalog(), make_safety_red_scenario()]
130+
131+
132+
def scenario_ids(scenarios: list[BenchmarkScenario]) -> list[str]:
    """
    Collect the ``scenario_id`` of every scenario, preserving input order.

    Returns a new list; an empty catalog yields an empty list.
    """
    return [entry.scenario_id for entry in scenarios]
137+
138+
139+
# Public API of this module: the scenario/catalog factories and the ID helper.
__all__ = [
    "make_safety_red_scenario",
    "make_consent_catalog",
    "make_core_catalog",
    "scenario_ids",
]

0 commit comments

Comments
 (0)