"""
Built-in deterministic benchmark scenarios for IX-HapticSight.

This module provides a small catalog of reusable benchmark scenarios that can
be run against the current RuntimeService + BenchmarkRunner stack.

The intent is to make common evaluation paths explicit and discoverable:
- consent happy path
- consent denial path
- safety denial path
- execution capability mismatch path

These scenarios are still repository-stage artifacts, not deployment evidence.
"""
| 15 | + |
| 16 | +from __future__ import annotations |
| 17 | + |
| 18 | +from .models import ( |
| 19 | + BenchmarkDomain, |
| 20 | + BenchmarkExpectation, |
| 21 | + BenchmarkScenario, |
| 22 | +) |
| 23 | +from .runner import make_consent_scenario |
| 24 | + |
| 25 | + |
def make_safety_red_scenario() -> BenchmarkScenario:
    """
    Build the benchmark scenario for a support-contact request in a RED session.

    The inputs describe a session whose ``safety_level`` is ``"RED"`` together
    with an explicit consent grant covering the requested
    ``"shoulder_contact"`` scope.

    The attached expectation is:
    - status ``"DENIED"``
    - not executable
    - fault reason ``"session_safety_red"``
    - empty execution status
    """
    # Session snapshot: idle, RED safety level, consent flags not yet set.
    session_state = {
        "session_id": "sess-1",
        "subject_id": "person-1",
        "interaction_state": "IDLE",
        "execution_state": "IDLE",
        "safety_level": "RED",
        "consent_valid": False,
        "consent_fresh": False,
    }

    # The contact request under evaluation.
    contact_request = {
        "request_id": "req-1",
        "interaction_kind": "SUPPORT_CONTACT",
        "source": "BENCHMARK",
        "target_name": "shoulder",
        "requested_scope": "shoulder_contact",
        "requires_contact": True,
        "requires_consent_freshness": True,
    }

    # Explicit consent grant whose scope matches the request's scope.
    consent_grant = {
        "grant_explicit": True,
        "scopes": ["shoulder_contact"],
        "source": "benchmark",
    }

    # Nudge payload: target pose, contact normal, and scheduling metadata.
    nudge_payload = {
        "level": "GREEN",
        "target": {
            "frame": "W",
            "xyz": [0.42, -0.18, 1.36],
            "rpy": [0.0, 0.0, 1.57],
        },
        "normal": [0.0, 0.8, 0.6],
        "rationale": "benchmark shoulder support",
        "priority": 0.9,
        "expires_in_ms": 500,
    }

    initial_pose = {
        "frame": "W",
        "xyz": [0.10, 0.00, 1.00],
        "rpy": [0.0, 0.0, 0.0],
    }

    denied_by_red = BenchmarkExpectation(
        expected_status="DENIED",
        expected_executable=False,
        expected_fault_reason="session_safety_red",
        expected_execution_status="",
    )

    summary = (
        "A contact request should be denied when the session starts in RED "
        "safety state even if explicit consent exists."
    )

    return BenchmarkScenario(
        scenario_id="safety-red-001",
        title="RED safety level blocks support contact",
        domain=BenchmarkDomain.SAFETY,
        description=summary,
        inputs={
            "session": session_state,
            "request": contact_request,
            "consent": consent_grant,
            "nudge": nudge_payload,
            "start_pose": initial_pose,
        },
        expectation=denied_by_red,
        tags=("safety", "red", "contact", "benchmark"),
    )
| 93 | + |
| 94 | + |
def make_consent_catalog() -> list[BenchmarkScenario]:
    """
    Build the consent-path scenarios, approved case first, then denied case.

    Both scenarios are produced by ``make_consent_scenario``; they differ in
    the ``explicit_consent`` flag and in the expected outcome fields.
    """
    # Explicit consent given: expect approval and an accepted execution.
    approved_case = make_consent_scenario(
        scenario_id="consent-approved-001",
        title="Explicit consent allows support contact",
        explicit_consent=True,
        expected_status="APPROVED",
        expected_executable=True,
        expected_execution_status="ACCEPTED",
    )

    # No explicit consent: expect denial with the consent fault reason.
    denied_case = make_consent_scenario(
        scenario_id="consent-denied-001",
        title="Missing consent blocks support contact",
        explicit_consent=False,
        expected_status="DENIED",
        expected_executable=False,
        expected_fault_reason="consent_missing_or_invalid",
    )

    return [approved_case, denied_case]
| 117 | + |
| 118 | + |
def make_core_catalog() -> list[BenchmarkScenario]:
    """
    Assemble the core benchmark catalog as a new list.

    The result holds the consent-path scenarios followed by the RED safety
    scenario. The catalog is intentionally small and deterministic; further
    scenarios can be added once the runtime, replay, and logging layers
    deepen.
    """
    # Order matters to callers that report by position: consent first.
    return [*make_consent_catalog(), make_safety_red_scenario()]
| 130 | + |
| 131 | + |
def scenario_ids(scenarios: list[BenchmarkScenario]) -> list[str]:
    """
    Collect the ``scenario_id`` of every entry, preserving catalog order.
    """
    return [entry.scenario_id for entry in scenarios]
| 137 | + |
| 138 | + |
# Public API of this module: the scenario/catalog builders and the ID helper.
__all__ = [
    "make_safety_red_scenario",
    "make_consent_catalog",
    "make_core_catalog",
    "scenario_ids",
]
0 commit comments