Skip to content

Commit 9bdbcb7

Browse files
test(conformance): prompt-management harness and 12 fixtures
Adds prompt-management as the fifth conformance capability:

- harness/prompt_management.py — typed YAML models for the new fixture shape (backends + manager + calls with target / operation / capture_as, plus per-call and top-level expected blocks for raises / result_equivalence / prompt_group / rendered_hash_equal / rendered_hash_different).
- harness/fixtures.py — PromptManagementFixture added to the discriminated union; the discriminator recognizes top-level 'backends:' (without 'mock_provider:') as the prompt-management shape.
- harness/loader.py — 'prompt-management' added to CAPABILITIES so test_fixture_parsing.py discovers and parses the new fixtures.

test_prompt_management.py drives all 12 spec fixtures (001-fetch-success through 012-prompt-result-rendered-hash-stability) against the real PromptManager plus a MockPromptBackend that implements the protocol with optional simulate_unavailable, preloaded prompts, and a call_count for fixtures that assert fallback-chain visits. All 12 fixtures pass.
1 parent e5d8f1b commit 9bdbcb7

4 files changed

Lines changed: 500 additions & 5 deletions

File tree

tests/conformance/harness/fixtures.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
StateSchema,
4848
)
4949
from .expectations import ExpectedBlock, LlmProviderExpected
50+
from .prompt_management import PromptManagementFixture
5051

5152

5253
class _ForbidExtras(BaseModel):
@@ -234,34 +235,45 @@ class GraphFixture(_ForbidExtras):
234235
# ---------------------------------------------------------------------------
235236

236237

237-
def _discriminate_fixture(
    value: Any,
) -> Literal["llm_provider", "prompt_management", "cases", "graph"]:
    """Return the union tag for a fixture value.

    ``value`` is either an already-built fixture model (serialization
    path) or the raw mapping parsed from YAML (validation path).

    For raw mappings the key checks run in a fixed priority order:

    1. ``mock_provider`` -> ``"llm_provider"``. Checked first because
       some llm-provider fixtures (e.g. 003-message-validation) also
       carry ``cases`` as their sub-case table.
    2. ``backends`` -> ``"prompt_management"``. Only reached when
       ``mock_provider`` is absent.
    3. ``cases`` -> ``"cases"``.
    4. Anything else -> ``"graph"``.

    Concrete model instances are mapped straight to their own tag so
    that dumping through the top-level union does not fall through to
    ``"graph"``.
    """
    # Serialization path: the value is already one of the variants.
    if isinstance(value, LlmProviderFixture):
        return "llm_provider"
    if isinstance(value, PromptManagementFixture):
        return "prompt_management"
    if isinstance(value, CasesFixture):
        return "cases"
    if isinstance(value, GraphFixture):
        return "graph"

    # Anything that is not a mapping gets the default tag.
    if not isinstance(value, dict):
        return "graph"

    # Validation path: pick the shape from the top-level keys, in
    # priority order.
    if "mock_provider" in value:
        return "llm_provider"
    if "backends" in value:
        return "prompt_management"
    return "cases" if "cases" in value else "graph"
261272

262273

263274
Fixture = Annotated[
264275
Annotated[LlmProviderFixture, Tag("llm_provider")]
276+
| Annotated[PromptManagementFixture, Tag("prompt_management")]
265277
| Annotated[CasesFixture, Tag("cases")]
266278
| Annotated[GraphFixture, Tag("graph")],
267279
Discriminator(_discriminate_fixture),
@@ -275,5 +287,6 @@ def _discriminate_fixture(value: Any) -> Literal["llm_provider", "cases", "graph
275287
"GraphFixture",
276288
"LlmProviderExpected",
277289
"LlmProviderFixture",
290+
"PromptManagementFixture",
278291
"SubgraphDefinition",
279292
]

tests/conformance/harness/loader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"llm-provider",
3232
"pipeline-utilities",
3333
"observability",
34+
"prompt-management",
3435
)
3536

3637
CONFORMANCE_ROOT = Path(__file__).resolve().parents[3] / "openarmature-spec" / "spec"
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
"""Typed YAML models for prompt-management conformance fixtures.
2+
3+
Fixture shape (different from the llm-provider / graph shapes):
4+
5+
- ``backends:`` — list of mock backend specs (each with ``name``,
6+
optional ``simulate_unavailable``, and a list of ``prompts``).
7+
- ``manager:`` — optional manager composition (a list of backend
8+
names, in fallback order).
9+
- ``calls:`` — list of operations to drive. Each call has a
10+
``target`` (``{backend: <name>}`` for direct backend operations,
11+
or ``manager`` for manager operations), an ``operation``, inputs,
12+
optional ``capture_as`` (binds the operation's result to a name
13+
usable by later calls / final expectations), and optional
14+
per-call ``expected``.
15+
- ``expected:`` — optional top-level expectation block for
16+
PromptGroup shape or cross-call result-equivalence assertions
17+
that need access to ``capture_as`` bindings.
18+
"""
19+
20+
from __future__ import annotations
21+
22+
from typing import Any, Literal
23+
24+
from pydantic import BaseModel, ConfigDict
25+
26+
27+
class _StrictModel(BaseModel):
    """Base for fixture models that reject keys they do not declare."""

    model_config = ConfigDict(extra="forbid")
29+
30+
31+
class _PermissiveModel(BaseModel):
    """Base for fixture sub-shapes that accept undeclared keys.

    Meant for shapes that differ from fixture to fixture, where listing
    every variant field by field is not worth the upkeep.
    """

    model_config = ConfigDict(extra="allow")
36+
37+
38+
# ---------------------------------------------------------------------------
39+
# Backend / manager configuration
40+
# ---------------------------------------------------------------------------
41+
42+
43+
class FixturePromptSpec(_StrictModel):
    """One prompt entry in a backend spec's ``prompts`` list.

    All five fields are required strings.
    """

    name: str
    label: str
    version: str
    template: str
    template_hash: str
49+
50+
51+
class FixtureBackendSpec(_StrictModel):
    """One entry of the fixture's top-level ``backends:`` list.

    Only ``name`` is required; ``prompts`` defaults to an empty list and
    ``simulate_unavailable`` defaults to ``False``.
    """

    name: str
    prompts: list[FixturePromptSpec] = []
    simulate_unavailable: bool = False
55+
56+
57+
class FixtureManagerSpec(_StrictModel):
    """The fixture's optional ``manager:`` block."""

    # Backend names; the module docstring describes this list as being
    # in fallback order.
    backends: list[str]
59+
60+
61+
# ---------------------------------------------------------------------------
62+
# Call targets, operations, and expectations
63+
# ---------------------------------------------------------------------------
64+
65+
66+
class BackendTarget(_StrictModel):
    """Mapping form of a call target: ``{backend: <name>}``."""

    backend: str


# A call's ``target`` is either the mapping form above or one of two
# bare strings: ``manager`` or ``construct_prompt_group``.
CallTarget = BackendTarget | Literal["manager", "construct_prompt_group"]
71+
72+
73+
class FixtureExpectedRaises(_PermissiveModel):
    """Per-call ``expected.raises`` shape."""

    category: str
    # Free-form extras the raised error is expected to carry. Fixture 005
    # uses ``description_mentions``, ``name``, ``version`` and ``label``;
    # fixture 008 uses ``secondary_backend_call_count``. The shape is
    # kept permissive so fixtures can evolve.
    carries: dict[str, Any] | None = None
80+
81+
82+
class FixtureExpectedPrompt(_PermissiveModel):
    """Shape of a call's ``expected.prompt`` block (fetch operations).

    Declares no fields of its own; all keys are accepted as extras.
    """
84+
85+
86+
class FixtureExpectedPromptResult(_PermissiveModel):
    """Shape of a call's ``expected.prompt_result`` block (render / get
    operations).

    Declares no fields of its own; all keys are accepted as extras.
    """
88+
89+
90+
class FixtureExpectedPerCall(_StrictModel):
    """A single call's ``expected`` block. Every field is optional."""

    prompt: FixtureExpectedPrompt | None = None
    prompt_result: FixtureExpectedPromptResult | None = None
    raises: FixtureExpectedRaises | None = None
    # Extra used by fixture 008: how many times the secondary backend's
    # fetch was called. Sits next to ``raises``.
    secondary_backend_call_count: int | None = None
    # Extra used by fixture 009: expected call count per named backend
    # after a fetch that exhausts all of them.
    backend_call_counts: dict[str, int] | None = None
101+
102+
103+
class FixtureCall(_StrictModel):
    """One entry of the fixture's ``calls:`` list.

    Only ``target`` is required at the model level; every other field
    defaults to ``None``.
    """

    target: CallTarget
    # Required for fetch / render / get calls. For the
    # ``construct_prompt_group`` shape the target itself indicates the
    # operation, so the call carries no separate ``operation`` field.
    operation: Literal["fetch", "render", "get"] | None = None
    name: str | None = None
    label: str | None = None
    variables: dict[str, Any] | None = None
    # Inputs for render only: either an inline ``fetched_prompt`` (the
    # harness fetches it first, then renders) or a ``fetched_prompt_ref``
    # naming an earlier ``capture_as``.
    fetched_prompt: dict[str, str] | None = None
    fetched_prompt_ref: str | None = None
    # Inputs for construct_prompt_group only.
    group_name: str | None = None
    members_refs: list[str] | None = None
    capture_as: str | None = None
    expected: FixtureExpectedPerCall | None = None
122+
123+
124+
# ---------------------------------------------------------------------------
125+
# Top-level expected
126+
# ---------------------------------------------------------------------------
127+
128+
129+
class FixtureExpectedPromptGroup(_PermissiveModel):
    """Shape of the top-level ``expected.prompt_group`` block
    (fixture 011).

    ``of``, ``group_name`` and ``member_count`` are required; the other
    two fields default to ``None``.
    """

    of: str
    group_name: str
    member_count: int
    member_order_preserved: bool | None = None
    member_names: list[str] | None = None
137+
138+
139+
class FixtureExpectedResultEquivalence(_PermissiveModel):
    """Shape of the top-level ``expected.result_equivalence`` block
    (fixtures 006, 010, 012).

    Describes an equality assertion across two or more captured results
    over a configurable set of fields.
    """

    of: list[str]
    fields_must_match: list[str]
    fields_may_differ: list[str] = []
    # Used by fixture 012: two different captures must hold a DIFFERENT
    # value on each field listed here.
    fields_must_differ: list[str] = []
150+
151+
152+
class FixtureExpectedTopLevel(_StrictModel):
    """The fixture's optional top-level ``expected:`` block."""

    prompt_group: FixtureExpectedPromptGroup | None = None
    result_equivalence: FixtureExpectedResultEquivalence | None = None
    # Plural list form, kept because some fixtures (012) have more than
    # one result-equivalence block. Empty by default.
    result_equivalences: list[FixtureExpectedResultEquivalence] = []
    # Fixture 012's pairwise ``rendered_hash`` assertions. Each entry is
    # a 2-element list of capture names; the pair MUST share
    # (``rendered_hash_equal``) or differ on (``rendered_hash_different``)
    # ``rendered_hash``.
    rendered_hash_equal: list[list[str]] = []
    rendered_hash_different: list[list[str]] = []
163+
164+
165+
# ---------------------------------------------------------------------------
166+
# Fixture root
167+
# ---------------------------------------------------------------------------
168+
169+
170+
class PromptManagementFixture(_StrictModel):
    """Root model of a prompt-management conformance fixture.

    ``backends`` and ``calls`` are required; ``manager`` and the
    top-level ``expected`` block are optional.
    """

    backends: list[FixtureBackendSpec]
    manager: FixtureManagerSpec | None = None
    calls: list[FixtureCall]
    expected: FixtureExpectedTopLevel | None = None

0 commit comments

Comments
 (0)