-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfixtures.py
More file actions
342 lines (283 loc) · 14 KB
/
Copy pathfixtures.py
File metadata and controls
342 lines (283 loc) · 14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
"""Typed fixture root models.
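Per the Phase 0 plan: every YAML fixture under
``openarmature-spec/spec/<capability>/conformance/`` lands as one of the
typed shapes in the ``Fixture`` union. The shape is chosen by a callable
discriminator that inspects the raw dict's top-level keys (no tag field is
present in the YAML).

Three of the shapes are defined in this module; a fourth,
``PromptManagementFixture``, is imported from :mod:`prompt_management` and
takes part in the same union. The three shapes defined here: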
- :class:`LlmProviderFixture` — ``mock_provider`` is at the top level. Tests
the stateless ``complete()`` / ``ready()`` operations of the
``llm-provider`` capability against canned wire responses. May contain
``cases:`` for table-style sub-cases that share the mock provider.
- :class:`CasesFixture` — top-level ``cases:`` list (and no
``mock_provider``). Each case carries its own graph definition and
expected block. Optional shared ``subgraph`` / ``subgraph_with_idx``
blocks at the top level apply across cases.
- :class:`GraphFixture` — direct graph at the top level (state + entry +
nodes + edges + initial_state + expected). Optional ``run_count`` for
determinism fixtures, plus a long tail of optional harness directives
(``observers``, ``middleware``, ``caller_correlation_id``,
``detached_subgraphs``, etc.).
Sub-shapes (state field schemas, node directives, edge specs, middleware
specs, observer specs, expected blocks) live in :mod:`directives` and
:mod:`expectations`. The split is for readability; what's authoritative is
the union of the fixture shapes parsing every fixture in the spec submodule.
The fixture root models defined in this module use ``extra="forbid"`` to
reject unknown top-level keys; ``CaseSpec`` and ``SubgraphDefinition``
deliberately use ``extra="allow"`` so evolving case and subgraph shapes
still parse.
"""
from __future__ import annotations
from typing import Annotated, Any, Literal
from pydantic import BaseModel, ConfigDict, Discriminator, Tag
from .directives import (
EdgeSpec,
LlmCallSpec,
MiddlewareConfig,
MockProviderConfig,
MockResponse,
NodeSpec,
ObserverSpec,
StateSchema,
)
from .expectations import ExpectedBlock, LlmProviderExpected
from .prompt_management import PromptManagementFixture
class _ForbidExtras(BaseModel):
    """Common base — strict by default.

    Sets ``extra="forbid"`` so that a key not declared on the subclass is a
    validation error. This catches both fixture authors and us drifting from
    the spec: new directives surface as parse errors at the point they're
    introduced rather than getting silently dropped.
    """

    model_config = ConfigDict(extra="forbid")
# ---------------------------------------------------------------------------
# Shared sub-shapes
# ---------------------------------------------------------------------------
class SubgraphDefinition(BaseModel):
    """A subgraph declared at the top level of a fixture or of a case.

    Used for the singular ``subgraph:`` / ``subgraph_with_idx:`` forms and
    for each entry of the plural ``subgraphs:`` map. Carries its own state
    schema, entry, nodes, and edges — structurally a mini-graph.

    This model is permissive (``extra="allow"``): keys not declared below
    are accepted instead of rejected, to absorb any future extension.
    Subgraph-local middleware blocks (pipeline-utilities/020) are covered
    by the declared ``middleware`` field.
    """

    model_config = ConfigDict(extra="allow")

    name: str | None = None  # singular `subgraph:` form
    state: StateSchema
    entry: str
    nodes: dict[str, NodeSpec]
    edges: list[EdgeSpec]
    # Optional subgraph-local middleware configuration.
    middleware: MiddlewareConfig | None = None
class CaseSpec(BaseModel):
    """One sub-case in a ``CasesFixture`` (or in the ``cases:`` block of an
    ``LlmProviderFixture``).

    The shape of a case is fluid — checkpointing fixtures (027–031) bring
    in ``checkpointer``/``first_run_expected_error``/``saved_record_assertions``/
    ``resume`` blocks; llm-provider cases bring in ``call`` /
    ``expected_wire_request``; graph-engine ``007-compile-errors`` cases
    have ``graph:`` wrapping the graph + ``expected_compile_error``;
    observability cases inherit any harness directive a top-level
    ``GraphFixture`` could carry. Permissive extras (``extra="allow"``) so
    the parse keeps pace with case-shape evolution without quarterly model
    edits.

    Only ``name`` is required; every other declared field defaults to
    ``None``.

    NOTE(review): the checkpointing fixture range quoted above (027–031)
    differs from the one in the ``checkpointer`` field comment below
    (024-031, 048-054) — confirm which is current.
    """

    model_config = ConfigDict(extra="allow")

    name: str
    description: str | None = None

    # graph-engine 007 compile-errors: a case wraps the malformed graph
    # under a `graph:` key alongside `expected_compile_error`. The wrapped
    # graph is kept as a raw dict rather than parsed into typed models.
    graph: dict[str, Any] | None = None
    expected_compile_error: str | None = None

    # The graph-shaped fields when a case carries the graph inline (rather
    # than under ``graph:``).
    state: StateSchema | None = None
    entry: str | None = None
    nodes: dict[str, NodeSpec] | None = None
    edges: list[EdgeSpec] | None = None
    initial_state: dict[str, Any] | None = None
    subgraph: SubgraphDefinition | None = None
    subgraphs: dict[str, SubgraphDefinition] | None = None
    middleware: MiddlewareConfig | None = None
    observers: list[ObserverSpec] | None = None
    expected: ExpectedBlock | None = None
    expected_error: dict[str, Any] | None = None

    # llm-provider sub-cases.
    call: LlmCallSpec | None = None
    expected_wire_request: dict[str, Any] | None = None

    # Checkpointing fixtures (024-031, 048-054). Two shapes:
    # - ``str`` (e.g. ``"in_memory"``): backend kind selector.
    # - ``dict``: backend kind + config knobs (e.g. fixture 054's
    #   ``{kind: in_memory_batched, fan_out_internal_save_batching: ...}``).
    checkpointer: str | dict[str, Any] | None = None
    first_run_expected_error: dict[str, Any] | None = None
    saved_record_assertions: dict[str, Any] | None = None
    latest_record_assertions: dict[str, Any] | None = None
    resume: dict[str, Any] | None = None
    invariants: dict[str, Any] | None = None
    # Either an int (run count) or a list of run configs — fixtures vary,
    # so the field is left untyped (``Any``).
    populate_checkpointer_via_runs: Any = None
    invoke_with: dict[str, Any] | None = None
    caller_correlation_id: str | None = None

    # observability — mock LLM responses + per-case run config.
    mock_llm: list[MockResponse] | None = None
    invocations: int | None = None
# ---------------------------------------------------------------------------
# LlmProviderFixture
# ---------------------------------------------------------------------------
class LlmProviderFixture(_ForbidExtras):
    """A fixture under ``spec/llm-provider/conformance/``.

    Either ``calls`` is at the top level (single-case) or wrapped in
    ``cases`` (table-style). ``mock_provider`` is always present and
    discriminates this shape from the graph-shaped fixtures.

    Both ``calls`` and ``cases`` are optional at the model level; this
    class does not itself enforce that exactly one of them is set.
    """

    # Required: its presence is what routes a raw dict to this shape.
    mock_provider: MockProviderConfig
    # Single-case form: the calls listed directly at the top level.
    calls: list[LlmCallSpec] | None = None
    # Table-style form: sub-cases that share the mock provider.
    cases: list[CaseSpec] | None = None
# ---------------------------------------------------------------------------
# CasesFixture
# ---------------------------------------------------------------------------
class CasesFixture(_ForbidExtras):
    """A fixture whose top level is ``cases:`` rather than a single graph.

    Used by ``007-compile-errors``, the checkpointing fixtures (024–031),
    and the determinism / multi-run observability fixtures. Optional shared
    ``subgraph`` / ``subgraph_with_idx`` / ``subgraphs`` at the top level
    apply across all cases. Any other top-level key not listed here is
    rejected (``extra="forbid"`` from the base class).
    """

    # Required list of sub-cases; each one is parsed as a ``CaseSpec``.
    cases: list[CaseSpec]

    # Shared graph-shape blocks that apply across every case. The singular
    # `subgraph` / `subgraph_with_idx` and the plural `subgraphs` map
    # (name -> graph-spec, as the parallel-branches fixtures use) may all
    # appear at the cases-fixture top level. Fixture 064 (failure-isolation
    # cause fidelity) is the first to share a plural `subgraphs:` across
    # cases.
    subgraph: SubgraphDefinition | None = None
    subgraph_with_idx: SubgraphDefinition | None = None
    subgraphs: dict[str, SubgraphDefinition] | None = None
# ---------------------------------------------------------------------------
# GraphFixture
# ---------------------------------------------------------------------------
class GraphFixture(_ForbidExtras):
    """A fixture whose top level IS a single graph.

    Covers the bulk of graph-engine, pipeline-utilities, and observability
    fixtures. Most fields are optional because different fixtures exercise
    different facets of the graph contract; ``state`` is the only required
    field. Undeclared top-level keys are rejected (``extra="forbid"`` from
    the base class).
    """

    # Graph definition (graph-engine + most others).
    state: StateSchema
    entry: str | None = None
    nodes: dict[str, NodeSpec] | None = None
    edges: list[EdgeSpec] | None = None
    initial_state: dict[str, Any] | None = None
    expected: ExpectedBlock | None = None
    # Legacy: top-level expected_error in graph-engine fixtures 008/009.
    expected_error: dict[str, Any] | None = None

    # Subgraph definitions — singular form for graph-engine; plural map for
    # the multi-subgraph cases in observability/008, observability/010, and
    # pipeline-utilities/029.
    subgraph: SubgraphDefinition | None = None
    subgraphs: dict[str, SubgraphDefinition] | None = None
    # Used by pipeline-utilities/020 (fan-out instances expose their idx).
    subgraph_with_idx: SubgraphDefinition | None = None

    # graph-engine §6 observers (since proposal 0003).
    observers: list[ObserverSpec] | None = None

    # pipeline-utilities §6 middleware (proposal 0004) and §10 checkpointer
    # registration (proposal 0008).
    middleware: MiddlewareConfig | None = None
    # NOTE(review): ``CaseSpec.checkpointer`` also accepts a dict form
    # (kind + config knobs); this field is str-only — confirm no top-level
    # graph fixture needs the dict form.
    checkpointer: str | None = None
    clock_stub: dict[str, Any] | None = None

    # Determinism fixtures — graph-engine/010 and pipeline-utilities/011.
    run_count: int | None = None

    # observability / pipeline-utilities cross-cutting harness directives.
    # These are inputs to the test harness, NOT the engine.
    caller_correlation_id: str | None = None
    detached_subgraphs: list[str] | None = None
    detached_fan_outs: list[str] | None = None
    disable_llm_spans: bool | None = None
    # Proposal 0024 (v0.17.0): observer-level opt-outs for the new
    # §5.5.1 payload and §5.5.2/§5.5.3 GenAI semconv attribute sets.
    # ``disable_provider_payload`` defaults to True per §5.5.4 — fixtures
    # that EXERCISE payload emission set it false explicitly (013-018).
    # ``disable_genai_semconv`` defaults to False — fixture 021 sets
    # it true to verify the opt-out.
    # Both model fields default to ``None`` (directive absent); the
    # True/False defaults described above are not applied by this model.
    disable_provider_payload: bool | None = None
    disable_genai_semconv: bool | None = None
    # Proposal 0024 (v0.17.0, fixture 020): provider-level configuration
    # overrides — ``provider.genai_system`` overrides the default
    # ``"openai"`` value of ``gen_ai.system`` for OpenAI-compatible
    # providers serving non-OpenAI endpoints (vLLM, LM Studio, …).
    provider: dict[str, Any] | None = None
    mock_llm: list[MockResponse] | None = None
    caller_global_otel_active: bool | None = None
    # Two shapes:
    # - ``int``: run-count for observability multi-run fixtures (legacy).
    # - ``list[dict]``: per-invocation specs for proposal 0010 §6 Drain
    #   cross-invocation cleanliness fixtures (e.g., fixture 024). Each
    #   entry carries its own ``initial_state``, ``drain``, ``expected``.
    invocations: int | list[dict[str, Any]] | None = None
    # Proposal 0010 §6 Drain — the ``invoke`` directive wraps the
    # ``drain.timeout_seconds`` parameter for single-invocation
    # drain-timeout fixtures (022, 023, 025). Multi-invocation fixture
    # 024 uses the ``invocations`` array above instead.
    invoke: dict[str, Any] | None = None
    # Proposal 0010 §6 Drain — top-level invariants applied across all
    # invocations of a multi-invocation fixture (e.g.,
    # ``second_invocation_drain_independent_of_first`` on fixture 024).
    # Single-invocation fixtures put their invariants under
    # ``expected.invariants`` (the field already on ExpectedBlock).
    invariants: dict[str, Any] | None = None
# ---------------------------------------------------------------------------
# Discriminator + root union
# ---------------------------------------------------------------------------
def _discriminate_fixture(
    value: Any,
) -> Literal["llm_provider", "prompt_management", "cases", "graph"]:
    """Return the union tag naming the fixture shape of ``value``.

    Two kinds of input are recognised:

    - An already-built fixture model (the serialization path): the tag of
      its class is returned, so a ``model_dump`` through the top-level
      union doesn't fall through to ``graph`` and warn.
    - A raw YAML dict: the tag is derived from its top-level keys, tested
      in this order:

      1. ``mock_provider`` present -> ``"llm_provider"``. It wins over
         ``cases`` because some llm-provider fixtures (e.g.
         003-message-validation) have BOTH — ``mock_provider`` is the
         load-bearing discriminator, ``cases`` is the table style.
      2. ``backends`` together with ``calls`` or ``cases``, and none of
         the graph-shape keys (``nodes``, ``edges``, ``state``,
         ``entry``) -> ``"prompt_management"``.
      3. ``cases`` present -> ``"cases"``.
      4. Otherwise -> ``"graph"``.

    Any other input (neither a known model nor a dict) also yields
    ``"graph"``.
    """
    # Serialization path: the value is already a concrete variant.
    if isinstance(value, LlmProviderFixture):
        return "llm_provider"
    if isinstance(value, PromptManagementFixture):
        return "prompt_management"
    if isinstance(value, CasesFixture):
        return "cases"
    if isinstance(value, GraphFixture):
        return "graph"

    # Anything that is not a raw mapping cannot be inspected by key.
    if not isinstance(value, dict):
        return "graph"

    if "mock_provider" in value:
        return "llm_provider"

    # Prompt-management fixtures are recognised by co-occurrence rather
    # than by ``backends:`` alone: until a spec-side ``kind:`` field lands,
    # keying on ``backends:`` only would silently misroute any future
    # fixture that introduces a backends list for some other purpose.
    # Per proposal 0046 (v0.38.0) the chat-prompt fixtures (017-031) carry
    # ``backends:`` + top-level ``cases:`` instead of ``backends:`` +
    # ``calls:``; both pairings route to ``prompt_management``.
    has_backends = "backends" in value
    has_calls_or_cases = "calls" in value or "cases" in value
    looks_like_graph = any(
        key in value for key in ("nodes", "edges", "state", "entry")
    )
    if has_backends and has_calls_or_cases and not looks_like_graph:
        return "prompt_management"

    if "cases" in value:
        return "cases"
    return "graph"
# Root union over every fixture shape. ``_discriminate_fixture`` is called
# on the input and returns one of the ``Tag`` strings below; the member
# carrying that tag is the one the input is validated against.
Fixture = Annotated[
    Annotated[LlmProviderFixture, Tag("llm_provider")]
    | Annotated[PromptManagementFixture, Tag("prompt_management")]
    | Annotated[CasesFixture, Tag("cases")]
    | Annotated[GraphFixture, Tag("graph")],
    Discriminator(_discriminate_fixture),
]
# Public names of this module. ``LlmProviderExpected`` and
# ``PromptManagementFixture`` are not defined here; they are imported from
# ``.expectations`` and ``.prompt_management`` and re-exported.
__all__ = [
    "CaseSpec",
    "CasesFixture",
    "Fixture",
    "GraphFixture",
    "LlmProviderExpected",
    "LlmProviderFixture",
    "PromptManagementFixture",
    "SubgraphDefinition",
]