Skip to content

Commit 2ca14a5

Browse files
Tighten v0.84.0 deferral accuracy + dedup 008
From a PR #197 review: several deferral reasons and conformance.toml statuses overstated non-implementation for partially-shipped work.

- conformance.toml: 0081 not-yet -> textual-only (the <any-string> non-empty matcher is enforced by the adapter; precedent 0055/0071); 0085 not-yet -> partial (the save-side enclosing_fan_out_lineage keying shipped in #194; only the resume consume-side is unshipped), and drop the false "fixture 076 defers with it" claim (076 is number-gated, not deferred); drop the stale "a new case on 008" from the 0084 comment (that case is 0061's and is now wired).
- Reword the 132 / 023-027 / 119 / 022-023 deferral reasons to state what actually ships (the 0084 lineage keying, the 0059 embed wire, 0075 callable branches, the structured_output_invalid base mapping) versus what is genuinely deferred.
- Extract _assert_detached_raise_both_spans from the two near-identical 008 raise-case blocks, and add the parent openarmature.invocation ERROR assertion the fixtures' span_trees expect.
1 parent f731bef commit 2ca14a5

4 files changed

Lines changed: 98 additions & 88 deletions

File tree

conformance.toml

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -801,9 +801,14 @@ status = "not-yet"
801801

802802
# Spec v0.76.0 (proposal 0081). Conformance-adapter value-matcher
803803
# vocabulary (§5.10) -- ratifies the fixture matcher tokens already in
804-
# use. Not-yet (no fixture changes); awaits a conformance-pin batch.
804+
# use. Descriptive (no fixture changes); the one tightening with teeth
805+
# (<any-string> = non-empty) is already enforced by the adapter
806+
# (tests/conformance/test_observability.py _value_matches rejects the
807+
# empty string). No module-level change required; matches the
808+
# conformance-adapter textual-only precedent (0055 / 0071).
805809
[proposals."0081"]
806-
status = "not-yet"
810+
status = "textual-only"
811+
since = "0.16.0"
807812

808813
# Spec v0.77.0 (proposal 0082). Structured-output failure diagnostics
809814
# (graph-engine §6 -- LlmFailedEvent response-side surface for
@@ -822,16 +827,16 @@ status = "not-yet"
822827
# Spec v0.81.0 (proposal 0084). Nested-fan-out span lineage chain
823828
# (graph-engine §6 fan_out_index_chain / branch_name_chain +
824829
# observability §4 / §5.5 lineage-resolved parent). Not-yet; graph-
825-
# engine fixture 039 and observability fixtures 132-134 (+ a new case
826-
# on 008) defer with it.
830+
# engine fixture 039 and observability fixtures 132-134 defer with it.
827831
[proposals."0084"]
828832
status = "not-yet"
829833

830834
# Spec v0.80.0 (proposal 0085). Nested-fan-out checkpoint resume
831-
# lineage (pipeline-utilities §10.11 enclosing_fan_out_lineage). Not-
832-
# yet; pipeline-utilities fixture 076 defers with it.
835+
# lineage (pipeline-utilities §10.11 enclosing_fan_out_lineage).
833836
[proposals."0085"]
834-
status = "not-yet"
837+
status = "partial"
838+
since = "0.16.0"
839+
note = "The SAVE-side enclosing_fan_out_lineage keying (pipeline-utilities §10.11) shipped in #194: a fan-out instance's checkpoint tracking key carries the enclosing fan-out instance lineage in the in-memory dict and through the checkpoint projection / lookup / cleanup / restore, so concurrent outer instances no longer collide. partial because the RESUME consume-side is not yet shipped: a fan-out nested inside an outer instance re-runs rather than skipping on resume, since the saved record format carries no lineage (tracked as a follow-up). pipeline-utilities fixture 076 is not collected by the test_pipeline_utilities.py _LAST_DRIVEN_FIXTURE number gate (it is not deferred); the resume consume-side plus its fixture wiring land in a later PR."
835840

836841
# Spec v0.79.0 (proposal 0086). Service-wide default cache_ttl_seconds
837842
# on PromptManager (prompt-management §6). Not-yet; prompt-management

tests/conformance/test_llm_provider.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -115,9 +115,13 @@
115115
# ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -------------------
116116
# Proposal 0082 (structured-output failure diagnostics, spec v0.77.0)
117117
# extended fixtures 022/023 to additionally assert the now-required
118-
# finish_reason + usage on the structured_output_invalid error; python
119-
# does not yet carry those response-side fields on the error, so the
120-
# extended fixtures defer until a later v0.16.0 PR lands 0082.
118+
# finish_reason + usage on the structured_output_invalid error. The
119+
# base structured_output_invalid mapping still has coverage in
120+
# tests/unit/test_structured_output.py::
121+
# test_pydantic_validation_failure_wraps_in_structured_output_invalid;
122+
# only 0082's additive finish_reason / usage-on-error response-side
123+
# fields are unimplemented, which is why the conformance fixtures defer
124+
# until a later v0.16.0 PR lands 0082.
121125
"022-structured-output-parse-failure": (
122126
"Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
123127
),

tests/conformance/test_observability.py

Lines changed: 73 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -316,7 +316,8 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
316316
# shape the cross-capability parser doesn't model (cf. 110) and has no
317317
# dedicated runner here yet; defer until the harness wires it.
318318
"119-otel-callable-branch-attempt-index-under-node-retry": (
319-
"proposal 0075 callable-branch attempt_index coverage round-out; harness runner not yet wired"
319+
"0075 shipped v0.15.0; runner for this callable-branch-attempt-index-under-retry "
320+
"case not yet wired -- harness gap, not unimplemented"
320321
),
321322
# Proposal 0082 (structured-output failure diagnostics, spec v0.77.0).
322323
# The LlmFailedEvent response-side surface (output_content / finish_reason
@@ -347,15 +348,24 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
347348
"131-token-budget-on-structured-output-failure",
348349
)
349350
},
350-
# Proposal 0084 (nested-fan-out span lineage, spec v0.81.0). The
351-
# fan_out_index_chain / branch_name_chain event surface + the lineage-
352-
# keyed OTel parent resolution are unimplemented until a later v0.16.0
353-
# PR; the no-dropped-spans keying, orphan fallback, and Langfuse parent-
354-
# resolution fixtures defer with it.
351+
# Proposal 0084 (nested-fan-out span lineage, spec v0.81.0).
352+
# Fixture 132's nested-lineage dispatch KEYING shipped in #194 (the
353+
# OTel + Langfuse observers key dispatches by the full enclosing
354+
# fan-out / branch lineage, so no spans drop). It stays deferred for
355+
# 0084's remaining surface: the fan_out_index_chain / branch_name_chain
356+
# arrive on the provider events with 0084 (see graph/events.py), and the
357+
# nested-orphan LLM parent resolution is top-level-instance-only as of
358+
# #195 -- the at-any-depth generalization rides 0084.
359+
"132-otel-nested-fan-out-span-keying-and-llm-exact-match": (
360+
"nested-lineage span keying shipped (#194); deferred for 0084's event-chain "
361+
"surface (fan_out_index_chain / branch_name_chain on provider events) and the "
362+
"nested-orphan LLM parent (top-level only as of #195)"
363+
),
364+
# Proposal 0084: 133 (orphan LLM fallback) + 134 (Langfuse parent
365+
# resolution) defer with 0084's lineage-resolved parent surface.
355366
**{
356367
fixture_id: "nested-fan-out span lineage (proposal 0084) not-yet implemented"
357368
for fixture_id in (
358-
"132-otel-nested-fan-out-span-keying-and-llm-exact-match",
359369
"133-otel-nested-fan-out-orphan-llm-fallback",
360370
"134-langfuse-nested-fan-out-parent-resolution",
361371
)
@@ -2539,6 +2549,54 @@ def _has_exception_event(span: Any) -> bool:
25392549
return any(getattr(e, "name", None) == "exception" for e in events)
25402550

25412551

2552+
def _assert_detached_raise_both_spans(
2553+
spans: Sequence[ReadableSpan],
2554+
*,
2555+
parent_span_name: str,
2556+
expected_link_count: int,
2557+
) -> None:
2558+
# Proposal 0061 §4.2: a raising detached unit surfaces ERROR on BOTH the
2559+
# parent's dispatch span (the one carrying the Link, named
2560+
# ``parent_span_name``) and the per-unit detached invocation span (its own
2561+
# trace) -- distinct traces, shared invocation_id, each with the §4 category
2562+
# + an OTel exception event. The parent's own openarmature.invocation span
2563+
# also inherits ERROR (§4.4): the fixtures' expected span_trees show the
2564+
# parent invocation ERROR.
2565+
parent_dispatch = next((s for s in spans if s.name == parent_span_name and s.links), None)
2566+
assert parent_dispatch is not None, f"expected a parent {parent_span_name!r} span carrying a Link"
2567+
assert parent_dispatch.status.status_code.name == "ERROR", (
2568+
f"parent {parent_span_name!r} span MUST carry ERROR for a raising detached unit"
2569+
)
2570+
assert _has_exception_event(parent_dispatch), (
2571+
f"parent {parent_span_name!r} span MUST record the exception"
2572+
)
2573+
assert dict(parent_dispatch.attributes or {}).get("openarmature.error.category") == "node_exception"
2574+
assert len(parent_dispatch.links) == expected_link_count, (
2575+
f"parent {parent_span_name!r} span MUST carry exactly {expected_link_count} Link(s); "
2576+
f"got {len(parent_dispatch.links)}"
2577+
)
2578+
parent_trace_id = cast("Any", parent_dispatch.context).trace_id
2579+
detached_trace_id = parent_dispatch.links[0].context.trace_id
2580+
assert detached_trace_id != parent_trace_id, "detached + parent traces MUST be distinct"
2581+
inv_spans = [s for s in spans if s.name == "openarmature.invocation"]
2582+
detached_inv = next((s for s in inv_spans if cast("Any", s.context).trace_id == detached_trace_id), None)
2583+
parent_inv = next((s for s in inv_spans if cast("Any", s.context).trace_id == parent_trace_id), None)
2584+
assert detached_inv is not None, "detached trace MUST root in an openarmature.invocation span"
2585+
assert parent_inv is not None
2586+
assert parent_inv.status.status_code.name == "ERROR", (
2587+
"parent openarmature.invocation span MUST inherit ERROR for a raising detached unit (§4.4)"
2588+
)
2589+
assert detached_inv.status.status_code.name == "ERROR", (
2590+
"detached invocation span MUST carry the detached unit's ERROR status (§4.2)"
2591+
)
2592+
assert _has_exception_event(detached_inv), "detached invocation span MUST record the exception"
2593+
assert dict(detached_inv.attributes or {}).get("openarmature.error.category") == "node_exception"
2594+
parent_iid = _invocation_id_of(parent_inv)
2595+
assert parent_iid is not None and _invocation_id_of(detached_inv) == parent_iid, (
2596+
"detached invocation span MUST share the parent's invocation_id"
2597+
)
2598+
2599+
25422600
async def _run_fixture_008_case(case: Mapping[str, Any]) -> None:
25432601
case_name = case["name"]
25442602
expect_raise = case_name in (
@@ -2644,79 +2702,17 @@ async def _run_fixture_008_case(case: Mapping[str, Any]) -> None:
26442702
return
26452703

26462704
if case_name == "detached_subgraph_raises_error_status_on_both_spans":
2647-
# Proposal 0061 §4.2: a raising detached subgraph surfaces ERROR
2648-
# on BOTH the parent's dispatch span and the detached invocation
2649-
# span — distinct traces, shared invocation_id, each with the §4
2650-
# category + an OTel exception event.
2651-
dispatch_spans = [s for s in spans if s.name == "dispatch"]
2652-
parent_dispatch = next((s for s in dispatch_spans if s.links), None)
2653-
assert parent_dispatch is not None, "expected a parent 'dispatch' span with a Link"
2654-
assert parent_dispatch.status.status_code.name == "ERROR", (
2655-
"parent dispatch span MUST carry ERROR for a raising detached subgraph"
2656-
)
2657-
assert _has_exception_event(parent_dispatch), "parent dispatch span MUST record the exception"
2658-
assert dict(parent_dispatch.attributes or {}).get("openarmature.error.category") == "node_exception"
2659-
parent_trace_id = cast("Any", parent_dispatch.context).trace_id
2660-
detached_trace_id = parent_dispatch.links[0].context.trace_id
2661-
assert detached_trace_id != parent_trace_id, "detached + parent traces MUST be distinct"
2662-
inv_spans = [s for s in spans if s.name == "openarmature.invocation"]
2663-
detached_inv = next(
2664-
(s for s in inv_spans if cast("Any", s.context).trace_id == detached_trace_id), None
2665-
)
2666-
parent_inv = next((s for s in inv_spans if cast("Any", s.context).trace_id == parent_trace_id), None)
2667-
assert detached_inv is not None, "detached trace MUST root in an openarmature.invocation span"
2668-
assert parent_inv is not None
2669-
assert detached_inv.status.status_code.name == "ERROR", (
2670-
"detached invocation span MUST carry the detached unit's ERROR status (§4.2)"
2671-
)
2672-
assert _has_exception_event(detached_inv), "detached invocation span MUST record the exception"
2673-
assert dict(detached_inv.attributes or {}).get("openarmature.error.category") == "node_exception"
2674-
parent_iid = _invocation_id_of(parent_inv)
2675-
assert parent_iid is not None and _invocation_id_of(detached_inv) == parent_iid, (
2676-
"detached invocation span MUST share the parent's invocation_id"
2677-
)
2705+
# The parent's subgraph-dispatch span (named "dispatch") carries the
2706+
# single Link to the one detached subgraph trace.
2707+
_assert_detached_raise_both_spans(spans, parent_span_name="dispatch", expected_link_count=1)
26782708
return
26792709

26802710
if case_name == "detached_fan_out_instance_raises_error_status_on_both_spans":
2681-
# Proposal 0061 §4.2, fan-out-instance variant: a raising detached
2682-
# fan-out instance surfaces ERROR on BOTH the parent's fan-out node
2683-
# span (parent trace, carrying the Link) and the per-instance detached
2684-
# invocation span (its own trace), each with the §4 category + an OTel
2685-
# exception event, sharing the parent invocation_id. The single-element
2686-
# fan-out (items [1]) means exactly one instance runs and raises.
2687-
fan_out_node_spans = [s for s in spans if s.name == "per_document_scoring"]
2688-
parent_fan_out = next((s for s in fan_out_node_spans if s.links), None)
2689-
assert parent_fan_out is not None, "expected a fan-out node span with a Link in the parent trace"
2690-
assert parent_fan_out.status.status_code.name == "ERROR", (
2691-
"parent fan-out node span MUST carry ERROR for a raising detached instance"
2692-
)
2693-
assert _has_exception_event(parent_fan_out), "parent fan-out node span MUST record the exception"
2694-
assert dict(parent_fan_out.attributes or {}).get("openarmature.error.category") == "node_exception"
2695-
assert len(parent_fan_out.links) == 1, (
2696-
f"fan-out node span MUST carry exactly one Link; got {len(parent_fan_out.links)}"
2697-
)
2698-
parent_trace_id = cast("Any", parent_fan_out.context).trace_id
2699-
detached_trace_id = parent_fan_out.links[0].context.trace_id
2700-
assert detached_trace_id != parent_trace_id, "detached instance + parent traces MUST be distinct"
2701-
inv_spans = [s for s in spans if s.name == "openarmature.invocation"]
2702-
detached_inv = next(
2703-
(s for s in inv_spans if cast("Any", s.context).trace_id == detached_trace_id), None
2704-
)
2705-
parent_inv = next((s for s in inv_spans if cast("Any", s.context).trace_id == parent_trace_id), None)
2706-
assert detached_inv is not None, (
2707-
"detached instance trace MUST root in an openarmature.invocation span"
2708-
)
2709-
assert parent_inv is not None
2710-
assert detached_inv.status.status_code.name == "ERROR", (
2711-
"detached instance invocation span MUST carry the instance's ERROR status (§4.2)"
2712-
)
2713-
assert _has_exception_event(detached_inv), (
2714-
"detached instance invocation span MUST record the exception"
2715-
)
2716-
assert dict(detached_inv.attributes or {}).get("openarmature.error.category") == "node_exception"
2717-
parent_iid = _invocation_id_of(parent_inv)
2718-
assert parent_iid is not None and _invocation_id_of(detached_inv) == parent_iid, (
2719-
"detached instance invocation span MUST share the parent's invocation_id"
2711+
# Fan-out-instance variant: the parent's fan-out node span
2712+
# ("per_document_scoring") carries the Link. The single-element fan-out
2713+
# (items [1]) means exactly one instance runs and raises -> one Link.
2714+
_assert_detached_raise_both_spans(
2715+
spans, parent_span_name="per_document_scoring", expected_link_count=1
27202716
)
27212717
return
27222718

tests/conformance/test_retrieval_provider.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,12 @@
6565
if 18 <= int(p.stem[:3]) <= 22
6666
},
6767
**{
68-
p.stem: "OpenAI-compatible embeddings wire mapping (proposal 0079) not implemented"
68+
p.stem: (
69+
"OpenAI-compatible embed wire ships via the bundled OpenAIEmbeddingProvider "
70+
"(proposal 0059); deferred because the harness lacks a wire-capture primitive "
71+
"(expected_wire_request / url / headers) and 0079's dimensions / input_type "
72+
"request knobs are unimplemented"
73+
)
6974
for p in CONFORMANCE_DIR.glob("[0-9][0-9][0-9]-*.yaml")
7075
if 23 <= int(p.stem[:3]) <= 27
7176
},

0 commit comments

Comments
 (0)