LunarCommand · chris-colinsky · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026 · Jun 29, 2026
diff --git a/conformance.toml b/conformance.toml
@@ -32,7 +32,7 @@
 
 [manifest]
 implementation = "openarmature-python"
-spec_pin = "v0.70.1"
+spec_pin = "v0.84.0"
 
 # Status values:
 #   implemented   — shipped behavior matches the proposal's contract
@@ -764,3 +764,101 @@ note = "ParallelBranchesNode gains two additive branch forms. (1) Inline-callabl
 status = "implemented"
 since = "0.15.0"
 note = "The model's output tool calls get an output-side home on the openarmature.llm.complete span. observability §5.5.10 adds the UNGATED identity projections openarmature.llm.output.tool_calls.count / .names / .ids (the class of openarmature.llm.model / attempt_index; emitted only on a tool-calling completion, omitted entirely otherwise -- not count=0); .names and .ids are index-aligned in request order, .count equals their length. §5.5.1 adds the GATED openarmature.llm.output.tool_calls, the full [{id, name, arguments}] serialization (reusing the §5.5.5 input tool-call encoding) carrying the arguments, suppressed under disable_provider_payload and subject to the truncation contract. graph-engine §6: LlmCompletionEvent gains an output_tool_calls field (the ToolCall records, populated unconditionally). python carries the field on BOTH the terminal LlmCompletionEvent (spec-conformance + the Langfuse/consumer path) and the python-internal per-attempt LlmRetryAttemptEvent, and the OTel observer renders the span attributes from the per-attempt event (the LLM-span source since 0050) -- mirroring how output_content already works. OA-namespace, no gen_ai.* mirror (the attempt_index precedent). Langfuse request-side mapping is OUT OF SCOPE (proposal defers it as future work); no Langfuse change. Fixtures 085 (two calls -> count/names/ids), 086 (no calls -> family absent), 087 (payload-gating: identity survives off, gated full present only on)."
+
+# Spec v0.71.0 (proposal 0062).  LLM completion streaming -- an opt-in
+# stream flag on complete() emitting a per-chunk LlmTokenEvent plus the
+# §6 streaming-assembly reassembly into the atomic Response.  Python has
+# not yet shipped streaming; this pin bump leaves it not-yet.  llm-provider
+# fixtures 059/060 and observability fixtures 111-118 defer with it.
+[proposals."0062"]
+status = "not-yet"
+
+# Spec v0.72.0 (proposal 0077).  Retrieval-provider TEI wire mapping
+# (§8 / §8.1) plus the input_type embedding knob (§2 / §3).  Python has
+# not yet shipped the retrieval wire mappings; not-yet.
+# retrieval-provider fixtures 013-017 defer with it.
+[proposals."0077"]
+status = "not-yet"
+
+# Spec v0.73.0 (proposal 0078).  Retrieval-provider Jina hosted wire
+# mapping (§8.2).  Not-yet; retrieval-provider fixtures 018-022 defer
+# with it.
+[proposals."0078"]
+status = "not-yet"
+
+# Spec v0.74.0 (proposal 0079).  Retrieval-provider OpenAI-compatible
+# embeddings wire mapping (§8.3).  Not-yet; retrieval-provider fixtures
+# 023-027 defer with it.
+[proposals."0079"]
+status = "not-yet"
+
+# Spec v0.75.0 (proposal 0080).  PromptGroup arity enforcement
+# (prompt-management §10 / §11 -- construct-time raise plus the new
+# prompt_group_invalid error category).  Not-yet; prompt-management
+# fixture 035 defers with it.
+[proposals."0080"]
+status = "not-yet"
+
+# Spec v0.76.0 (proposal 0081).  Conformance-adapter value-matcher
+# vocabulary (§5.10) -- ratifies the fixture matcher tokens already in
+# use.  Descriptive (no fixture changes); the one tightening with teeth
+# (<any-string> = non-empty) is already enforced by the adapter
+# (tests/conformance/test_observability.py _value_matches rejects the
+# empty string).  No module-level change required; matches the
+# conformance-adapter textual-only precedent (0055 / 0071).
+[proposals."0081"]
+status = "textual-only"
+since = "0.16.0"
+
+# Spec v0.77.0 (proposal 0082).  Structured-output failure diagnostics
+# (graph-engine §6 -- LlmFailedEvent response-side surface for
+# structured_output_invalid + llm-provider §7 finish_reason / usage).
+# Not-yet; llm-provider fixtures 022/023 and observability fixtures
+# 120-125 defer with it.
+[proposals."0082"]
+status = "not-yet"
+
+# Spec v0.78.0 (proposal 0083).  Per-prompt token-budget observability
+# (prompt-management §3 / graph-engine §6 / observability §5.5.15).
+# Not-yet; observability fixtures 126-131 defer with it.
+[proposals."0083"]
+status = "not-yet"
+
+# Spec v0.81.0 (proposal 0084).  Nested-fan-out span lineage chain
+# (graph-engine §6 fan_out_index_chain / branch_name_chain +
+# observability §4 / §5.5 lineage-resolved parent).  Not-yet;
+# graph-engine fixture 039 and observability fixtures 132-134 defer with it.
+[proposals."0084"]
+status = "not-yet"
+
+# Spec v0.80.0 (proposal 0085).  Nested-fan-out checkpoint resume
+# lineage (pipeline-utilities §10.11 enclosing_fan_out_lineage).
+[proposals."0085"]
+status = "partial"
+since = "0.16.0"
+note = "The SAVE-side enclosing_fan_out_lineage keying (pipeline-utilities §10.11) shipped in #194: a fan-out instance's checkpoint tracking key carries the enclosing fan-out instance lineage in the in-memory dict and through the checkpoint projection / lookup / cleanup / restore, so concurrent outer instances no longer collide. partial because the RESUME consume-side is not yet shipped: a fan-out nested inside an outer instance re-runs rather than skipping on resume, since the saved record format carries no lineage (tracked as a follow-up). pipeline-utilities fixture 076 is not collected by the test_pipeline_utilities.py _LAST_DRIVEN_FIXTURE number gate (it is not deferred); the resume consume-side plus its fixture wiring land in a later PR."
+
+# Spec v0.79.0 (proposal 0086).  Service-wide default cache_ttl_seconds
+# on PromptManager (prompt-management §6).  Not-yet; prompt-management
+# fixture 036 defers with it.
+[proposals."0086"]
+status = "not-yet"
+
+# Spec v0.82.0 (proposal 0087).  Conformance-adapter within-node
+# directive execution order (§8.3).  Not-yet; observability fixture 135
+# defers with it.
+[proposals."0087"]
+status = "not-yet"
+
+# Spec v0.83.0 (proposal 0088).  Langfuse parallel-branches mapping
+# parity (observability §8.4.8).  Not-yet; observability fixture 136
+# defers with it.
+[proposals."0088"]
+status = "not-yet"
+
+# Spec v0.84.0 (proposal 0089).  Embedding / rerank typed-event output
+# (graph-engine §6 EmbeddingEvent.output_vectors /
+# RerankEvent.output_results + observability output mappings).  Not-yet;
+# observability fixtures 137/138 defer with it (and re-source 083/108).
+[proposals."0089"]
+status = "not-yet"
diff --git a/openarmature-spec b/openarmature-spec
diff --git a/pyproject.toml b/pyproject.toml
@@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
 openarmature = "openarmature.cli:main"
 
 [tool.openarmature]
-spec_version = "0.70.1"
+spec_version = "0.84.0"
 
 [dependency-groups]
 dev = [

diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md
@@ -1,6 +1,6 @@
 # OpenArmature — Agent documentation
 
-*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.70.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
+*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.84.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
 
 ## TL;DR
 
@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:
 
 ## Capability contracts
 
-_Sourced from openarmature-spec v0.70.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
+_Sourced from openarmature-spec v0.84.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
 
 ### Capability: `graph-engine`
 
@@ -221,7 +221,7 @@ and is invisible to nodes that don't opt into middleware.
 
 **Middleware.** An async callable with the shape:
 
-```
+```python
 async def middleware(state, next) -> partial_update
 ```
 
@@ -262,13 +262,14 @@ second, and so on, with the original node at the inner end.
 
 For a chain `[m1, m2, m3]` wrapping node `n`, execution proceeds:
 
-```
-m1 sees state, calls next(s) ────► m2 sees state, calls next(s) ────► m3 sees state, calls next(s)
-                                                                                  │
-                                                                                  ▼
-                                                                                 n(state) → partial_update
-                                                                                  │
-m1 returns partial_update ◄──── m2 returns partial_update ◄──── m3 returns partial_update
+```mermaid
+flowchart LR
+    m1["m1 sees state,<br/>calls next(s)"] --> m2["m2 sees state,<br/>calls next(s)"]
+    m2 --> m3["m3 sees state,<br/>calls next(s)"]
+    m3 --> n["n(state) produces<br/>partial_update"]
+    n --> r3["m3 returns<br/>partial_update"]
+    r3 --> r2["m2 returns<br/>partial_update"]
+    r2 --> r1["m1 returns<br/>partial_update"]
 ```
 
 Each middleware's return value flows back through the previous layer's `next` call return.
@@ -283,7 +284,7 @@ The two phases are tied to a single position in the chain: if `m1` is outermost,
 runs first AND `m1`'s post-phase runs last. Pre-order and post-order are not configured
 independently. Concretely, a middleware function carries both phases:
 
-```
+```python
 async def my_middleware(state, next):
     # ── pre-node phase: runs on the way IN ──
     started_at = time.time()

diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py
@@ -25,7 +25,7 @@
 """
 
 __version__ = "0.15.0"
-__spec_version__ = "0.70.1"
+__spec_version__ = "0.84.0"
 # Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
 # package-registry name for this implementation. Surfaces on every
 # OTel invocation span as ``openarmature.implementation.name`` and on

diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py
@@ -557,6 +557,78 @@ def _id(case: tuple[str, Path]) -> str:
     "observability/110-otel-callable-branch-span": (
         "Cross-capability parser doesn't model final_state + span_tree together; runs in test_observability"
     ),
+    # ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -----------------------
+    # New fixtures whose directive shapes the cross-capability parser doesn't
+    # model, for proposals deferred to their own later v0.16.0 PRs. Each runs
+    # (or stays accounted) in its capability runner once that proposal lands.
+    # Proposal 0062 (LLM completion streaming, v0.71.0) -- the stream-flag
+    # llm-provider wire fixtures + the per-chunk LlmTokenEvent observability
+    # fixtures. (117 parses cleanly and is accounted in test_observability.)
+    "llm-provider/059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
+    "llm-provider/060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
+    "observability/111-llm-token-event-dispatch-on-stream": "Proposal 0062 streaming; not implemented",
+    "observability/112-llm-token-event-absent-without-stream": "Proposal 0062 streaming; not implemented",
+    "observability/113-streamed-tool-call-reassembles-no-token-events": (
+        "Proposal 0062 streaming; not implemented"
+    ),
+    "observability/114-llm-token-event-then-failure-mid-stream": "Proposal 0062 streaming; not implemented",
+    "observability/115-llm-token-event-call-id-links-to-completion": (
+        "Proposal 0062 streaming; not implemented"
+    ),
+    "observability/116-llm-token-event-call-level-retry-one-call-id": (
+        "Proposal 0062 streaming; not implemented"
+    ),
+    "observability/118-llm-token-event-reasoning-delta-kind": "Proposal 0062 streaming; not implemented",
+    # Proposal 0075 (callable branches) coverage round-out fixture 119
+    # (v0.73.1); the cross-capability parser doesn't model its graph-style
+    # shape (cf. 110). Accounted in test_observability.
+    "observability/119-otel-callable-branch-attempt-index-under-node-retry": (
+        "Proposal 0075 callable-branch coverage round-out; harness shape not modelled"
+    ),
+    # Proposal 0082 (structured-output failure diagnostics, v0.77.0) -- the
+    # LlmFailedEvent response-side directive shape.
+    "observability/120-llm-failure-event-structured-output-truncation": (
+        "Proposal 0082 structured-output failure diagnostics; not implemented"
+    ),
+    "observability/121-llm-failure-event-structured-output-schema-mismatch": (
+        "Proposal 0082 structured-output failure diagnostics; not implemented"
+    ),
+    "observability/122-llm-failure-event-response-side-null-on-non-body-failure": (
+        "Proposal 0082 structured-output failure diagnostics; not implemented"
+    ),
+    # Proposal 0083 (per-prompt token-budget observability, v0.78.0) -- the
+    # token_budget directive shape + budget-exceeded expectations.
+    "observability/126-token-budget-input-exceeded": "Proposal 0083 token-budget; not implemented",
+    "observability/127-token-budget-total-exceeded": "Proposal 0083 token-budget; not implemented",
+    "observability/128-token-budget-under-budget-no-warning": "Proposal 0083 token-budget; not implemented",
+    "observability/129-token-budget-absent-unchanged": "Proposal 0083 token-budget; not implemented",
+    "observability/130-langfuse-token-budget-warning-level": "Proposal 0083 token-budget; not implemented",
+    "observability/131-token-budget-on-structured-output-failure": (
+        "Proposal 0083 token-budget; not implemented"
+    ),
+    # Proposal 0084 (nested-fan-out span lineage, v0.81.0) -- the
+    # lineage-chain directive shapes. (132 parses cleanly; accounted in
+    # test_observability.)
+    "observability/133-otel-nested-fan-out-orphan-llm-fallback": (
+        "Proposal 0084 nested-fan-out span lineage; not implemented"
+    ),
+    "observability/134-langfuse-nested-fan-out-parent-resolution": (
+        "Proposal 0084 nested-fan-out span lineage; not implemented"
+    ),
+    # Proposal 0087 (within-node directive execution order, v0.82.0).
+    "observability/135-within-node-directive-execution-order": (
+        "Proposal 0087 within-node directive execution order; not implemented"
+    ),
+    # Proposal 0089 (embedding / rerank typed-event output, v0.84.0) -- the
+    # rerank failure observation directive shape (rerank capability unshipped).
+    "observability/138-langfuse-rerank-failure-observation": (
+        "Proposal 0089 rerank failure observation; rerank capability not implemented"
+    ),
+    # Proposal 0086 (PromptManager default cache_ttl_seconds, v0.79.0) -- the
+    # manager default-cache-ttl directive shape.
+    "prompt-management/036-prompt-manager-default-cache-ttl": (
+        "Proposal 0086 default cache_ttl_seconds; not implemented"
+    ),
 }
 
 

diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py
@@ -112,6 +112,27 @@
     "056-call-level-retry-transient": ("per-attempt LLM spans; see test_observability_otel.py"),
     "057-call-level-retry-exhaustion": ("per-attempt LLM spans; see test_observability_otel.py"),
     "058-call-level-retry-non-transient-no-retry": ("per-attempt LLM spans; see test_observability_otel.py"),
+    # ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -------------------
+    # Proposal 0082 (structured-output failure diagnostics, spec v0.77.0)
+    # extended fixtures 022/023 to additionally assert the now-required
+    # finish_reason + usage on the structured_output_invalid error. The
+    # base structured_output_invalid mapping still has coverage in
+    # tests/unit/test_structured_output.py::
+    # test_pydantic_validation_failure_wraps_in_structured_output_invalid;
+    # only 0082's additive finish_reason / usage-on-error response-side
+    # fields are unimplemented, which is why the conformance fixtures defer
+    # until a later v0.16.0 PR lands 0082.
+    "022-structured-output-parse-failure": (
+        "Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
+    ),
+    "023-structured-output-validation-failure": (
+        "Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
+    ),
+    # Proposal 0062 (LLM completion streaming, spec v0.71.0) -- the stream
+    # flag on complete() + SSE wire handling + the streaming-unsupported
+    # rejection. Unimplemented until a later v0.16.0 PR.
+    "059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
+    "060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
 }