From b1cd394c2b66f5610f6dd73a339ee61fa0b6e7d1 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 08:30:37 -0700
Subject: [PATCH 01/15] Strip spec refs from observability docstrings

---
 src/openarmature/observability/correlation.py |  16 +--
 .../observability/langfuse/__init__.py        |   2 +-
 .../observability/langfuse/client.py          |  17 +--
 .../observability/langfuse/observer.py        | 105 ++++++++++--------
 src/openarmature/observability/metadata.py    |  37 +++---
 5 files changed, 95 insertions(+), 82 deletions(-)

diff --git a/src/openarmature/observability/correlation.py b/src/openarmature/observability/correlation.py
index af515d7..bb8a434 100644
--- a/src/openarmature/observability/correlation.py
+++ b/src/openarmature/observability/correlation.py
@@ -139,14 +139,16 @@ def _reset_invocation_id(token: Token[str | None]) -> None:
 def validate_invocation_id(value: object) -> str:
     """Validate a caller-supplied ``invocation_id`` and return it.
 
-    Per observability §5.1 a caller-supplied id MAY be any non-empty
-    URL-safe string. Rejects empty / non-string / non-URL-safe values
-    at the ``invoke()`` boundary so the violation surfaces
-    synchronously to the caller rather than as a downstream trace-id
-    derivation failure. Typed ``object`` (like
-    :func:`validate_invocation_metadata`) so the boundary check guards
-    against untyped callers. Raises :class:`ValueError`.
+    A caller-supplied id MAY be any non-empty URL-safe string. Rejects
+    empty / non-string / non-URL-safe values at the ``invoke()``
+    boundary so the violation surfaces synchronously to the caller
+    rather than as a downstream trace-id derivation failure. Typed
+    ``object`` (like :func:`validate_invocation_metadata`) so the
+    boundary check guards against untyped callers. Raises
+    :class:`ValueError`.
     """
+    # Spec observability §5.1: a caller-supplied invocation_id MAY be
+    # any non-empty URL-safe string.
     if not isinstance(value, str):
         raise ValueError(f"invocation_id must be a string; got {type(value).__name__}")
     if not value:
diff --git a/src/openarmature/observability/langfuse/__init__.py b/src/openarmature/observability/langfuse/__init__.py
index 5657655..92e35c3 100644
--- a/src/openarmature/observability/langfuse/__init__.py
+++ b/src/openarmature/observability/langfuse/__init__.py
@@ -16,7 +16,7 @@
 Public surface:
 
 - :class:`LangfuseObserver` — observer-driven Langfuse Trace +
-  Observation emission per spec observability §8.
+  Observation emission.
 - :class:`LangfuseClient` — Protocol the observer calls. Satisfied by
   the bundled :class:`InMemoryLangfuseClient` and (structurally) by
   the real ``langfuse.Langfuse`` SDK class.
diff --git a/src/openarmature/observability/langfuse/client.py b/src/openarmature/observability/langfuse/client.py
index 8091b76..276498b 100644
--- a/src/openarmature/observability/langfuse/client.py
+++ b/src/openarmature/observability/langfuse/client.py
@@ -11,7 +11,7 @@
 
 """Langfuse client Protocol + in-memory recorder.
 
-The :class:`LangfuseObserver` consumes the §6 OA event stream and
+The :class:`LangfuseObserver` consumes the OA event stream and
 emits Langfuse Trace + Observation entities through a
 :class:`LangfuseClient`. The Protocol is intentionally narrow: it
 declares only the methods the observer calls. Concrete sinks:
@@ -55,7 +55,7 @@ class LangfuseObservation:
     Carries the observation's type-discriminated shape — Spans hold
     timing + metadata; Generations add model/parameters/usage/input/
     output/prompt-entity link; Events are point-in-time markers
-    (reserved per spec §8.2 — not used by this version of the mapping).
+    (reserved, not used by this version of the mapping).
     """
 
     id: str
@@ -161,7 +161,7 @@ class LangfuseClient(Protocol):
     closes.
 
     The Protocol does NOT define `event(...)` — Event observations
-    are reserved by §8.2 but not used in v0.23.0 of the mapping.
+    are reserved but not used by this mapping.
     """
 
     def trace(
@@ -173,10 +173,11 @@ def trace(
     ) -> None:
         """Create a new Trace.
 
-        The Trace `id` MUST be the OA invocation_id verbatim (§8.4.1).
+        The Trace `id` MUST be the OA invocation_id verbatim.
         Implementations track Traces internally; observation calls
         pass `trace_id` to associate.
         """
+        # Spec §8.4.1: the Trace id is the OA invocation_id verbatim.
         ...
 
     # The current observer doesn't invoke this method — it sets the
@@ -199,10 +200,10 @@ def update_trace(
         """Update an existing Trace's mutable fields after creation.
 
         Used by the observer when the caller-supplied invocation
-        label (§8.6) lands later than the Trace's open call, when
-        additional metadata becomes available mid-invocation, or
-        when the proposal 0043 invocation-boundary events populate
-        ``trace.input`` / ``trace.output``.
+        label lands later than the Trace's open call, when additional
+        metadata becomes available mid-invocation, or when the
+        invocation-boundary events populate ``trace.input`` /
+        ``trace.output``.
         """
         ...
 
diff --git a/src/openarmature/observability/langfuse/observer.py b/src/openarmature/observability/langfuse/observer.py
index 384fc6c..61f0de5 100644
--- a/src/openarmature/observability/langfuse/observer.py
+++ b/src/openarmature/observability/langfuse/observer.py
@@ -97,10 +97,10 @@ def _read_implementation_version() -> str:
 class _OpenObservation:
     """An in-flight Langfuse observation pinned in the observer's state.
 
-    Per proposal 0045: carries the observation's own
-    ``fan_out_index_chain`` and ``branch_name_chain`` so the
-    augmentation walk can apply §3.4's lineage-aware boundary rule
-    (mirror of the OTel observer's ``_OpenSpan``)."""
+    Carries the observation's own ``fan_out_index_chain`` and
+    ``branch_name_chain`` so the augmentation walk can apply the
+    lineage-aware boundary rule (mirror of the OTel observer's
+    ``_OpenSpan``)."""
 
     handle: LangfuseSpanHandle | LangfuseGenerationHandle
     fan_out_index_chain: tuple[int | None, ...] = ()
@@ -138,22 +138,23 @@ def _empty_str_frozenset() -> frozenset[str]:
 
 def _apply_caller_metadata(metadata: dict[str, Any], caller_metadata: Mapping[str, Any]) -> None:
     """Merge caller-supplied invocation metadata into a Trace's or
-    Observation's metadata bag at top level per observability §8.4.1
-    + §8.4.2 (proposal 0034).
+    Observation's metadata bag at top level.
 
-    Top-level placement is by spec: Langfuse UI filters on
+    Top-level placement lets the Langfuse UI filter on
     ``metadata.<key>`` directly, so caller-supplied entries become
     siblings to ``correlation_id`` / ``entry_node`` rather than
     nested under a ``user`` sub-object.
 
-    Reserved-key collision with §8.4.1 / §8.4.2 keys
+    Reserved-key collision with the OA-emitted keys
     (``correlation_id``, ``entry_node``, ``spec_version``,
-    ``namespace``, etc.) is not currently checked here: the spec
-    permits the rejection to happen at either boundary, and the
-    ``invoke()`` API-boundary validation already rejects
-    ``openarmature.*`` / ``gen_ai.*`` prefixed keys. Per-Langfuse-
-    backend collision rejection is queued as a follow-up.
+    ``namespace``, etc.) is not currently checked here: the rejection
+    may happen at either boundary, and the ``invoke()`` API-boundary
+    validation already rejects ``openarmature.*`` / ``gen_ai.*``
+    prefixed keys. Per-Langfuse-backend collision rejection is queued
+    as a follow-up.
     """
+    # Spec observability §8.4.1 / §8.4.2 (proposal 0034): top-level
+    # placement of caller-supplied metadata on the Trace / Observation.
     for key, value in caller_metadata.items():
         metadata[key] = value
 
@@ -163,15 +164,16 @@ def _subgraph_identity_at(event: NodeEvent, depth: int) -> str:
     given 1-based namespace depth, or the empty string when no
     identity is tracked at that depth.
 
-    Per observability §5.3 + the coord-thread
-    ``clarify-subgraph-name-semantics`` resolution: the empty-string
-    fallback matches the spec's "if the implementation tracks one"
-    clause for implementations / direct ``SubgraphNode(...)`` callers
-    that don't wire an identity through. Conformance fixtures
-    031/032/033 lock identity as the required value; the empty-string
-    path keeps direct callers conformant with §5.3 but failing those
-    fixtures.
+    The empty-string fallback matches the spec's "if the
+    implementation tracks one" clause for implementations / direct
+    ``SubgraphNode(...)`` callers that don't wire an identity through.
+    Conformance fixtures 031/032/033 lock identity as the required
+    value; the empty-string path keeps direct callers conformant but
+    failing those fixtures.
     """
+    # Spec observability §5.3 (coord thread
+    # clarify-subgraph-name-semantics): empty-string fallback is
+    # conformant for callers that don't track a subgraph identity.
     idx = depth - 1
     if 0 <= idx < len(event.subgraph_identities):
         identity = event.subgraph_identities[idx]
@@ -254,12 +256,12 @@ class _InvState:
 
 @dataclass
 class LangfuseObserver:
-    """Observer-driven Langfuse mapping per spec observability §8.
+    """Observer-driven Langfuse mapping.
 
     Construct with a :class:`LangfuseClient` — the bundled
     :class:`InMemoryLangfuseClient` for tests, or a real
     ``langfuse.Langfuse()`` instance for production. The observer
-    handles the §6 event stream and emits Trace + Observation entities
+    handles the event stream and emits Trace + Observation entities
     through the client.
 
     Constructor knobs:
@@ -267,34 +269,34 @@ class LangfuseObserver:
     - ``client``: the Langfuse sink (Protocol-typed).
     - ``disable_llm_spans``: when ``True`` the observer skips
       Generation observations on LLM provider events.
-    - ``disable_provider_payload``: default ``True`` per §8.9's "symmetric
-      privacy posture" with the OTel observer. Gates
+    - ``disable_provider_payload``: default ``True`` for a symmetric
+      privacy posture with the OTel observer. Gates
       ``generation.input`` / ``output`` / ``metadata.request_extras``
       emission. The name carries the broadened provider-payload scope;
       LLM completion is OA's only provider-call payload today.
     - ``payload_byte_cap``: per-attribute byte cap on the source
       payload string before parse-back. Mirrors the OTel observer's
       ``payload_max_bytes`` semantic — emission preserves the raw
-      truncated string when the §5.5.5 marker is present (per §8.7).
-      Default 64 KiB; same minimum (256 bytes) applies.
+      truncated string when the truncation marker is present. Default
+      64 KiB; same minimum (256 bytes) applies.
     - ``detached_subgraphs``: set of subgraph wrapper node names that
-      run in their own Langfuse Trace per §8.5. Each such subgraph
-      gets a fresh trace_id; the main Trace's dispatch observation
-      surfaces the link via ``metadata.detached_child_trace_ids``.
+      run in their own Langfuse Trace. Each such subgraph gets a fresh
+      trace_id; the main Trace's dispatch observation surfaces the link
+      via ``metadata.detached_child_trace_ids``.
     - ``detached_fan_outs``: set of fan-out node names whose instances
       each get their own Langfuse Trace. Same link mechanism on the
       fan-out node observation: each per-instance detached trace_id
       lands in the array.
-    - ``disable_state_payload``: default ``True`` per §8.4.1 *Trace
-      input/output sourcing* (proposal 0043). When ``True`` the
-      observer does NOT serialize ``initial_state`` / final state
-      directly onto ``trace.input`` / ``trace.output``; the minimal
-      stub applies unless ``trace_input_from_state`` /
-      ``trace_output_from_state`` overrides. When ``False`` the raw
-      state object is serialized to the Trace fields, subject to
-      ``payload_byte_cap`` truncation. Independent of
-      ``disable_provider_payload`` — the two payloads carry distinct
-      threat models (LLM-call transcript vs. application state).
+    - ``disable_state_payload``: default ``True``. Controls Trace
+      input/output sourcing: when ``True`` the observer does NOT serialize
+      ``initial_state`` / final state directly onto ``trace.input`` /
+      ``trace.output``; the minimal stub applies unless
+      ``trace_input_from_state`` / ``trace_output_from_state``
+      overrides. When ``False`` the raw state object is serialized to
+      the Trace fields, subject to ``payload_byte_cap`` truncation.
+      Independent of ``disable_provider_payload`` — the two payloads
+      carry distinct threat models (LLM-call transcript vs.
+      application state).
     - ``trace_input_from_state``: optional caller hook returning the
       value to use as ``trace.input``. Called once per invocation at
       the ``InvocationStartedEvent``. Returning ``None`` falls
@@ -309,8 +311,8 @@ class LangfuseObserver:
       parameterization.
     - ``implementation_version``: string surfaced as
       ``trace.metadata.implementation_version`` on every Trace.
-      Defaults to ``openarmature.__version__``. Always-emit invariant
-      inherited from §5.1 — not gated by ``disable_state_payload``,
+      Defaults to ``openarmature.__version__``. Always emitted —
+      not gated by ``disable_state_payload``,
       ``disable_provider_payload``, or any other privacy knob.
 
     The observer reads the spec version from the package at
@@ -319,6 +321,11 @@ class LangfuseObserver:
     state isolation keys all internal maps by invocation_id.
     """
 
+    # Spec observability §8 (Langfuse backend mapping). Knob spec
+    # basis: §8.9 privacy posture; §8.4.1 Trace input/output sourcing
+    # (proposal 0043); §8.5 detached traces; §5.1 always-emit
+    # attribution invariant.
+
     client: LangfuseClient
     disable_llm_spans: bool = False
     disable_provider_payload: bool = True
@@ -1451,9 +1458,9 @@ def _handle_typed_llm_completion(self, event: LlmCompletionEvent) -> None:
 
     def _handle_typed_llm_failed(self, event: LlmFailedEvent) -> None:
         """Open + close an ERROR-level Generation observation from the
-        typed LlmFailedEvent (failure path, proposal 0058). Same shape
-        as the success path with ERROR level + error_category as the
-        Generation observation's statusMessage."""
+        typed LlmFailedEvent (failure path). Same shape as the success
+        path with ERROR level + error_category as the Generation
+        observation's statusMessage."""
         from openarmature.observability.correlation import (
             current_correlation_id,
             current_invocation_id,
@@ -1600,8 +1607,9 @@ def _typed_event_metadata(
         return metadata
 
     def _usage_from_typed_event(self, event: LlmCompletionEvent) -> LangfuseUsage | None:
-        """Map the typed event's Usage onto the Langfuse Usage record
-        per §8.4.3. Returns None when no usage was reported."""
+        """Map the typed event's Usage onto the Langfuse Usage record.
+        Returns None when no usage was reported."""
+        # Spec observability §8.4.3 (Langfuse usage mapping).
         usage = event.usage
         if usage is None:
             return None
@@ -1614,8 +1622,9 @@ def _usage_from_typed_event(self, event: LlmCompletionEvent) -> LangfuseUsage |
         )
 
     def _resolve_prompt_link_from_typed_event(self, event: LlmCompletionEvent | LlmFailedEvent) -> Any:
-        """§8.4.4 case discrimination on the typed event's active_prompt
+        """Resolve the prompt link case-by-case from the typed event's active_prompt
         snapshot."""
+        # Spec observability §8.4.4.
         active_prompt = event.active_prompt
         if active_prompt is None:
             return None
diff --git a/src/openarmature/observability/metadata.py b/src/openarmature/observability/metadata.py
index e2c381b..10f775d 100644
--- a/src/openarmature/observability/metadata.py
+++ b/src/openarmature/observability/metadata.py
@@ -7,7 +7,7 @@
 # at the ``invoke()`` boundary and at mid-invocation augmentation
 # via ``set_invocation_metadata``.
 
-"""Caller-supplied invocation metadata (proposal 0034).
+"""Caller-supplied invocation metadata.
 
 Two surfaces:
 
@@ -30,8 +30,8 @@
   for spec-normative attribute namespaces; collisions would silently
   overwrite OA-emitted state at the observer layer).
 - Keys MUST NOT exactly match a reserved OA-emitted top-level metadata
-  key name (the §8.4 Langfuse set plus ``invocation_id``; proposal
-  0041) for the same collision reason.
+  key name (the Langfuse set plus ``invocation_id``) for the same
+  collision reason.
 - Values MUST be OTel-attribute-compatible scalars: ``str``, ``int``,
   ``float``, ``bool``, or a homogeneous list/tuple of those types.
   ``None``, nested objects, and mixed-type arrays are rejected.
@@ -123,11 +123,10 @@ def current_invocation_metadata() -> MappingProxyType[str, AttributeValue]:
     callers MUST NOT mutate it. Use :func:`set_invocation_metadata`
     to add entries.
 
-    Aliased as :func:`get_invocation_metadata` per spec §3.4 (proposal
-    0048, v0.40.0); the alias is the canonical spec-idiomatic name
-    paralleling :func:`set_invocation_metadata`. Both names point at
-    the same function — pick whichever reads naturally at the call
-    site.
+    Aliased as :func:`get_invocation_metadata`; the alias is the
+    canonical idiomatic name paralleling :func:`set_invocation_metadata`.
+    Both names point at the same function — pick whichever reads
+    naturally at the call site.
     """
     return _invocation_metadata_var.get()
 
@@ -146,10 +145,10 @@ def set_invocation_metadata(**entries: AttributeValue) -> None:
     metadata. Additive: existing keys with the same names are
     overwritten; other keys are preserved.
 
-    Per spec §3.4: affects spans / observations emitted AFTER the
-    call returns. Open observations whose lineage covers the calling
-    context ARE updated in place per proposal 0040 — implementations
-    enqueue a :class:`~openarmature.graph.events.MetadataAugmentationEvent`
+    Affects spans / observations emitted AFTER the call returns. Open
+    observations whose lineage covers the calling context ARE updated
+    in place: implementations enqueue a
+    :class:`~openarmature.graph.events.MetadataAugmentationEvent`
     on the engine's serial observer-delivery queue carrying the
     delta + the calling context's lineage tuple (namespace,
     attempt_index, fan_out_index, branch_name); observers correlate
@@ -169,10 +168,11 @@ def set_invocation_metadata(**entries: AttributeValue) -> None:
     symmetry; users typically call this from inside a node body,
     middleware, or observer where an invocation is already in flight.
 
-    Symmetric with :func:`get_invocation_metadata` (proposal 0048,
-    spec §3.4 v0.40.0) which returns an immutable snapshot of the
-    current async context's view.
+    Symmetric with :func:`get_invocation_metadata`, which returns an
+    immutable snapshot of the current async context's view.
     """
+    # Spec observability §3.4: additive merge, affecting only spans /
+    # observations emitted after this call returns.
     if not entries:
         return
     for key, value in entries.items():
@@ -226,13 +226,14 @@ def validate_invocation_metadata(mapping: object) -> MappingProxyType[str, Attri
     read-only view the engine stashes on the ContextVar.
 
     Public so the engine (`CompiledGraph.invoke`) calls this at the
-    boundary BEFORE any work begins; per spec §3.4 the rejection
-    surfaces as a synchronous error to the caller of ``invoke()``
-    rather than as a backend-emission failure.
+    boundary BEFORE any work begins; the rejection surfaces as a
+    synchronous error to the caller of ``invoke()`` rather than as a
+    backend-emission failure.
 
     Returns the validated read-only mapping. Raises :class:`ValueError`
     on any rule violation (with a message naming the offending key).
     """
+    # Spec observability §3.4: boundary validation, synchronous rejection.
     if mapping is None:
         return _EMPTY_METADATA
     if not isinstance(mapping, dict):

From 113eb3cfb9aa4ee03b1c0ec625725d139dbbc5b8 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:29:32 -0700
Subject: [PATCH 02/15] Strip spec refs from otel observer docstrings

---
 .../observability/otel/observer.py            | 76 +++++++++----------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/src/openarmature/observability/otel/observer.py b/src/openarmature/observability/otel/observer.py
index 39c3832..b1b6b20 100644
--- a/src/openarmature/observability/otel/observer.py
+++ b/src/openarmature/observability/otel/observer.py
@@ -166,8 +166,7 @@ def _read_implementation_version() -> str:
 
 def _apply_caller_metadata(attrs: dict[str, Any], metadata: Mapping[str, Any]) -> None:
     """Merge caller-supplied invocation metadata into a span's
-    attribute dict as ``openarmature.user.<key>`` entries per
-    observability §5.6.
+    attribute dict as ``openarmature.user.<key>`` entries.
 
     Called at every span-emission site so the metadata family is
     cross-cutting (invocation span, every node span, subgraph
@@ -187,12 +186,13 @@ def _subgraph_identity_at(event: NodeEvent, depth: int) -> str:
     given 1-based namespace depth, or the empty string when no
     identity is tracked at that depth.
 
-    Per observability §5.3 + the coord-thread
-    ``clarify-subgraph-name-semantics`` resolution: empty-string
-    fallback matches the spec's "if the implementation tracks one"
-    clause for callers using ``SubgraphNode(name=..., compiled=...)``
-    without supplying ``subgraph_identity``.
+    The empty-string fallback matches the spec's "if the implementation
+    tracks one" clause for callers using
+    ``SubgraphNode(name=..., compiled=...)`` without supplying
+    ``subgraph_identity``.
     """
+    # Spec observability §5.3 (coord thread
+    # clarify-subgraph-name-semantics).
     idx = depth - 1
     if 0 <= idx < len(event.subgraph_identities):
         identity = event.subgraph_identities[idx]
@@ -214,10 +214,10 @@ class _OpenSpan:
     single event handler's scope, so no token needs to live across
     events.
 
-    Per proposal 0045: carries the span's own ``fan_out_index_chain``
-    and ``branch_name_chain`` so the augmentation walk can apply
-    §3.4's lineage-aware boundary rule without re-deriving the chain
-    from successive events."""
+    Carries the span's own ``fan_out_index_chain`` and
+    ``branch_name_chain`` so the augmentation walk can apply the
+    lineage-aware boundary rule without re-deriving the chain from
+    successive events."""
 
     span: Span
     fan_out_index_chain: tuple[int | None, ...] = ()
@@ -231,7 +231,7 @@ def _span_chain_on_path(
 ) -> bool:
     """Return True iff ``open_span``'s chain is a prefix-match of the
     augmenter's chain — i.e., the span sits on the augmenter's
-    call-stack ancestor path.  Per proposal 0045 §3.4:
+    call-stack ancestor path:
 
     - A span shorter than the augmenter (chain prefix-matches) is an
       ancestor on the path.
@@ -240,6 +240,7 @@ def _span_chain_on_path(
     - A span deeper than the augmenter, OR with a position-mismatch
       anywhere, is a sibling and MUST NOT be updated.
     """
+    # Spec observability §3.4 (proposal 0045): lineage-aware boundary.
     span_fi = open_span.fan_out_index_chain
     span_bn = open_span.branch_name_chain
     if len(span_fi) > len(aug_fi_chain):
@@ -385,7 +386,7 @@ class _InvState:
 
 @dataclass
 class OTelObserver:
-    """Observer-driven OTel span lifecycle per spec observability §6.
+    """Observer-driven OTel span lifecycle.
 
     Construct with a :class:`SpanProcessor` (typically a
     :class:`BatchSpanProcessor` wrapping a real exporter, or a
@@ -443,6 +444,7 @@ class OTelObserver:
     event handler's scope.
     """
 
+    # Spec observability §6 (observer-driven span lifecycle).
     # span_processor accepts a single processor or a sequence per
     # observability friction-roundup #5. The dataclass field type is
     # the union; ``__post_init__`` normalizes to a tuple internally.
@@ -764,7 +766,7 @@ def _open_started_span(self, event: NodeEvent) -> None:
         )
 
     def _handle_completed(self, event: NodeEvent) -> None:
-        """Close the matching span, applying §4.2 status mapping."""
+        """Close the matching span, applying the status mapping."""
         from openarmature.observability.correlation import current_invocation_id
 
         invocation_id = current_invocation_id()
@@ -942,8 +944,7 @@ def _collect_augmentation_targets(
         self, invocation_id: str, event: MetadataAugmentationEvent
     ) -> list[Span]:
         """Collect open spans on the augmenter's call-stack ancestor
-        chain per proposal 0045 §3.4.  Three-step boundary decision
-        tree per open span:
+        chain.  Three-step boundary decision tree per open span:
 
         1. Same context as augmenter (or descendant sharing the
            mutated mapping) — update.
@@ -1060,17 +1061,17 @@ def _collect_augmentation_targets(
     # ------------------------------------------------------------------
 
     def _emit_checkpoint_migrate_span(self, event: NodeEvent) -> None:
-        """Spec pipeline-utilities §6 cross-ref (proposal 0014): emit a
-        zero-duration ``openarmature.checkpoint.migrate`` span when
-        a versioned resume's migration chain runs. The synthetic
-        event carries ``_MigrationSummary`` on ``pre_state``; this
-        handler reads ``from_version`` / ``to_version`` /
+        """Emit a zero-duration ``openarmature.checkpoint.migrate``
+        span when a versioned resume's migration chain runs. The
+        synthetic event carries ``_MigrationSummary`` on ``pre_state``;
+        this handler reads ``from_version`` / ``to_version`` /
         ``chain_length`` from the summary onto the span.
 
         Emitted under the invocation's root span (no parent-node
         context — the migration runs before any node fires), so
         trace UIs surface it as the first child of the invocation.
         """
+        # Spec pipeline-utilities §6 cross-ref (proposal 0014).
         from openarmature.graph.compiled import _MigrationSummary
         from openarmature.observability.correlation import (
             current_correlation_id,
@@ -1117,10 +1118,10 @@ def _emit_checkpoint_migrate_span(self, event: NodeEvent) -> None:
         span.end()
 
     def _emit_checkpoint_save_span(self, event: NodeEvent) -> None:
-        """Spec pipeline-utilities §10.8 + observability §4.5: emit a
-        zero-duration ``openarmature.checkpoint.save`` span attached
-        to the most-recently-opened node span (the node whose
+        """Emit a zero-duration ``openarmature.checkpoint.save`` span
+        attached to the most-recently-opened node span (the node whose
         completed event triggered the save)."""
+        # Spec pipeline-utilities §10.8 + observability §4.5.
         from openarmature.observability.correlation import (
             current_correlation_id,
             current_invocation_id,
@@ -1313,8 +1314,8 @@ def _handle_typed_llm_completion(self, event: LlmCompletionEvent) -> None:
 
     def _handle_typed_llm_failed(self, event: LlmFailedEvent) -> None:
         """Open + close the ``openarmature.llm.complete`` span from the
-        typed LlmFailedEvent (failure path, proposal 0058). Same span
-        shape as the success path with ERROR status +
+        typed LlmFailedEvent (failure path). Same span shape as the
+        success path with ERROR status +
         ``openarmature.error.category`` attribute attached."""
         from openarmature.observability.correlation import (
             current_correlation_id,
@@ -1605,16 +1606,17 @@ def _sync_subgraph_spans(
         correlation_id: str | None,
         event: NodeEvent,
     ) -> None:
-        """Open any synthetic subgraph dispatch spans we need (per
-        observability §4.5: subgraph wrapper MUST emit a span); close
-        any subgraph spans whose prefix is no longer an ancestor of
-        the current event's namespace.
+        """Open any synthetic subgraph dispatch spans we need (the
+        subgraph wrapper MUST emit a span); close any subgraph spans
+        whose prefix is no longer an ancestor of the current event's
+        namespace.
 
         Called from ``_open_started_span`` BEFORE opening the leaf
         node span. Detached-mode entries (subgraph or fan-out instance)
         are registered as detached roots so their inner spans live
         in a fresh trace.
         """
+        # Spec observability §4.5: the subgraph wrapper emits a span.
         namespace = event.namespace
         # 1. Close any open subgraph spans that aren't ancestors of
         #    the current namespace — we've left those subgraphs.
@@ -2015,10 +2017,9 @@ def _open_fan_out_instance_dispatch_span(
         prefix: tuple[str, ...],
         event: NodeEvent,
     ) -> None:
-        """Per-instance dispatch span for a non-detached fan-out
-        (per spec §5.4 + proposal 0013, v0.10.0). Mirror of
-        ``_open_detached_fan_out_instance_root`` but lives in the
-        parent trace (no fresh trace_id).
+        """Per-instance dispatch span for a non-detached fan-out.
+        Mirror of ``_open_detached_fan_out_instance_root`` but lives in
+        the parent trace (no fresh trace_id).
 
         Parents under the fan-out node span at ``prefix``. Span name
         is the fan-out node's name; attributes are
@@ -2084,9 +2085,8 @@ def _open_parallel_branches_branch_dispatch_span(
         prefix: tuple[str, ...],
         event: NodeEvent,
     ) -> None:
-        """Per-branch dispatch span for a parallel-branches NODE (per
-        observability §5.7 + proposal 0044, v0.36.0).  Mirror of
-        ``_open_fan_out_instance_dispatch_span``.
+        """Per-branch dispatch span for a parallel-branches NODE.
+        Mirror of ``_open_fan_out_instance_dispatch_span``.
 
         Parents under the parallel-branches node span at ``prefix``.
         Span name is the branch's identifier (``event.branch_name``).
@@ -2209,7 +2209,7 @@ def _find_fan_out_node_span(self, inv_state: _InvState, prefix: tuple[str, ...])
         return None
 
     def _node_attrs(self, event: NodeEvent, correlation_id: str | None) -> dict[str, Any]:
-        """Build the §5 attribute set for a node span."""
+        """Build the attribute set for a node span."""
         attrs: dict[str, Any] = {
             "openarmature.node.name": event.node_name,
             "openarmature.node.namespace": list(event.namespace),

From 311c23f2b26f9e0072021936060520a95e0d3e17 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:32:49 -0700
Subject: [PATCH 03/15] Strip spec refs from llm docstrings

---
 src/openarmature/llm/messages.py         |  8 +++----
 src/openarmature/llm/provider.py         | 17 ++++++++-------
 src/openarmature/llm/providers/openai.py | 27 ++++++++++++------------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/src/openarmature/llm/messages.py b/src/openarmature/llm/messages.py
index 9bc32a8..f52ba2e 100644
--- a/src/openarmature/llm/messages.py
+++ b/src/openarmature/llm/messages.py
@@ -77,10 +77,10 @@ class Tool(BaseModel):
 class ForceTool(BaseModel):
     """Force the model to call exactly the named tool.
 
-    Use the record form of the §5 `tool_choice` discriminated union
-    when you need the model to call a specific tool by name. ``type``
-    is the spec-level discriminator (``"tool"``); the wire mapping
-    (§8.1.1) renames it to ``"function"`` for the OpenAI body. The
+    Use the record form of the `tool_choice` discriminated union when
+    you need the model to call a specific tool by name. ``type`` is the
+    spec-level discriminator (``"tool"``); the wire mapping renames it
+    to ``"function"`` for the OpenAI body. The
     ``name`` MUST match a ``Tool.name`` in the supplied ``tools``
     list; ``validate_tool_choice`` enforces this at pre-send time and
     raises ``ProviderInvalidRequest`` on violation.
diff --git a/src/openarmature/llm/provider.py b/src/openarmature/llm/provider.py
index 71e03ac..59ae808 100644
--- a/src/openarmature/llm/provider.py
+++ b/src/openarmature/llm/provider.py
@@ -100,8 +100,8 @@ async def complete(
                 supplied, the implementation constrains the model's
                 output to the schema and populates ``Response.parsed``
                 with the validated value.
-            tool_choice: Optional tool-choice constraint (spec §5). One
-                of ``"auto"``, ``"required"``, ``"none"``, or a
+            tool_choice: Optional tool-choice constraint. One of
+                ``"auto"``, ``"required"``, ``"none"``, or a
                 :class:`ForceTool` record. When ``None`` (the default)
                 the wire ``tool_choice`` field is omitted and the
                 provider's own default applies. Pre-send validation
@@ -213,9 +213,9 @@ def validate_tool_choice(
     tool_choice: ToolChoice | None,
     tools: Sequence[Tool] | None,
 ) -> None:
-    """Validate ``tool_choice`` against ``tools`` per spec §5.
+    """Validate ``tool_choice`` against ``tools``.
 
-    Raises :class:`ProviderInvalidRequest` (the §7
+    Raises :class:`ProviderInvalidRequest` (the
     ``provider_invalid_request`` category) on:
 
     - ``tool_choice`` supplied as a string that is not one of
@@ -229,11 +229,12 @@ def validate_tool_choice(
     - ``tool_choice=ForceTool(name=X)`` supplied with ``X`` not in the
       supplied tools list.
 
-    No-op when ``tool_choice`` is ``None`` (the default — preserves
-    pre-0025 behavior; the wire field is omitted and the provider's
-    own default applies). ``tool_choice="auto"`` and
-    ``tool_choice="none"`` have no ``tools``-related preconditions.
+    No-op when ``tool_choice`` is ``None`` (the default — the wire
+    field is omitted and the provider's own default applies).
+    ``tool_choice="auto"`` and ``tool_choice="none"`` have no
+    ``tools``-related preconditions.
     """
+    # Spec llm-provider §5 (tool_choice) / §7 (provider_invalid_request).
     if tool_choice is None:
         return
     # Two-layer type defense at the API boundary. Pyright catches the
diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py
index b1551fb..8c86cb4 100644
--- a/src/openarmature/llm/providers/openai.py
+++ b/src/openarmature/llm/providers/openai.py
@@ -370,7 +370,7 @@ async def complete(
         class; when supplied as a JSON Schema dict,
         ``Response.parsed`` is the deserialized dict.
 
-        ``tool_choice`` is validated against ``tools`` per spec §5:
+        ``tool_choice`` is validated against ``tools``:
         ``"required"`` and the ``ForceTool`` record both demand
         non-empty ``tools``, and ``ForceTool.name`` must appear in the
         supplied list. Violations raise ``provider_invalid_request``
@@ -589,7 +589,7 @@ def _build_llm_completion_event(
 
         Sources identity / scoping fields from the calling-node
         ContextVars and outcome fields from the response. Request-side
-        fields (per proposal 0057) are passed through from the
+        fields are passed through from the
         provider's complete() local state — serialized message list,
         the gen_ai.request.* parameter mapping, the RuntimeConfig
         extras, the prompt-context snapshots taken at dispatch time,
@@ -669,17 +669,17 @@ def _build_llm_failed_event(
         """Construct the typed LlmFailedEvent for the failure path.
 
         Sources identity / scoping fields from the calling-node
-        ContextVars and failure fields from the raised §7 exception.
+        ContextVars and failure fields from the raised exception.
         Field set mirrors LlmCompletionEvent (identity + request-side)
-        plus the three failure-specific fields per proposal 0058.
+        plus the three failure-specific fields.
 
         ``error_type`` defaults to the exception class name — falls
         into the "upstream exception class name" style documented in
-        the spec field table. Providers that have a vendor error code
+        the field table. Providers that have a vendor error code
         available (e.g. ``rate_limit_exceeded`` for OpenAI) can
-        override with vendor-specific detail in a future spec
-        proposal; for now the class name is the safest default since
-        every LlmProviderError subclass carries one.
+        override with vendor-specific detail in a future proposal; for
+        now the class name is the safest default since every
+        LlmProviderError subclass carries one.
         """
 
         namespace = current_namespace_prefix()
@@ -1178,7 +1178,8 @@ def _augment_messages_with_schema_directive(
 
 
 def _message_to_wire(msg: Message) -> dict[str, Any]:
-    """Spec §8.1.1 request mapping for one message."""
+    """Request mapping for one message."""
+    # Spec llm-provider §8.1.1.
     if isinstance(msg, SystemMessage):
         return {"role": "system", "content": msg.content}
     if isinstance(msg, UserMessage):
@@ -1299,14 +1300,14 @@ def _tool_to_wire(tool: Tool) -> dict[str, Any]:
 
 
 def _wire_to_assistant_message(wire: dict[str, Any], *, lenient_args: bool) -> AssistantMessage:
-    """Parse OpenAI-shaped assistant message into spec §3 form.
+    """Parse an OpenAI-shaped assistant message into canonical form.
 
     When ``lenient_args=True`` (i.e. ``finish_reason == "error"``),
     tool calls with unparseable JSON arguments populate
-    ``arguments=None`` instead of raising. Per spec §3 "Validation
-    under finish_reason: error" — degraded responses surface what
-    they can; repair is a caller concern.
+    ``arguments=None`` instead of raising — degraded responses surface
+    what they can; repair is a caller concern.
     """
+    # Spec llm-provider §3: validation under finish_reason "error".
     content_raw = wire.get("content") or ""
     content: str = content_raw if isinstance(content_raw, str) else ""
     raw_tool_calls = cast("list[Any]", wire.get("tool_calls") or [])

From 1375daca6ddf85c666dfb2e75492631fb10f0653 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:38:01 -0700
Subject: [PATCH 04/15] Strip spec refs from prompts docstrings

---
 .../prompts/backends/filesystem.py            |  4 +-
 src/openarmature/prompts/backends/langfuse.py | 13 ++---
 src/openarmature/prompts/context.py           | 15 +++---
 src/openarmature/prompts/errors.py            |  4 +-
 src/openarmature/prompts/hashing.py           |  2 +-
 src/openarmature/prompts/label_resolver.py    |  8 +--
 src/openarmature/prompts/manager.py           | 34 +++++++------
 src/openarmature/prompts/prompt.py            | 49 +++++++++----------
 8 files changed, 66 insertions(+), 63 deletions(-)

diff --git a/src/openarmature/prompts/backends/filesystem.py b/src/openarmature/prompts/backends/filesystem.py
index 0e0ef48..6b555a4 100644
--- a/src/openarmature/prompts/backends/filesystem.py
+++ b/src/openarmature/prompts/backends/filesystem.py
@@ -21,7 +21,7 @@ class FilesystemPromptBackend:
     - ``layout="per-label"`` (default): ``<root>/<label>/<name>.j2``.
       The ``label`` subdirectory keeps name-collisions across labels
       distinct (e.g., ``prompts/production/greeting.j2`` and
-      ``prompts/staging/greeting.j2``). Spec §5 permits filesystem
+      ``prompts/staging/greeting.j2``). The spec permits filesystem
       backends to interpret label as "a subdirectory or filename
       suffix"; this is the subdirectory variant.
     - ``layout="flat"``: ``<root>/<name>.j2``. The same template
@@ -39,7 +39,7 @@ class FilesystemPromptBackend:
     well past any realistic single-backend exposure.
 
     Optional ``sampling_source`` populates ``Prompt.sampling`` from a
-    sidecar file, per the spec §5 informative filesystem convention:
+    sidecar file, per the informative filesystem convention:
 
     - ``"none"`` (default): never populate ``sampling``.
     - ``"per-prompt-sidecar"``: read ``<name>.config.json`` from the
diff --git a/src/openarmature/prompts/backends/langfuse.py b/src/openarmature/prompts/backends/langfuse.py
index 2b6b5e7..1832306 100644
--- a/src/openarmature/prompts/backends/langfuse.py
+++ b/src/openarmature/prompts/backends/langfuse.py
@@ -5,12 +5,13 @@
 module only when ``langfuse`` is installed (``backends/__init__`` does
 not import it, so the base package stays langfuse-free).
 
-Per proposal 0046 (v0.38.0): both Langfuse TEXT and CHAT prompts are
-supported.  Text prompts return a :class:`TextPrompt`; chat prompts
-return a :class:`ChatPrompt` with one :class:`ContentSegment` per
-Langfuse chat message.  Langfuse chat placeholders map to
+Both Langfuse TEXT and CHAT prompts are supported.  Text prompts
+return a :class:`TextPrompt`; chat prompts return a
+:class:`ChatPrompt` with one :class:`ContentSegment` per Langfuse
+chat message.  Langfuse chat placeholders map to
 :class:`PlaceholderSegment` entries.
 """
+# Proposal 0046 (v0.38.0): Langfuse text + chat prompt support.
 
 from __future__ import annotations
 
@@ -227,9 +228,9 @@ def _chat_segments_from_normalized(
     :class:`ChatSegment` entries.  Placeholder segments use
     ``model_construct`` so a Langfuse-stored prompt with a
     malformed placeholder name (e.g., leading-digit) reaches the
-    render path before raising — the spec-normative §11 error
+    render path before raising — the normative render-time error
     trigger.  Content segments go through the normal pydantic
-    constructor since their fields don't carry spec-§11 constraints
+    constructor since their fields don't carry the same constraints
     that hand-built callers would benefit from catching earlier."""
     for entry in entries:
         if entry.get("type") == "placeholder":
diff --git a/src/openarmature/prompts/context.py b/src/openarmature/prompts/context.py
index 4bf8fc4..006a57a 100644
--- a/src/openarmature/prompts/context.py
+++ b/src/openarmature/prompts/context.py
@@ -1,19 +1,20 @@
 """Context variables for propagating prompt identity to observability.
 
-Spec §11 leaves the propagation mechanism implementation-defined.
-This module provides the Python implementation: two ``ContextVar``s
-plus two context managers (``with_active_prompt`` and
+The propagation mechanism is implementation-defined. This module
+provides the Python implementation: two ``ContextVar``s plus two
+context managers (``with_active_prompt`` and
 ``with_active_prompt_group``) that observers read to surface the
-normative ``openarmature.prompt.*`` and
-``openarmature.prompt.group_name`` span attributes.
+``openarmature.prompt.*`` and ``openarmature.prompt.group_name`` span
+attributes.
 
 Nesting policy: innermost-wins. When two ``with_active_prompt``
 contexts nest, the inner result is the active one for the
 duration of the inner block; the same applies to
 ``with_active_prompt_group``. This matches Python's natural
-``ContextVar`` token-stacking behavior; spec §11 doesn't mandate
-a nesting policy.
+``ContextVar`` token-stacking behavior.
 """
+# Spec prompt-management §11: propagation mechanism is
+# implementation-defined (no mandated nesting policy).
 
 from __future__ import annotations
 
diff --git a/src/openarmature/prompts/errors.py b/src/openarmature/prompts/errors.py
index b4d7e14..6884355 100644
--- a/src/openarmature/prompts/errors.py
+++ b/src/openarmature/prompts/errors.py
@@ -54,8 +54,8 @@ class PromptRenderError(PromptError):
     Carries the source prompt's identity plus the variable mapping
     and a description of the render failure.
 
-    Non-transient per spec §10: retrying the same render with the
-    same prompt + variables will not succeed. Callers whose backend
+    Non-transient: retrying the same render with the same prompt +
+    variables will not succeed. Callers whose backend
     serves a fixed template later should re-fetch + re-render rather
     than relying on retry-middleware to auto-retry the failed render.
     """
diff --git a/src/openarmature/prompts/hashing.py b/src/openarmature/prompts/hashing.py
index 7e62b2a..fa71435 100644
--- a/src/openarmature/prompts/hashing.py
+++ b/src/openarmature/prompts/hashing.py
@@ -24,7 +24,7 @@ def compute_rendered_hash(messages: list[Message]) -> str:
     """SHA-256 over a canonical JSON serialization of ``messages``.
 
     Preserves message boundaries, roles, content (including
-    content-block structure per llm-provider §3.1), and tool_calls.
+    content-block structure), and tool_calls.
     ``json.dumps(sort_keys=True, separators=(",", ":"))`` over the
     per-message ``model_dump(mode="json")`` is deterministic across
     runs; datetimes serialize as ISO-8601 strings.
diff --git a/src/openarmature/prompts/label_resolver.py b/src/openarmature/prompts/label_resolver.py
index b6c9427..bbf43c3 100644
--- a/src/openarmature/prompts/label_resolver.py
+++ b/src/openarmature/prompts/label_resolver.py
@@ -25,11 +25,13 @@
 class LabelResolver(Protocol):
     """Resolves a prompt name to the label to fetch under.
 
-    Implementations MUST follow the §7 fallback chain in
-    :meth:`resolve`: per-name override > default override > spec
-    fallback ``"production"``.
+    Implementations MUST follow the fallback chain in
+    :meth:`resolve`: per-name override > default override > the
+    ``"production"`` fallback.
     """
 
+    # Spec prompt-management §7: label fallback chain.
+
     def resolve(self, name: str) -> str:
         """Return the label to fetch ``name`` under.
 
diff --git a/src/openarmature/prompts/manager.py b/src/openarmature/prompts/manager.py
index a53cfd0..9e5be7e 100644
--- a/src/openarmature/prompts/manager.py
+++ b/src/openarmature/prompts/manager.py
@@ -53,7 +53,7 @@ class PromptManager:
     Users interact with the manager; backends are an implementation
     detail of construction. The manager owns:
 
-    - ``fetch``: consults backends in order per §9 (was §8) fallback semantics.
+    - ``fetch``: consults backends in order with fallback semantics.
     - ``render``: synchronous local string transform; produces a
       ``PromptResult``.
     - ``get``: convenience: ``render(await fetch(...), variables)``.
@@ -62,13 +62,16 @@ class PromptManager:
 
     - ``label_resolver``: optional ``LabelResolver`` consulted by
       :meth:`fetch` / :meth:`get` when no explicit ``label`` argument
-      is supplied (§6 step-2 of the fallback chain).
+      is supplied (step 2 of the label-resolution chain).
     - ``jinja_undefined``: Jinja ``Undefined`` subclass for render-time
-      variable resolution. Default ``StrictUndefined`` matches spec
-      §8 (was §7); pass ``jinja2.ChainableUndefined`` or any other
-      ``Undefined`` subclass to opt out of strict-by-default rendering.
+      variable resolution. Default ``StrictUndefined`` for
+      strict-by-default rendering; pass ``jinja2.ChainableUndefined``
+      or any other ``Undefined`` subclass to opt out.
     """
 
+    # Spec prompt-management: fetch fallback (§9), label-resolution
+    # chain (§6), strict-by-default render (§8).
+
     def __init__(
         self,
         *backends: PromptBackend,
@@ -108,11 +111,10 @@ def _resolve_label(self, label: str | None, name: str) -> str:
         return SPEC_FALLBACK_LABEL
 
     async def fetch(self, name: str, label: str | None = None) -> Prompt:
-        """Consult composed backends in order, applying §9 (was §8) fallback.
+        """Consult composed backends in order, applying the fallback chain.
 
-        Label is resolved per §6's three-step chain: explicit
-        argument > configured ``LabelResolver`` > spec fallback
-        ``"production"``.
+        Label is resolved by a three-step chain: explicit argument >
+        configured ``LabelResolver`` > the ``"production"`` fallback.
 
         - First successful fetch wins; further backends are not consulted.
         - ``PromptNotFound`` from any backend STOPS the chain: the
@@ -169,18 +171,18 @@ def render(
         """Apply ``variables`` (and optionally ``placeholders``) and return a PromptResult.
 
         Render is synchronous; no I/O.  Variables are strict by
-        default per §8: a template reference to a name not in
-        ``variables`` raises ``PromptRenderError``.
+        default: a template reference to a name not in ``variables``
+        raises ``PromptRenderError``.
 
-        For a :class:`TextPrompt`, ``placeholders`` is ignored per
-        spec §6 ("a Text-prompt renders to exactly one Message with
+        For a :class:`TextPrompt`, ``placeholders`` is ignored ("a
+        Text-prompt renders to exactly one Message with
         ``role: "user"`` and ``content`` equal to the rendered
         template text").  Implementations MUST NOT raise on a
         non-empty ``placeholders`` mapping passed alongside a Text
         prompt.
 
         For a :class:`ChatPrompt`, the chat_template is rendered
-        segment-by-segment per spec §6 — content segments substitute
+        segment-by-segment — content segments substitute
         ``variables`` into the text (or per-block content) and
         produce one Message per segment; placeholder segments inject
         the caller-supplied ``list[Message]`` from
@@ -400,8 +402,8 @@ def _render_content_block(
         key: str,
     ) -> ContentBlock | None:
         """Render a single content-block template.  Returns None when
-        a text block renders to the empty string (caller surfaces
-        §11 empty-text-block error)."""
+        a text block renders to the empty string (caller surfaces the
+        empty-text-block error)."""
         if isinstance(block, TextBlockTemplate):
             rendered = self._render_template_text(compute_template_hash(block.text), block.text, variables)
             if not rendered:
diff --git a/src/openarmature/prompts/prompt.py b/src/openarmature/prompts/prompt.py
index cac943e..f92e39c 100644
--- a/src/openarmature/prompts/prompt.py
+++ b/src/openarmature/prompts/prompt.py
@@ -1,13 +1,13 @@
 """Prompt and PromptResult records.
 
-Per proposal 0046 (prompt-management §3.1): two prompt variants land
-on this module — the existing single-string Text-prompt
-(:class:`TextPrompt`, formerly ``Prompt``) and the new role-tagged
-Chat-prompt (:class:`ChatPrompt`) carrying a list of
+Two prompt variants land on this module — the existing single-string
+Text-prompt (:class:`TextPrompt`, formerly ``Prompt``) and the
+role-tagged Chat-prompt (:class:`ChatPrompt`) carrying a list of
 :class:`ChatSegment` entries.  The user-facing union alias
 :data:`Prompt` covers both; callers ``isinstance``-narrow at the
 consumption point.
 """
+# Proposal 0046 (prompt-management §3.1): Text + Chat prompt variants.
 
 from __future__ import annotations
 
@@ -42,8 +42,8 @@ class SamplingConfig(RuntimeConfig):
 # blocks are assistant-side round-trip content and don't sit on
 # the authored-template surface.
 class TextBlockTemplate(BaseModel):
-    """Text content block template.  Renders to an llm-provider
-    §3.1.1 text block carrying the variable-substituted text."""
+    """Text content block template.  Renders to an llm-provider text
+    block carrying the variable-substituted text."""
 
     model_config = ConfigDict(extra="forbid")
 
@@ -53,7 +53,7 @@ class TextBlockTemplate(BaseModel):
 
 class ImageURLBlockTemplate(BaseModel):
     """URL image content block template.  Renders to an llm-provider
-    §3.1.2 URL image block; ``url`` is variable-substituted."""
+    URL image block; ``url`` is variable-substituted."""
 
     model_config = ConfigDict(extra="forbid")
 
@@ -64,7 +64,7 @@ class ImageURLBlockTemplate(BaseModel):
 
 class ImageInlineBlockTemplate(BaseModel):
     """Inline base64 image content block template.  Renders to an
-    llm-provider §3.1.2 inline image block; ``base64_data`` and
+    llm-provider inline image block; ``base64_data`` and
     ``media_type`` are variable-substituted."""
 
     model_config = ConfigDict(extra="forbid")
@@ -89,20 +89,19 @@ class ImageInlineBlockTemplate(BaseModel):
 class ContentSegment(BaseModel):
     """One role-tagged content segment of a chat prompt.
 
-    Per spec §3.1, ``role`` is one of the three canonical authoring
-    roles from llm-provider §3 (Message shape); the fourth llm-provider
-    §3 role (``"tool"``) is intentionally excluded — tool-result
-    messages have a distinct per-message shape that doesn't map to a
-    template-author surface.  Tool-loop content flows through
-    placeholder segments instead.
+    ``role`` is one of the three canonical authoring roles from the
+    Message shape; the fourth role (``"tool"``) is intentionally
+    excluded — tool-result messages have a distinct per-message shape
+    that doesn't map to a template-author surface.  Tool-loop content
+    flows through placeholder segments instead.
 
     ``content`` is either a single text template (the common case) or
     an ordered non-empty list of :class:`ContentBlockTemplate` entries
     for multimodal user messages (text + image).  Image blocks are
-    user-only per llm-provider §3.1.2 — a non-user role with an
-    image-block-containing list raises ``prompt_render_error`` at
-    render time.  Construction-time validation here surfaces the
-    same condition earlier for ergonomic feedback.
+    user-only — a non-user role with an image-block-containing list
+    raises ``prompt_render_error`` at render time.  Construction-time
+    validation here surfaces the same condition earlier for ergonomic
+    feedback.
     """
 
     model_config = ConfigDict(extra="forbid")
@@ -136,7 +135,7 @@ class PlaceholderSegment(BaseModel):
     an empty list injects zero messages (valid; the first-turn case),
     while an absent mapping entry raises ``prompt_render_error``.
 
-    Per spec §3.1 the ``placeholder`` name MUST match
+    The ``placeholder`` name MUST match
     ``[A-Za-z_][A-Za-z0-9_]*`` — ASCII identifier shape — to avoid
     collision with backend placeholder syntax.
     """
@@ -205,13 +204,12 @@ class TextPrompt(_PromptBase):
     """An unrendered single-string template plus identity metadata.
 
     Renders to a single :class:`UserMessage` carrying the substituted
-    template text (spec §6.render Text-prompt clause).  Per the
-    proposal 0046 v0.38.0 narrowing, Text-prompts render to exactly
-    one Message with ``role: "user"``; multi-message and multimodal
-    prompts go through :class:`ChatPrompt`.
+    template text.  Text-prompts render to exactly one Message with
+    ``role: "user"``; multi-message and multimodal prompts go through
+    :class:`ChatPrompt`.
 
     ``placeholders`` passed to ``PromptManager.render`` are ignored
-    for Text-prompt rendering per spec §6.
+    for Text-prompt rendering.
     """
 
     kind: Literal["text"] = "text"
@@ -219,8 +217,7 @@ class TextPrompt(_PromptBase):
 
 
 class ChatPrompt(_PromptBase):
-    """A role-tagged, multi-segment chat prompt (spec §3.1
-    *Chat-prompt variant*, proposal 0046 v0.38.0).
+    """A role-tagged, multi-segment chat prompt.
 
     ``chat_template`` is an ordered list of :class:`ChatSegment`
     entries — content segments carrying a role + content (text

From 7f1e7e8d222875d3cf48e1e770561ee53b6a1e09 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:43:21 -0700
Subject: [PATCH 05/15] Strip spec refs from checkpoint docstrings

---
 .../checkpoint/backends/memory.py             | 51 ++++++++++---------
 .../checkpoint/backends/sqlite.py             |  4 +-
 src/openarmature/checkpoint/errors.py         | 31 +++++------
 src/openarmature/checkpoint/migration.py      | 22 ++++----
 src/openarmature/checkpoint/protocol.py       | 29 ++++++-----
 5 files changed, 67 insertions(+), 70 deletions(-)

diff --git a/src/openarmature/checkpoint/backends/memory.py b/src/openarmature/checkpoint/backends/memory.py
index f6dc348..b1d08cb 100644
--- a/src/openarmature/checkpoint/backends/memory.py
+++ b/src/openarmature/checkpoint/backends/memory.py
@@ -19,24 +19,26 @@
 
 @dataclass(frozen=True)
 class FanOutInternalSaveBatching:
-    """Per-Checkpointer-instance configuration for §10.11.4 fan-out
-    internal save batching.
+    """Per-Checkpointer-instance configuration for fan-out internal
+    save batching.
 
     Applies ONLY to fan-out instance internal saves. Outermost-graph,
     subgraph-internal, and fan-out node completion saves remain
-    synchronous per §10.3.
+    synchronous.
 
     - ``flush_every``: flush the buffer every N buffered saves. ``0``
       / negative means batching is disabled (every save flushes
       immediately). The buffered save count resets at each flush.
 
-    Buffered-but-unflushed saves are LOST on crash per §10.11.4;
-    on resume, instances whose completed state was buffered-only
-    revert to ``in_flight`` / ``not_started`` and re-run. The §10.11.1
-    reducer correctness holds because their contributions hadn't
-    durably committed.
+    Buffered-but-unflushed saves are LOST on crash; on resume,
+    instances whose completed state was buffered-only revert to
+    ``in_flight`` / ``not_started`` and re-run. Reducer correctness
+    holds because their contributions hadn't durably committed.
     """
 
+    # Spec pipeline-utilities §10.11.4 (fan-out internal save batching);
+    # §10.3 synchronous saves; §10.11.1 reducer correctness.
+
     flush_every: int = 0
 
 
@@ -53,23 +55,22 @@ class InMemoryCheckpointer:
     from :meth:`load`; no serialization round-trip. (This is the
     feature: tests can assert on the saved state's identity.)
 
-    **State-migration eligibility:** none. Per spec §10.12.1, a
-    backend supports migration only when it can expose a structural
-    intermediate form of the loaded state independent of the current
+    **State-migration eligibility:** none. A backend supports
+    migration only when it can expose a structural intermediate form
+    of the loaded state independent of the current
     state class. This backend holds live typed instances by
     reference, so a version mismatch on resume raises
     ``CheckpointRecordInvalid`` rather than consulting the
     migration registry.
 
-    **Fan-out internal save batching** (per spec §10.11.4): optional
-    via the ``fan_out_internal_save_batching`` constructor parameter.
+    **Fan-out internal save batching**: optional via the
+    ``fan_out_internal_save_batching`` constructor parameter.
     Default is no batching (every save flushes immediately). When
     enabled, fan-out instance internal saves buffer in memory and
     flush every ``flush_every`` saves. Outermost-graph,
     subgraph-internal, and fan-out node completion saves bypass the
     buffer entirely (they remain synchronous). On crash, buffered
-    saves are lost — by design, per §10.11.4's documented cost
-    trade-off.
+    saves are lost — by design, a documented cost trade-off.
     """
 
     # Per spec §10.12.1: in-memory storage holds live typed-state
@@ -104,8 +105,8 @@ async def save(self, invocation_id: str, record: CheckpointRecord) -> None:
         previous record for the same id. Not durable across process
         restarts.
 
-        Per §10.11.4: outermost-graph, subgraph-internal, and
-        fan-out node completion saves are synchronous regardless of
+        Outermost-graph, subgraph-internal, and fan-out node
+        completion saves are synchronous regardless of
         the batching configuration. The engine routes fan-out
         instance internal saves through :meth:`save_fan_out_internal`
         instead; this method bypasses the buffer.
@@ -121,8 +122,8 @@ async def save(self, invocation_id: str, record: CheckpointRecord) -> None:
             self._records[invocation_id] = record
 
     async def save_fan_out_internal(self, invocation_id: str, record: CheckpointRecord) -> None:
-        """Buffer a fan-out instance internal save under the §10.11.4
-        batching policy. When batching is disabled (default), behaves
+        """Buffer a fan-out instance internal save under the batching
+        policy. When batching is disabled (default), behaves
         identically to :meth:`save` — every save is synchronously
         durable. When ``flush_every`` is positive, the save is
         buffered; the buffer flushes when the count reaches the
@@ -142,10 +143,10 @@ async def save_fan_out_in_flight_failure(
         invocation_id: str,
         record: CheckpointRecord,
     ) -> None:
-        """Buffer an "instance failed mid-execution" save under §10.11.4
-        batching. The failure save records the in_flight state of an
-        instance whose terminal inner node raised; this save closes the
-        in_flight observability gap (per §10.11) for instances whose
+        """Buffer an "instance failed mid-execution" save under the
+        batching policy. The failure save records the in_flight state
+        of an instance whose terminal inner node raised; this save
+        closes the in_flight observability gap for instances whose
         subgraphs have no sibling-completed save to piggyback on.
 
         Under batching, this save buffers BUT does NOT count toward
@@ -184,8 +185,8 @@ def _flush_invocation_buffer_locked(self, invocation_id: str) -> None:
 
     async def load(self, invocation_id: str) -> CheckpointRecord | None:
         """Return the saved record for ``invocation_id`` or ``None``
-        if nothing has been saved under that id. Per §10.11.4:
-        buffered-but-unflushed fan-out internal saves are NOT visible
+        if nothing has been saved under that id. Buffered-but-unflushed
+        fan-out internal saves are NOT visible
         to ``load`` — that's the crash-loses-buffered contract. To
         simulate a crash before the buffer flushes, drop the
         Checkpointer reference; the buffer is in-memory only.
diff --git a/src/openarmature/checkpoint/backends/sqlite.py b/src/openarmature/checkpoint/backends/sqlite.py
index 22849a6..5b2b04d 100644
--- a/src/openarmature/checkpoint/backends/sqlite.py
+++ b/src/openarmature/checkpoint/backends/sqlite.py
@@ -281,8 +281,8 @@ def _decode(self, blob: bytes, recorded_mode: str, invocation_id: str) -> Any:
 
     async def save(self, invocation_id: str, record: CheckpointRecord) -> None:
         """Upsert ``record`` under ``invocation_id``. The state,
-        completed positions, parent-state stack, and (per proposal 0009)
-        per-fan-out-node progress are serialized via the configured
+        completed positions, parent-state stack, and per-fan-out-node
+        progress are serialized via the configured
         :class:`SerializationMode` and written in a single statement.
         Writes are durable on return (WAL mode, per-write fsync at the
         SQLite layer)."""
diff --git a/src/openarmature/checkpoint/errors.py b/src/openarmature/checkpoint/errors.py
index cf51654..38786d4 100644
--- a/src/openarmature/checkpoint/errors.py
+++ b/src/openarmature/checkpoint/errors.py
@@ -60,14 +60,13 @@ class CheckpointRecordInvalid(CheckpointError):
     """Raised when ``Checkpointer.load(X)`` returns a record whose
     schema is incompatible with the current graph: state shape
     mismatch, missing required fields, OR a post-migration state
-    that fails to deserialize against the current state class (per
-    spec §10.12.4). Non-transient.
+    that fails to deserialize against the current state class.
+    Non-transient.
 
     Note: raw ``schema_version`` mismatches no longer route here.
     They now flow through ``CheckpointStateMigrationMissing`` (no
     chain registered) or ``CheckpointStateMigrationFailed`` (chain
-    application raised) per spec §10.10's three-way category
-    distinction.
+    application raised) — a three-way category distinction.
     """
 
     category = "checkpoint_record_invalid"
@@ -80,8 +79,8 @@ def __init__(self, invocation_id: str, message: str) -> None:
 class CheckpointStateMigrationMissing(CheckpointError):
     """Raised on resume when the saved record's ``schema_version``
     does not match the current state class's ``schema_version`` AND
-    no chain of registered migrations bridges the two. Non-transient
-    per spec §10.10; the user MUST register a migration (or pin
+    no chain of registered migrations bridges the two. Non-transient;
+    the user MUST register a migration (or pin
     their state to the saved version) for the resume to succeed.
 
     Carries the saved-from / current-to versions and a description
@@ -112,18 +111,16 @@ def __init__(
 
 
 class CheckpointStateMigrationChainAmbiguous(CheckpointError):
-    """Raised when the registered migration graph is ambiguous per
-    spec §10.10 / §10.12 (proposal 0018, spec v0.16.0):
+    """Raised when the registered migration graph is ambiguous:
 
-    - Duplicate-pair case (§10.12.1): two migrations register with the
-      same ``(from_version, to_version)`` pair. Raised at registration
+    - Duplicate-pair case: two migrations register with the same
+      ``(from_version, to_version)`` pair. Raised at registration
       time so the user sees the ambiguity before any resume attempt.
-    - Multi-shortest-path case (§10.12.2): the registered migration
-      graph has multiple distinct shortest paths between the saved
-      and current versions (e.g., a diamond ``v1→v2→v4`` + ``v1→v3→v4``).
-      Spec accepts either compile-time detection (recommended) or
-      load-time detection (this impl runs the check inside BFS at
-      resume time).
+    - Multi-shortest-path case: the registered migration graph has
+      multiple distinct shortest paths between the saved and current
+      versions (e.g., a diamond ``v1→v2→v4`` + ``v1→v3→v4``). Either
+      compile-time detection (recommended) or load-time detection is
+      acceptable (this impl runs the check inside BFS at resume time).
 
     Non-transient: retrying without changing the migration graph
     will not succeed. Carries ``from_version`` / ``to_version`` when
@@ -149,7 +146,7 @@ def __init__(
 
 class CheckpointStateMigrationFailed(CheckpointError):
     """Raised on resume when a registered migration function raises
-    during chain application (per spec §10.12.2). The migration's
+    during chain application. The migration's
     exception is preserved as ``__cause__``. Non-transient by
     default: a buggy migration is deterministic, so retrying
     without changing the migration code will not succeed.
diff --git a/src/openarmature/checkpoint/migration.py b/src/openarmature/checkpoint/migration.py
index 4244c89..23516d2 100644
--- a/src/openarmature/checkpoint/migration.py
+++ b/src/openarmature/checkpoint/migration.py
@@ -1,12 +1,12 @@
 """State migration types and registry.
 
-Realizes pipeline-utilities §10.12 (proposal 0014). A
-``StateMigration`` describes one edge in the migration graph;
+A ``StateMigration`` describes one edge in the migration graph;
 ``MigrationRegistry`` holds the ordered set and resolves chains
 via BFS. Ambiguity (duplicate ``(from, to)`` pairs OR multiple
 distinct shortest paths between the same source/sink) is a
-configuration-style error per §10.12.1 / §10.12.2.
+configuration-style error.
 """
+# Realizes pipeline-utilities §10.12 (proposal 0014).
 
 from __future__ import annotations
 
@@ -29,9 +29,9 @@ class StateMigration:
     chain (or for final deserialization into the current state class).
 
     Migrations MUST be pure: deterministic, no I/O, no implicit
-    state. The framework does not police purity per spec §10.12.2
-    ("the contract is documented, not policed"); violating it
-    risks non-deterministic resume.
+    state. The framework does not police purity (the contract is
+    documented, not policed); violating it risks non-deterministic
+    resume.
     """
 
     from_version: str
@@ -46,15 +46,14 @@ class MigrationRegistry:
 
     - Two migrations with the same ``from_version`` AND
       ``to_version`` raise ``CheckpointStateMigrationChainAmbiguous``
-      directly per spec §10.10 (proposal 0018) so the canonical
-      category surfaces at the registration boundary without any
-      wrapping by the builder.
+      directly so the canonical category surfaces at the registration
+      boundary without any wrapping by the builder.
     - Two migrations with the same ``from_version`` and different
       ``to_version`` are permitted (branched migration graph;
       chain resolution picks a path or raises ambiguity if multiple
       shortest paths exist).
 
-    Resolution-time semantics (per §10.12.2):
+    Resolution-time semantics:
 
     - BFS from ``record.schema_version`` to
       ``current.schema_version``. BFS naturally finds the shortest
@@ -133,8 +132,7 @@ def resolve_chain(
 
         Raises ``CheckpointStateMigrationChainAmbiguous`` if
         multiple distinct shortest paths exist between
-        ``from_version`` and ``to_version`` (ambiguous chain per
-        spec §10.10 / §10.12.2; proposal 0018 / spec v0.16.0).
+        ``from_version`` and ``to_version`` (an ambiguous chain).
         Same canonical category as the duplicate-pair detection
         in ``register``; one type for chain ambiguity regardless
         of when it surfaces.
diff --git a/src/openarmature/checkpoint/protocol.py b/src/openarmature/checkpoint/protocol.py
index 426e36f..1804d49 100644
--- a/src/openarmature/checkpoint/protocol.py
+++ b/src/openarmature/checkpoint/protocol.py
@@ -23,12 +23,13 @@
 produce records in the shipping version (atomic-restart contract).
 
 ``CheckpointRecord.schema_version`` carries the user-facing
-state-schema identifier per spec §10.2 (proposal 0014 repurposes
-the field from the original backend-internal record-shape role).
-The framework reads ``type(state).schema_version`` at save time;
-on load, version mismatches route through the migration registry
-(per §10.12) rather than a strict equality check.
+state-schema identifier. The framework reads
+``type(state).schema_version`` at save time; on load, version
+mismatches route through the migration registry rather than a strict
+equality check.
 """
+# Spec pipeline-utilities §10.2 (proposal 0014): schema_version is the
+# state-schema identifier; §10.12 migration registry on mismatch.
 
 from __future__ import annotations
 
@@ -98,14 +99,14 @@ class FanOutInstanceProgress:
       correctness contract: an instance marked ``completed`` MUST have
       its contribution recorded into the accumulator AND that
       contribution MUST be reflected in ``result``. Reducer composition
-      rules (§10.11.1) depend on this exactly-once guarantee.
+      rules depend on this exactly-once guarantee.
     - ``result``: for ``completed`` instances, the durable contribution
       to the fan-out accumulator (a success value for the
       ``target_field`` bucket, or under ``collect`` error policy an
       error entry for the ``errors_field`` bucket). Typed per the
       parent state schema's ``target_field`` / ``errors_field``
-      (representation is implementation-defined per §10.11; Python
-      stores as ``Any`` since dynamic typing absorbs the variance).
+      (representation is implementation-defined; Python stores as
+      ``Any`` since dynamic typing absorbs the variance).
       Unused for ``in_flight`` and ``not_started``.
     - ``result_is_error``: boolean discriminator for ``completed``
       entries — ``True`` when the contribution is a ``collect``-mode
@@ -113,8 +114,8 @@ class FanOutInstanceProgress:
       when the contribution is a success value that rolls forward
       into ``target_field``. MUST be ``False`` for ``in_flight`` and
       ``not_started`` (the value of ``result`` is ignored for those).
-      Per proposal 0027 (spec v0.21.0): implementations MUST consult
-      this field on resume rather than inferring routing from
+      Implementations MUST consult this field on resume rather than
+      inferring routing from
       ``result``'s shape — heuristic inspection would misclassify
       user state values that happen to match the engine's
       error-record shape.
@@ -148,7 +149,7 @@ class FanOutProgress:
       the fan-out (empty for outermost-graph fan-outs). Disambiguates
       fan-outs of the same name in different nested-subgraph contexts.
     - ``instance_count``: the resolved instance count for this fan-out
-      (per pipeline-utilities §9 count or items_field mode).
+      (count or items_field mode).
     - ``instances``: a tuple of per-instance entries indexed by
       ``fan_out_index`` (``instances[i]`` is the entry for
       ``fan_out_index=i``). Length equals ``instance_count``.
@@ -167,7 +168,7 @@ class CheckpointRecord:
 
     Frozen: backends MUST treat the record as immutable. The engine
     builds a fresh record per ``completed`` event rather than mutating
-    a shared one. The ``fan_out_progress`` field (per §10.11) carries
+    a shared one. The ``fan_out_progress`` field carries
     per-fan-out-node entries when one or more fan-outs are in flight
     at save time; an empty tuple means no fan-out progress to record.
     """
@@ -234,8 +235,8 @@ class Checkpointer(Protocol):
     plain dict, JSON tree, or similar) that is independent of the
     current state class. JSON-encoded backends naturally satisfy
     this; backends that store live typed state instances or use
-    class-bound serialization (pickle) cannot. Per spec §10.12.1,
-    backends that cannot expose the intermediate MUST raise
+    class-bound serialization (pickle) cannot. Backends that cannot
+    expose the intermediate MUST raise
     ``CheckpointRecordInvalid`` on version mismatch even when
     migrations are registered; the registry has no chance to bridge.
 

From a62f4bdfa1efe63878cf57ab9199cacd84f18aba Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:47:55 -0700
Subject: [PATCH 06/15] Strip spec refs from graph (builder/errors/events/pb)
 docstrings

---
 src/openarmature/graph/builder.py           | 15 +++--
 src/openarmature/graph/errors.py            | 25 ++++----
 src/openarmature/graph/events.py            | 63 ++++++++++-----------
 src/openarmature/graph/parallel_branches.py | 31 +++++-----
 4 files changed, 64 insertions(+), 70 deletions(-)

diff --git a/src/openarmature/graph/builder.py b/src/openarmature/graph/builder.py
index 48d95ee..b71b663 100644
--- a/src/openarmature/graph/builder.py
+++ b/src/openarmature/graph/builder.py
@@ -306,11 +306,11 @@ def add_parallel_branches_node(
         errors_field: str | None = None,
         middleware: Iterable[Middleware] | None = None,
     ) -> Self:
-        """Register a parallel-branches node per pipeline-utilities §11.
+        """Register a parallel-branches node.
 
         ``branches`` is a mapping from non-empty branch name to a
         :class:`BranchSpec`. Insertion order is preserved and is
-        the dispatch + merge order per §11.8.
+        the dispatch + merge order.
 
         Validates at registration:
 
@@ -397,7 +397,7 @@ def with_state_migration(
         to_version: str,
         migrate: Callable[[Any], Any],
     ) -> Self:
-        """Register one state migration per pipeline-utilities §10.12.
+        """Register one state migration.
 
         On resume, when the saved record's ``schema_version`` does not
         match the current state class's ``schema_version``, the engine
@@ -406,15 +406,14 @@ def with_state_migration(
         ``parent_states``) before deserialization.
 
         Migrations MUST be pure: deterministic, no I/O, no implicit
-        state. The framework does not police purity (per §10.12.2),
-        but violating it risks non-deterministic resume.
+        state. The framework does not police purity, but violating it
+        risks non-deterministic resume.
 
         Raises ``CheckpointStateMigrationChainAmbiguous`` at
         registration if the ``(from_version, to_version)`` pair is
-        already registered (per spec §10.10 / §10.12.1; proposal
-        0018 / spec v0.16.0). Also raises ``ValueError`` if
+        already registered. Also raises ``ValueError`` if
         ``to_version`` is the empty-string sentinel (the un-declared
-        marker per §10.2 is not a valid chain target).
+        marker is not a valid chain target).
         """
         self._migration_registry.register(
             StateMigration(
diff --git a/src/openarmature/graph/errors.py b/src/openarmature/graph/errors.py
index 7f56a3c..06c5ac0 100644
--- a/src/openarmature/graph/errors.py
+++ b/src/openarmature/graph/errors.py
@@ -145,8 +145,7 @@ def __init__(self, node_name: str, collect_field: str) -> None:
 
 class ParallelBranchesNoBranches(CompileError):
     """Raised at registration when a parallel-branches node's
-    ``branches`` mapping is empty. Per pipeline-utilities §11.9
-    / proposal 0011. Non-transient."""
+    ``branches`` mapping is empty. Non-transient."""
 
     category = "parallel_branches_no_branches"
 
@@ -184,18 +183,16 @@ def __init__(self, node_name: str, cause: BaseException, recoverable_state: Any)
 
 class ParallelBranchesBranchFailed(NodeException):
     """Raised when a branch's subgraph raises under
-    ``error_policy: 'fail_fast'``. Per pipeline-utilities §11.9 /
-    proposal 0011.
-
-    Subtype of :class:`NodeException` (per §11.9: "a
-    ``node_exception`` subtype attached at the parallel-branches
-    node's level"). The existing NodeException-classifier path
-    handles transient classification from ``__cause__`` per §6.1:
-    non-transient by default, inheriting transient classification
-    from the wrapped exception.
-
-    Carries ``branch_name`` as a structured field per §11.9; the
-    inner exception rides ``__cause__``.
+    ``error_policy: 'fail_fast'``.
+
+    Subtype of :class:`NodeException` (a ``node_exception`` subtype
+    attached at the parallel-branches node's level). The existing
+    NodeException-classifier path handles transient classification
+    from ``__cause__``: non-transient by default, inheriting transient
+    classification from the wrapped exception.
+
+    Carries ``branch_name`` as a structured field; the inner exception
+    rides ``__cause__``.
     """
 
     category = "parallel_branches_branch_failed"
diff --git a/src/openarmature/graph/events.py b/src/openarmature/graph/events.py
index 462c883..8542651 100644
--- a/src/openarmature/graph/events.py
+++ b/src/openarmature/graph/events.py
@@ -112,11 +112,10 @@ class ParallelBranchesEventConfig:
 
     - ``branch_names``: non-empty ordered tuple of strings. The branch
       identifiers in declaration / dispatch order, as configured on
-      the parallel-branches node (pipeline-utilities §11.1).
+      the parallel-branches node.
     - ``branch_count``: positive int. Equals ``len(branch_names)``.
       Surfaced explicitly so observers don't have to derive it.
-    - ``error_policy``: one of ``"fail_fast"`` or ``"collect"`` (per
-      pipeline-utilities §11.5).
+    - ``error_policy``: one of ``"fail_fast"`` or ``"collect"``.
     - ``parent_node_name``: the parallel-branches node's name in the
       parent graph. Carried here for caching by backend observers
       when attributing per-branch dispatch spans.
@@ -180,8 +179,7 @@ class NodeEvent:
       :class:`FanOutEventConfig`. ``None`` on every other event.
     - ``branch_name`` is the non-empty string name of the
       parallel-branches branch this event came from. ``None`` for
-      nodes outside any branch. Per graph-engine §6 / pipeline-
-      utilities §11, the combination of ``namespace``,
+      nodes outside any branch. The combination of ``namespace``,
       ``branch_name``, ``fan_out_index``, ``attempt_index``, and
       ``phase`` jointly uniquely identifies an event source.
       ``branch_name`` and ``fan_out_index`` are independent; both
@@ -195,9 +193,9 @@ class NodeEvent:
     - On ``completed`` events, exactly one of ``post_state`` and
       ``error`` is populated.
 
-    **Synthetic phases.** ``"checkpoint_saved"`` (pipeline-utilities
-    §10.8) and ``"checkpoint_migrated"`` (proposal 0014 §6
-    cross-ref) repurpose this dataclass for non-node events. Both
+    **Synthetic phases.** ``"checkpoint_saved"`` and
+    ``"checkpoint_migrated"`` repurpose this dataclass for non-node
+    events. Both
     are opt-in via ``phases={...}`` on observer registration;
     default subscriptions are ``{"started", "completed"}`` only, so
     legacy observers never see them. Conventions on synthetic
@@ -332,14 +330,14 @@ class MetadataAugmentationEvent:
 
     Distinct from :class:`NodeEvent` because there is no node phase,
     no pre/post state, and no error: this event reports a side-channel
-    augmentation, not a node-attempt boundary. Per graph-engine §6 the
-    event is NOT subject to the observer ``phases`` filter (which only
-    governs ``NodeEvent`` phases); the delivery worker forwards it to
-    every subscribed observer. Observers that handle it iterate their
-    open observations whose lineage is an ancestor of (or equal to)
-    the augmenting context's lineage and apply the entries as
-    ``openarmature.user.<key>`` (OTel, §5.6) /
-    ``metadata.<key>`` (Langfuse, §8.4.1+§8.4.2).
+    augmentation, not a node-attempt boundary. The event is NOT
+    subject to the observer ``phases`` filter (which only governs
+    ``NodeEvent`` phases); the delivery worker forwards it to every
+    subscribed observer. Observers that handle it iterate their open
+    observations whose lineage is an ancestor of (or equal to) the
+    augmenting context's lineage and apply the entries as
+    ``openarmature.user.<key>`` (OTel) / ``metadata.<key>``
+    (Langfuse).
     """
 
     entries: Mapping[str, AttributeValue]
@@ -373,21 +371,21 @@ class InvocationStartedEvent:
     Emitted once per invocation, before any node fires. Observers that
     populate Trace-level input fields (the Langfuse observer, today)
     consume it to resolve ``trace.input`` per the three-lever decision
-    tree in observability §8.4.1. Observers without a Trace-level
-    input concept (the OTel observer) treat it as a no-op.
+    tree. Observers without a Trace-level input concept (the OTel
+    observer) treat it as a no-op.
 
     Carries:
 
     - ``initial_state``: the raw state object the engine constructed
       from ``invoke()``'s arguments (the typed-state instance).
     - ``invocation_id``: the invocation id (caller-supplied or
-      framework-generated per proposal 0039).
-    - ``correlation_id``: the §3 correlation id when present.
+      framework-generated).
+    - ``correlation_id``: the correlation id when present.
     - ``entry_node``: the outermost-graph entry node name.
 
-    Per graph-engine §6 the event is NOT subject to the observer
-    ``phases`` filter (which only governs ``NodeEvent`` phases); the
-    delivery worker forwards it to every subscribed observer.
+    The event is NOT subject to the observer ``phases`` filter (which
+    only governs ``NodeEvent`` phases); the delivery worker forwards it
+    to every subscribed observer.
     """
 
     initial_state: Any
@@ -410,8 +408,8 @@ class InvocationCompletedEvent:
     after a failure boundary on the failure path). Observers that
     populate Trace-level output fields (the Langfuse observer, today)
     consume it to resolve ``trace.output`` per the three-lever
-    decision tree in observability §8.4.1. Observers without a
-    Trace-level output concept (the OTel observer) treat it as a no-op.
+    decision tree. Observers without a Trace-level output concept (the
+    OTel observer) treat it as a no-op.
 
     Carries:
 
@@ -424,11 +422,10 @@ class InvocationCompletedEvent:
     - ``final_node``: the name of the node whose execution preceded
       the END-reached transition on the success path, or the node
       that raised on the failure path.
-    - ``invocation_id`` / ``correlation_id``: the §3 / §5.1 ids.
+    - ``invocation_id`` / ``correlation_id``: the invocation and correlation ids.
 
-    Per graph-engine §6 the event is NOT subject to the observer
-    ``phases`` filter; the delivery worker forwards it to every
-    subscribed observer.
+    The event is NOT subject to the observer ``phases`` filter; the
+    delivery worker forwards it to every subscribed observer.
     """
 
     final_state: Any
@@ -541,7 +538,7 @@ class LlmCompletionEvent:
     - ``caller_invocation_metadata``: optional snapshot of caller-
       supplied invocation metadata at LLM-call time. Spec-defined as
       OPTIONAL; the python OpenAIProvider populates it by default so
-      the bundled OTel/Langfuse observers can emit the §5.6
+      the bundled OTel/Langfuse observers can emit the
       ``openarmature.user.<key>`` span-attribute family without an
       extra opt-in. Pass ``populate_caller_metadata=False`` to suppress
       the snapshot. Future non-OpenAI providers MAY default to
@@ -606,7 +603,7 @@ class LlmFailedEvent:
     """A typed LLM provider call failure event delivered to observers.
 
     Carries identity, scoping, and failure-context data for an LLM
-    call that raised a llm-provider §7 category exception. Observer
+    call that raised an llm-provider category exception. Observer
     code filters by type discrimination (``isinstance(event,
     LlmFailedEvent)``) rather than by the impl-current sentinel-
     namespace string match.
@@ -619,8 +616,8 @@ class LlmFailedEvent:
 
     Failure-specific fields:
 
-    - ``error_category``: the llm-provider §7 normative error
-      category the call raised. One of the 9 canonical strings
+    - ``error_category``: the llm-provider normative error category
+      the call raised. One of the 9 canonical strings
       (``provider_authentication``, ``provider_unavailable``,
       ``provider_invalid_model``, ``provider_model_not_loaded``,
       ``provider_rate_limit``, ``provider_invalid_response``,
diff --git a/src/openarmature/graph/parallel_branches.py b/src/openarmature/graph/parallel_branches.py
index f1251a0..2b6ff98 100644
--- a/src/openarmature/graph/parallel_branches.py
+++ b/src/openarmature/graph/parallel_branches.py
@@ -11,16 +11,15 @@
 subgraph (with potentially different state schema, middleware,
 topology), its own ``inputs`` / ``outputs`` projection mappings,
 and its own optional ``middleware`` wrapping the whole branch
-invocation as a unit (§11.7).
+invocation as a unit.
 
-Buffer-then-apply semantics per §11.4: contributions are
-collected during dispatch and merged deterministically once at
-node completion, using the parent's reducer for each output
-field. Branch insertion order determines both dispatch order
-(§11.8) and merge tie-breaking when two branches write the same
-parent field.
+Buffer-then-apply semantics: contributions are collected during
+dispatch and merged deterministically once at node completion,
+using the parent's reducer for each output field. Branch insertion
+order determines both dispatch order and merge tie-breaking when
+two branches write the same parent field.
 
-Error policies per §11.5:
+Error policies:
 
 - ``fail_fast``: first failure cancels still-running branches;
   the buffered contributions are discarded; the parallel-branches
@@ -31,6 +30,8 @@
   branches' contributions merge; failed branches' errors land in
   the optional ``errors_field``.
 """
+# Spec pipeline-utilities §11 (parallel branches): §11.4 buffer-then-
+# apply, §11.5 error policies, §11.7 branch middleware, §11.8 order.
 
 from __future__ import annotations
 
@@ -64,7 +65,7 @@ class BranchSpec[ChildT: State]:
     Branches are heterogeneous: each spec MAY reference a different
     compiled subgraph with a different state schema. ``inputs`` /
     ``outputs`` follow the same shape as subgraph projection
-    mappings (proposal 0002).
+    mappings.
 
     Validation lives on the builder side
     (``GraphBuilder.add_parallel_branches_node``):
@@ -83,7 +84,7 @@ class BranchSpec[ChildT: State]:
 @dataclass(frozen=True)
 class ParallelBranchesNode[ParentT: State]:
     """A node that dispatches M heterogeneous compiled subgraphs
-    concurrently per spec §11.
+    concurrently.
 
     The Node Protocol contract requires ``name``, ``middleware``,
     and ``run``. Like :class:`FanOutNode`, the engine recognizes
@@ -229,7 +230,7 @@ async def _fail_fast(
         tasks: list[tuple[str, asyncio.Task[Mapping[str, Any]]]],
         contributions: dict[str, Mapping[str, Any]],
     ) -> Mapping[str, Any]:
-        """Fail-fast policy per spec §11.5.
+        """Fail-fast policy.
 
         Wait for all branches; on first failure, cancel the rest
         and raise ``ParallelBranchesBranchFailed`` with the failing
@@ -309,7 +310,7 @@ async def _collect(
         contributions: dict[str, Mapping[str, Any]],
         errors: list[dict[str, str]],
     ) -> Mapping[str, Any]:
-        """Collect policy per spec §11.5.
+        """Collect policy.
 
         All branches run to completion regardless of individual
         failures. Successful branches' contributions go to the
@@ -346,8 +347,8 @@ def _merge_contributions(
     ) -> dict[str, Any]:
         """Flatten per-branch contributions into a single partial.
 
-        Per §11.4 + §11.8: contributions apply in branch insertion
-        order, using each parent field's reducer. The actual reducer
+        Contributions apply in branch insertion order, using each
+        parent field's reducer. The actual reducer
         application happens at ``_merge_partial`` in compiled.py
         when the engine merges this partial into parent state. Here
         we just flatten the per-branch contributions into a dict
@@ -386,7 +387,7 @@ class _MultiContribution:
     """Sentinel for ``_merge_partial`` indicating that multiple
     branches contributed to the same parent field. The engine
     applies the parent's reducer to each value in sequence,
-    preserving branch insertion order per §11.8.
+    preserving branch insertion order.
     """
 
     values: tuple[Any, ...]

From 50de8dca062aedae3974cfe8081d6bda6cdee831 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 10:54:42 -0700
Subject: [PATCH 07/15] Strip spec refs from graph (fan_out/observer/compiled)
 docstrings

---
 src/openarmature/graph/compiled.py | 113 ++++++++++++++---------------
 src/openarmature/graph/fan_out.py  |  34 ++++-----
 src/openarmature/graph/observer.py |  68 ++++++++---------
 3 files changed, 105 insertions(+), 110 deletions(-)

diff --git a/src/openarmature/graph/compiled.py b/src/openarmature/graph/compiled.py
index 8b65a6e..f12b382 100644
--- a/src/openarmature/graph/compiled.py
+++ b/src/openarmature/graph/compiled.py
@@ -240,15 +240,14 @@ def _merge_partial[StateT: State](
 class _StepResult[StateT: State]:
     """Return shape of the per-step dispatchers
     (``_step_function_node`` / ``_step_subgraph_node`` /
-    ``_step_fan_out_node``) under the proposal-0012 v0.9.0 swap.
+    ``_step_fan_out_node``).
 
-    Spec graph-engine §3 step 3 (revised) requires the
-    ``completed`` event for the just-completed node to fire AFTER
+    The ``completed`` event for the just-completed node fires AFTER
     edge evaluation completes — so that edge-resolution failures
     (``routing_error``, ``edge_exception``) land on the preceding
     node's completed event with ``error`` populated, sharing the
     started/completed pair rather than producing a separate event
-    pair (§6 revised).
+    pair.
 
     The step dispatchers can't call ``_dispatch_completed`` for
     the success path themselves anymore, because the outcome
@@ -268,9 +267,9 @@ class _StepResult[StateT: State]:
     For ``_step_subgraph_node``, the wrapper is transparent per
     fixture 013 (no started/completed pair); ``finalize_completed``
     is a no-op closure so edge errors after a subgraph wrapper
-    propagate silently per proposal 0012's "preceding unit's
-    pair" framing applied to a unit that never had one. Same for
-    middleware that short-circuits without invoking ``next``.
+    propagate silently — the "preceding unit's pair" framing applied
+    to a unit that never had one. Same for middleware that short-
+    circuits without invoking ``next``.
     """
 
     state: StateT
@@ -282,7 +281,7 @@ def _no_op_finalize(_edge_error: RuntimeGraphError | None) -> None:
     didn't dispatch a started/completed pair — subgraph wrappers
     (transparent per fixture 013) and middleware that short-
     circuits without invoking ``next``. Edge errors propagate
-    silently per proposal 0012 + fixture 013."""
+    silently per fixture 013."""
 
 
 # Helpers for the proposal 0009 per-instance fan-out resume contract.
@@ -332,7 +331,7 @@ def _project_fan_out_progress(
     """Project the engine-internal mutable per-fan-out state into the
     frozen :class:`FanOutProgress` shape on a saved record.
 
-    Per §10.11's snapshot semantics, a save fires with ALL concurrent
+    Under snapshot semantics, a save fires with ALL concurrent
     fan-out instances' states captured at the moment of the save —
     not just the one whose ``completed`` event triggered the save.
     This projection enumerates the whole dict; the engine save site
@@ -382,8 +381,8 @@ def _restore_fan_out_progress_state(
     them, surface as a follow-on.
 
     ``result_is_error`` is read verbatim from the saved record's
-    explicit field per spec §10.11 (proposal 0027). The pre-0027
-    structural-pattern heuristic is gone — the spec mandates the
+    explicit field. The earlier structural-pattern heuristic is gone
+    — the spec mandates the
     explicit field as the authoritative discriminator because the
     user's state schema can legitimately contain values that match
     the engine's canonical error-record shape, and a heuristic would
@@ -420,8 +419,8 @@ async def _save_fan_out_internal(
     """Route a fan-out-internal save through the checkpointer's
     optional batching seam.
 
-    Per spec §10.11.4, Checkpointer backends MAY support batching
-    scoped to fan-out internal saves. When the backend exposes a
+    Checkpointer backends MAY support batching scoped to fan-out
+    internal saves. When the backend exposes a
     ``save_fan_out_internal`` coroutine, route there so it can buffer
     or flush per its configuration. Otherwise, fall back to the
     standard ``save`` — non-batching backends see no behavioral change.
@@ -439,8 +438,8 @@ async def _save_fan_out_in_flight_failure(  # pyright: ignore[reportUnusedFuncti
     record: CheckpointRecord,
 ) -> None:
     """Route an "instance failed mid-execution" save through the
-    checkpointer's failure-save seam (§10.11.4 + the in_flight
-    observability gap §10.11).
+    checkpointer's failure-save seam (closing the in_flight
+    observability gap).
 
     Backends that expose ``save_fan_out_in_flight_failure`` get the
     save directly; under batching, the typical implementation
@@ -461,8 +460,8 @@ class _MigrationSummary:
     """Per-resume migration-chain metadata threaded out of
     ``_migrate_record`` so the engine can dispatch an
     ``openarmature.checkpoint.migrate`` observer event after the
-    invocation context is built (per spec §6 cross-ref in proposal
-    0014). Carried on the synthetic ``NodeEvent.pre_state``
+    invocation context is built. Carried on the synthetic
+    ``NodeEvent.pre_state``
     payload for ``phase="checkpoint_migrated"``; the OTel observer
     reads it to emit the span.
     """
@@ -479,8 +478,8 @@ def _apply_migration_step(
 ) -> Any:
     """Apply one migration step to one value (outer state or one
     parent-state entry). Wraps the user-supplied migration function's
-    raise as ``CheckpointStateMigrationFailed`` per spec §10.12.2.
-    The original exception rides ``__cause__``.
+    raise as ``CheckpointStateMigrationFailed``. The original
+    exception rides ``__cause__``.
     """
     try:
         return migration.migrate(value)
@@ -618,19 +617,18 @@ async def _migrate_record(
         has ``state`` + ``parent_states`` mapped through the chain.
         ``summary`` carries the chain's metadata so the caller can
         dispatch a ``checkpoint_migrated`` observer event after the
-        invocation context exists (per the spec §6 cross-ref in
-        proposal 0014).
+        invocation context exists.
 
         Caller is responsible for the post-migration deserialization
-        step (§10.12.4): if the migrated state cannot deserialize
-        against the current state class, the resulting failure
-        surfaces as ``CheckpointRecordInvalid``.
-
-        Spec §10.12.2 says "parent states MUST be treated as carrying
-        the same ``schema_version`` as the outer record." We apply
-        the same chain to every entry in ``parent_states`` lockstep
-        with the outer state. Future per-parent versioning would
-        need a spec follow-on.
+        step: if the migrated state cannot deserialize against the
+        current state class, the resulting failure surfaces as
+        ``CheckpointRecordInvalid``.
+
+        Parent states MUST be treated as carrying the same
+        ``schema_version`` as the outer record, so we apply the same
+        chain to every entry in ``parent_states`` lockstep with the
+        outer state. Future per-parent versioning would need a
+        follow-on.
         """
         # Eligibility check first per §10.12.1: backends that hold
         # typed in-memory state or class-bound serialization cannot
@@ -904,7 +902,7 @@ async def invoke(
         - ``correlation_id`` is the per-invocation cross-backend join
           key. Caller-supplied or auto-generated UUIDv4 when absent.
           Preserved unchanged across ``resume_invocation``.
-        - ``invocation_id`` (proposal 0039) is the per-attempt id.
+        - ``invocation_id`` is the per-attempt id.
           Caller-supplied or auto-generated UUIDv4 when absent; a
           caller value MAY be any non-empty URL-safe string. Applies
           to the fresh-invocation path only — a ``resume_invocation``
@@ -925,7 +923,7 @@ async def invoke(
           own retry logic if transient backend failures should be
           reattempted.
 
-        **Caller-supplied invocation metadata (proposal 0034).**
+        **Caller-supplied invocation metadata.**
 
         - ``metadata`` is an optional mapping of arbitrary
           ``key → value`` entries the framework propagates to every
@@ -935,7 +933,7 @@ async def invoke(
           the ``openarmature.*`` or ``gen_ai.*`` reserved namespaces.
           Validation runs synchronously at the API boundary; rule
           violations raise ``ValueError`` BEFORE any work begins.
-        - Per spec §5.6 the OTel observer emits each entry as an
+        - The OTel observer emits each entry as an
           ``openarmature.user.<key>`` cross-cutting span attribute on
           every span and OTel log record. The Langfuse observer
           merges each entry into ``trace.metadata`` AND every
@@ -1397,7 +1395,7 @@ async def _step_function_node(
     ) -> _StepResult[StateT]:
         """Run one function-node step through the middleware chain.
 
-        Per pipeline-utilities §3, the runtime chain composes:
+        The runtime chain composes:
 
             [per_graph...] -> [per_node...] -> innermost
 
@@ -1406,10 +1404,10 @@ async def _step_function_node(
         to ``innermost`` is one attempt; middleware that calls ``next``
         repeatedly (e.g., retry) produces multiple attempts and therefore
         multiple started/completed event pairs from the engine, each
-        tagged with an incrementing ``attempt_index`` (graph-engine §6).
+        tagged with an incrementing ``attempt_index``.
 
-        Per proposal-0012 v0.9.0: the success-case ``completed`` event
-        for the FINAL successful attempt fires AFTER edge eval, not
+        The success-case ``completed`` event for the FINAL successful
+        attempt fires AFTER edge eval, not
         inside ``innermost``. Failure-case dispatches
         (``node_exception`` / ``reducer_error`` /
         ``state_validation_error``) stay inline in ``innermost`` —
@@ -1640,7 +1638,7 @@ async def _step_subgraph_node(
     ) -> _StepResult[StateT]:
         """Run one subgraph-as-node step through the parent's middleware chain.
 
-        Per pipeline-utilities §4: the parent's per-graph middleware plus
+        The parent's per-graph middleware plus
         any per-node middleware on the SubgraphNode wraps the subgraph
         dispatch as a single atomic call. The subgraph's INTERNAL nodes
         get their own middleware via the subgraph's own CompiledGraph;
@@ -1650,12 +1648,11 @@ async def _step_subgraph_node(
         events come from the subgraph's internal node executions (per
         fixture 013).
 
-        Per proposal-0012 v0.9.0 + spec coordination: edge errors
-        AFTER a transparent subgraph wrapper propagate to the caller
-        as ``RuntimeGraphError`` per §4 WITHOUT an associated
+        Edge errors AFTER a transparent subgraph wrapper propagate to
+        the caller as ``RuntimeGraphError`` WITHOUT an associated
         completed event — the wrapper has no started/completed pair
-        to share, and proposal 0012's "preceding node's pair" MUST
-        is vacuous (not violated) when the preceding unit emitted
+        to share, and the "preceding node's pair" MUST requirement is
+        vacuous (not violated) when the preceding unit emitted
         no pair. The :class:`_StepResult` returned here uses
         :func:`_no_op_finalize` so the outer ``_invoke`` call to
         ``finalize_completed(edge_error)`` is a no-op.
@@ -1720,7 +1717,7 @@ async def _step_fan_out_node(
     ) -> _StepResult[StateT]:
         """Run one fan-out-as-node step through the parent's middleware chain.
 
-        Per pipeline-utilities §9.6: the parent's per-graph + per-node
+        The parent's per-graph + per-node
         middleware wraps the fan-out as a SINGLE dispatch — one started
         event before the fan-out begins, one completed event after all
         instances complete and fan-in is done. Per-instance events
@@ -1728,10 +1725,10 @@ async def _step_fan_out_node(
         post_state shape is the inner subgraph's state, and they carry
         ``fan_out_index`` populated.
 
-        Raw exceptions escaping the chain become NodeException per §4.
+        Raw exceptions escaping the chain become NodeException.
 
-        Per proposal-0012 v0.9.0: the fan-out's success-case
-        completed event fires AFTER edge eval (mirrors
+        The fan-out's success-case completed event fires AFTER edge
+        eval (mirrors
         ``_step_function_node``). Failure-path dispatches stay
         inline; the success-case is deferred via the returned
         :class:`_StepResult`.
@@ -2084,13 +2081,12 @@ async def _step_parallel_branches_node(
         """Run one parallel-branches-as-node step through the parent's
         middleware chain.
 
-        Per pipeline-utilities §11.6: the parent's per-graph +
+        The parent's per-graph +
         per-node middleware wraps the parallel-branches dispatch
         as a SINGLE unit — one started event before dispatch
         begins, one completed event after all branches complete
         and fan-in is done. Per-branch internal events come from
-        the branches' subgraph executions and carry ``branch_name``
-        per graph-engine §6.
+        the branches' subgraph executions and carry ``branch_name``.
 
         Mirrors ``_step_fan_out_node`` minus the eager
         count/concurrency resolution (parallel branches has no
@@ -2364,8 +2360,7 @@ async def _maybe_save_checkpoint(
         """Fire a checkpoint save for the just-completed node, if a
         backend is registered.
 
-        Per spec pipeline-utilities §10.3 (revised by proposal 0009 /
-        spec v0.18.0):
+        Save policy:
 
         - Save fires for outermost-graph nodes, subgraph-internal
           nodes, fan-out instance internal nodes, AND the fan-out
@@ -2378,7 +2373,7 @@ async def _maybe_save_checkpoint(
           ``fan_out_progress`` field projects this shared dict so
           all concurrent instances' snapshots are captured atomically.
 
-        Atomicity contract (§10.11): the save-call site below
+        Atomicity contract: the save-call site below
         completes the "produce contribution + record into accumulator
         + save" sequence the spec mandates. ``FanOutNode.run_with_context``
         flips an instance's state to ``completed`` and stashes its
@@ -2387,18 +2382,18 @@ async def _maybe_save_checkpoint(
         below leaves the in-memory dict updated but the persisted
         record showing ``in_flight``, so resume re-runs the instance
         and the append/last_write_wins/merge reducer's exactly-once
-        guarantee per §10.11.1 holds.
+        guarantee holds.
 
         Save also enumerates ALL concurrent fan-out instances when
         building ``fan_out_progress`` (not just the one whose
         ``completed`` event triggered this save) — the per-instance
-        snapshot is consistent across siblings, matching §10.11's
-        "captured when a sibling instance's ``completed`` event
-        triggers a save during this instance's execution" wording.
+        snapshot is consistent across siblings, captured when a
+        sibling instance's ``completed`` event triggers a save during
+        this instance's execution.
 
         After ``Checkpointer.save`` returns, dispatch a
-        ``checkpoint_saved`` observer event (per §10.8 SHOULD-level
-        guidance) so observability backends can surface saves as spans.
+        ``checkpoint_saved`` observer event so observability backends
+        can surface saves as spans.
 
         Save failures raise ``CheckpointSaveFailed`` to the caller of
         ``invoke()`` immediately; saves are NOT retried by the engine.
diff --git a/src/openarmature/graph/fan_out.py b/src/openarmature/graph/fan_out.py
index 48d545b..f5d3223 100644
--- a/src/openarmature/graph/fan_out.py
+++ b/src/openarmature/graph/fan_out.py
@@ -10,7 +10,7 @@
 
 This is the single place in the engine where multiple subgraph
 executions overlap in time within a single invocation; everywhere else
-(graph-engine §3) execution is single-threaded.
+execution is single-threaded.
 
 The module contains:
 
@@ -137,20 +137,20 @@ async def run_with_context(
         fan-in collected/extra fields, write count_field and
         errors_field if configured.
 
-        Per proposal 0009 / §10.11 per-instance resume contract: this
-        method registers a per-fan-out tracking entry on the shared
+        Per the per-instance resume contract: this method registers a
+        per-fan-out tracking entry on the shared
         ``context.fan_out_progress_state`` dict before dispatching,
         flips each instance's state through
         ``not_started -> in_flight -> completed`` as the instance
         progresses, and fires an explicit "instance completed" save
         after the per-instance contribution has been recorded into
-        the accumulator. The atomicity contract from §10.11 is
+        the accumulator. The atomicity contract is
         observed: the per-instance state mutation precedes the save,
         so a crash after mutation but before save leaves the saved
         record showing ``in_flight`` (resume re-runs the instance).
 
-        ``pre_resolved_count`` / ``pre_resolved_concurrency`` are the
-        proposal-0013 v0.10.0 hooks: when the engine has already
+        ``pre_resolved_count`` / ``pre_resolved_concurrency`` are
+        pre-resolution hooks: when the engine has already
         resolved the config eagerly to populate
         ``NodeEvent.fan_out_config`` for the fan-out node's events,
         it passes the resolved values in so callable resolvers
@@ -469,12 +469,12 @@ def _build_instance_states(
 ) -> list[Any]:
     """Project parent state to per-instance subgraph states.
 
-    Per spec §9.1:
+    By mode:
     - items_field mode: one instance per item, item_field gets the item
     - count mode: ``count`` instances, item_field absent
     - both modes: inputs map parent fields onto subgraph state fields
 
-    ``pre_resolved_count`` (proposal-0013 hook): if the engine has
+    ``pre_resolved_count``: if the engine has
     already resolved ``cfg.count`` to populate
     ``NodeEvent.fan_out_config.item_count``, the resolved value is
     passed in here so the callable resolver isn't invoked twice.
@@ -526,7 +526,7 @@ def _build_instance_states(
 
 
 def _resolve_count(node_name: str, cfg: FanOutConfig, parent_state: Any) -> int:
-    """Resolve the ``count`` config to an int. Spec §9.1."""
+    """Resolve the ``count`` config to an int."""
     raw = cfg.count
     if callable(raw):
         resolved = raw(parent_state)
@@ -545,7 +545,7 @@ def _resolve_count(node_name: str, cfg: FanOutConfig, parent_state: Any) -> int:
 
 
 def _resolve_concurrency(node_name: str, cfg: FanOutConfig, parent_state: Any) -> int | None:
-    """Resolve the ``concurrency`` config. Spec §9.2."""
+    """Resolve the ``concurrency`` config."""
     raw = cfg.concurrency
     if callable(raw):
         resolved = raw(parent_state)
@@ -647,7 +647,7 @@ async def _save_instance_in_flight(
     save only fires on successful merge (failure path skips it).
 
     Routes through the checkpointer's ``save_fan_out_in_flight_failure``
-    seam (when present) per §10.11.4. Batching backends typically
+    seam (when present). Batching backends typically
     buffer this save WITHOUT triggering a flush — the "crash" the
     failure represents would lose the buffer, including this save,
     in a real-world scenario. Non-batching backends route it through
@@ -692,17 +692,17 @@ async def _save_instance_completed(
     parent_state: Any,
     context: _InvocationContext,
 ) -> None:
-    """Fire the explicit "instance completed" save closing the §10.11
+    """Fire the explicit "instance completed" save closing the
     atomicity gap. The per-instance state has already been flipped to
     ``completed`` with ``result`` populated; this save durably records
     that transition so resume can skip the instance.
 
-    Routed through the fan-out-internal batching seam per §10.11.4 —
+    Routed through the fan-out-internal batching seam —
     backends opting into batching may buffer the save; non-batching
     backends call ``save`` directly. On crash with buffered-but-
     unflushed saves, the instance reverts to ``in_flight`` /
     ``not_started`` on resume and re-runs (contributing for the first
-    time, no double-merge per §10.11.1).
+    time, no double-merge).
     """
     # Lazy imports: ``compiled`` and ``checkpoint.protocol`` would
     # create textual cycles at module-load. Function-scope keeps the
@@ -773,8 +773,8 @@ def _fan_in_fail_fast(
 ) -> dict[str, Any]:
     """Merge per-instance partials into a single fan-out partial under
     the fail_fast policy. All ``results`` succeeded (otherwise gather
-    would have raised), so the count is just ``len(results)``. Spec
-    §9.3 + §9.4: instance-index order."""
+    would have raised), so the count is just ``len(results)``.
+    Partials are merged in instance-index order."""
     # §9.4 projection: read each instance's subgraph-space partial by
     # subgraph field name and collect into the parent field. ``.get`` keeps
     # an omitted collect_field (a callable degrade that doesn't set it, §9.3)
@@ -796,7 +796,7 @@ def _fan_in_collect(
 ) -> dict[str, Any]:
     """Merge per-instance results under the collect policy. Failures
     contribute nothing to target_field; if errors_field is configured,
-    failed instances' exceptions are recorded there. Spec §9.5."""
+    failed instances' exceptions are recorded there."""
     successes: list[Mapping[str, Any]] = []
     error_records: list[dict[str, str]] = []
     for idx, r in enumerate(raw_results):
diff --git a/src/openarmature/graph/observer.py b/src/openarmature/graph/observer.py
index 283c76f..d1d9ea7 100644
--- a/src/openarmature/graph/observer.py
+++ b/src/openarmature/graph/observer.py
@@ -110,16 +110,16 @@ async def log_observer(event: NodeEvent | MetadataAugmentationEvent) -> None:
       ``fan_out_index``, ``branch_name``) so rich backends can update
       their open observations in place
       (``span.set_attribute(openarmature.user.<key>, v)`` for OTel,
-      ``observation.update(metadata=...)`` for Langfuse). Per spec §6
-      this variant is NOT subject to the ``phases`` filter — every
+      ``observation.update(metadata=...)`` for Langfuse). This variant
+      is NOT subject to the ``phases`` filter — every
       subscribed observer sees it and isinstance-narrows to decide
       whether to act. Simple user observers typically early-return
       after ``isinstance(event, NodeEvent)`` checks.
     - :class:`InvocationStartedEvent` — emitted once per invocation
       before any node fires. Carries the engine-constructed
       ``initial_state`` so Trace-level backends (Langfuse) can
-      populate ``trace.input`` via the proposal 0043 three-lever
-      decision tree. NOT subject to the ``phases`` filter; OTel-only
+      populate ``trace.input`` via the three-lever decision tree. NOT
+      subject to the ``phases`` filter; OTel-only
       observers ignore it via the isinstance gate.
     - :class:`InvocationCompletedEvent` — emitted once per invocation
       after the last node fires (on both the success path and the
@@ -283,10 +283,10 @@ class _QueuedItem:
     without the worker needing to know the graph topology.
 
     ``event`` is the union of ``NodeEvent`` (started / completed /
-    checkpoint phases), ``MetadataAugmentationEvent`` (proposal 0040,
-    side-channel augmentation), and the two invocation-boundary
-    events ``InvocationStartedEvent`` / ``InvocationCompletedEvent``
-    (proposal 0043, Trace-level input/output sourcing). The delivery
+    checkpoint phases), ``MetadataAugmentationEvent`` (side-channel
+    augmentation), and the two invocation-boundary events
+    ``InvocationStartedEvent`` / ``InvocationCompletedEvent``
+    (Trace-level input/output sourcing). The delivery
     worker branches by type to apply the right delivery contract
     (phase-filter for ``NodeEvent``, no filter for the other three).
     """
@@ -346,8 +346,8 @@ class DrainSummary:
 
     The spec-mandated minimum is these two fields. Implementations MAY
     extend the shape with diagnostic detail (per-observer counts,
-    sampled event metadata) in subsequent versions; v0.19.0 ships the
-    minimum.
+    sampled event metadata) in subsequent versions; this version ships
+    the minimum.
     """
 
     undelivered_count: int
@@ -367,9 +367,9 @@ class _FanOutInstanceState:
     not_started -> in_flight -> completed.
 
     - ``result`` holds the per-instance contribution to the fan-out
-      accumulator, set when ``state == "completed"``. Per spec
-      §10.11 this is "the value contributed to the ``target_field``
-      bucket" (success path) or "the error entry contributed to the
+      accumulator, set when ``state == "completed"``: "the value
+      contributed to the ``target_field`` bucket" (success path) or
+      "the error entry contributed to the
       ``errors_field`` bucket" (collect-mode failure). The harness
       projects this into the frozen ``FanOutInstanceProgress.result``
       verbatim.
@@ -392,7 +392,7 @@ class _FanOutInstanceState:
       execution. Captures the instance's progress for observational
       purposes when an in_flight save snapshot fires; not used as a
       resume re-entry point (the instance re-enters at its subgraph's
-      declared entry node per §10.7).
+      declared entry node).
     """
 
     state: Literal["completed", "in_flight", "not_started"] = "not_started"
@@ -570,8 +570,8 @@ class _InvocationContext:
 
     def full_observers(self) -> tuple[SubscribedObserver, ...]:
         """Return the ordered observer list to deliver for events from
-        this depth. Per spec §6: graph-attached (outermost → innermost),
-        then invocation-scoped (passed to the outermost invoke)."""
+        this depth: graph-attached (outermost → innermost), then
+        invocation-scoped (passed to the outermost invoke)."""
         return self.graph_attached + self.invocation_scoped
 
     def descend_into_subgraph(
@@ -593,7 +593,7 @@ def descend_into_subgraph(
 
         Checkpointing fields propagate unchanged: subgraph-internal
         nodes save to the same backend with the same invocation_id
-        (per spec §10.3; one save per inner-node completion).
+        (one save per inner-node completion).
         """
         return _InvocationContext(
             queue=self.queue,
@@ -638,10 +638,10 @@ def descend_into_fan_out_instance(
 
         Same shape as ``descend_into_subgraph`` but stamps the fan-out
         index onto the new context so every inner-node event carries it.
-        Per spec §9 the index is the instance's 0-based position.
+        The index is the instance's 0-based position.
 
-        Per pipeline-utilities §10.3 (revised by proposal 0009): fan-out
-        instance internal nodes DO produce checkpoint saves. The
+        Fan-out instance internal nodes DO produce checkpoint saves.
+        The
         checkpointer reference propagates unchanged so an inner node's
         ``completed`` event triggers a save; the engine's save path
         projects the shared ``fan_out_progress_state`` into the record's
@@ -695,8 +695,8 @@ def descend_into_parallel_branch(
         """Build the context for one parallel-branches branch's
         subgraph invocation.
 
-        Per pipeline-utilities §11.6 the parallel-branches node looks
-        to outer middleware like a single dispatch; inner-branch
+        The parallel-branches node looks to outer middleware like a
+        single dispatch; inner-branch
         events come from the branch's subgraph execution. Stamps the
         namespace prefix with the parallel-branches node name so
         inner events nest under it (mirrors
@@ -706,11 +706,11 @@ def descend_into_parallel_branch(
         the ``observability.correlation._branch_name_var`` ContextVar
         — set inside the branch's task closure so ``copy_context``
         inherits it through the subgraph's execution.  The PER-DEPTH
-        ``branch_name_chain`` (proposal 0045) is extended here on the
+        ``branch_name_chain`` is extended here on the
         context so the engine can drive the chain ContextVar at
         every inner-node execution site.
 
-        Per §11.9 / §10.7 atomic-restart: drops the checkpointer
+        Atomic-restart semantics: drops the checkpointer
         and pending_resume_states (a crash mid-dispatch re-runs the
         whole parallel-branches node from scratch on resume; the
         branches' inner saves wouldn't be useful).
@@ -776,18 +776,18 @@ def _dispatch(
       engine-task scope (e.g., the OTel observer setting
       ``current_active_observer_span`` for the engine to attach into
       the OTel context) can do so before the node body runs.
-    - :class:`MetadataAugmentationEvent` (proposal 0040): a side-
-      channel augmentation event emitted by
+    - :class:`MetadataAugmentationEvent`: a side-channel augmentation
+      event emitted by
       ``set_invocation_metadata`` mid-invocation. Bypasses the
       ``prepare_sync`` branch entirely — the sync-prep contract is
       anchored on ``"started"``, which only ``NodeEvent`` carries.
       Queued onto the same serial worker so observers see it in
       strict order with the surrounding node events.
     - :class:`InvocationStartedEvent` /
-      :class:`InvocationCompletedEvent` (proposal 0043): invocation-
-      boundary events the engine enqueues at invocation entry / exit
-      so Trace-level backends can populate ``trace.input`` /
-      ``trace.output`` via the §8.4.1 three-lever decision tree.
+      :class:`InvocationCompletedEvent`: invocation-boundary events the
+      engine enqueues at invocation entry / exit so Trace-level
+      backends can populate ``trace.input`` / ``trace.output`` via the
+      three-lever decision tree.
       Bypass ``prepare_sync`` (same rationale as
       ``MetadataAugmentationEvent``: not a node-phase event).
 
@@ -883,10 +883,10 @@ async def deliver_loop(
       the event's phase do NOT receive it. Phase filter applies at
       delivery, not dispatch; the engine still produces both events
       for every attempt.
-    - For :class:`MetadataAugmentationEvent` (proposal 0040) and the
-      two invocation-boundary events :class:`InvocationStartedEvent`
-      / :class:`InvocationCompletedEvent` (proposal 0043), the
-      ``phases`` filter is bypassed entirely — none of those are
+    - For :class:`MetadataAugmentationEvent` and the two
+      invocation-boundary events :class:`InvocationStartedEvent` /
+      :class:`InvocationCompletedEvent`, the ``phases`` filter is
+      bypassed entirely — none of those are
       node-phase events, so every subscribed observer receives them
       regardless of ``phases``. Observers ``isinstance``-narrow on
       the first line and choose whether to act.

From 3b4b84154db4570aa3278d03683117753b7e42cc Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 11:00:07 -0700
Subject: [PATCH 08/15] Strip spec refs from conformance test docstrings

---
 tests/conformance/adapter.py                  | 16 +++++------
 tests/conformance/harness/directives.py       | 28 +++++++++----------
 tests/conformance/harness/expectations.py     |  2 +-
 .../harness/llm_attribute_assertions.py       |  2 +-
 tests/conformance/harness/wire.py             |  8 +++---
 tests/conformance/middleware_seam.py          | 10 +++----
 tests/conformance/test_checkpoint.py          | 25 ++++++++---------
 tests/conformance/test_llm_provider.py        |  6 ++--
 .../test_observability_langfuse.py            |  6 ++--
 tests/conformance/test_pipeline_utilities.py  |  6 ++--
 tests/conformance/test_prompt_management.py   |  6 ++--
 tests/conformance/test_state_migration.py     |  9 +++---
 12 files changed, 61 insertions(+), 63 deletions(-)

diff --git a/tests/conformance/adapter.py b/tests/conformance/adapter.py
index cb83258..5171d7a 100644
--- a/tests/conformance/adapter.py
+++ b/tests/conformance/adapter.py
@@ -600,7 +600,7 @@ class _TracingFanOutNode(FanOutNode[State, State]):
     """Conformance helper: a FanOutNode that appends its name to a shared
     trace list when the engine runs it. Same role as _TracingSubgraphNode
     for subgraphs — a fan-out node is one engine step from the parent's
-    POV (per §9.6), so it should contribute exactly one trace entry."""
+    POV, so it should contribute exactly one trace entry."""
 
     trace_list: list[str] = field(default_factory=list[str])
 
@@ -626,7 +626,7 @@ class _TracingParallelBranchesNode(ParallelBranchesNode[State]):
     """Conformance helper: a ParallelBranchesNode that appends its name
     to the shared trace list once when the engine runs it. The
     parallel-branches dispatcher itself counts as one engine step from
-    the parent's POV per §11.6, mirroring the fan-out tracing wrapper."""
+    the parent's POV, mirroring the fan-out tracing wrapper."""
 
     trace_list: list[str] = field(default_factory=list[str])
 
@@ -726,7 +726,7 @@ def build_graph(
 
     `node_middleware` (mapping node name to ordered middleware list) and
     `graph_middleware` (ordered middleware list applied to every node)
-    are pipeline-utilities §3 hooks. The translation from a fixture's
+    are middleware hooks. The translation from a fixture's
     `middleware:` block into actual instances lives in the
     pipeline-utilities test driver.
 
@@ -868,8 +868,8 @@ class ObserverFixture:
     YAML. None means "no `phases:` key was present" — the harness leaves
     the engine to default to both phases.
 
-    `sleep_ms_per_event` configures the slow-observer directive (proposal
-    0010 §6 Drain conformance). When `None`, the observer runs at full
+    `sleep_ms_per_event` configures the slow-observer directive. When
+    `None`, the observer runs at full
     speed. An int means a constant sleep per event. A dict with
     `first_invocation` / `subsequent_invocations` keys is invocation-
     counter-aware: the first invocation through this observer uses the
@@ -912,7 +912,7 @@ def _record_event(event: NodeEvent) -> dict[str, Any]:
 
 def _resolve_sleep_ms(fixture: ObserverFixture) -> int:
     """Resolve the per-event sleep duration in ms for the slow-observer
-    directive (proposal 0010 §6 Drain). `None` and `0` mean no sleep;
+    directive. `None` and `0` mean no sleep;
     an int form is constant; a dict form selects by `invocation_counter`.
     """
     spec = fixture.sleep_ms_per_event
@@ -942,8 +942,8 @@ def make_observer_fn(
     so the engine's error isolation can be verified by checking that
     subsequent observers/events still get through.
 
-    Honors `fixture.sleep_ms_per_event` per the proposal 0010 slow-
-    observer directive: each event awaits `asyncio.sleep(ms / 1000)`
+    Honors `fixture.sleep_ms_per_event` per the slow-observer
+    directive: each event awaits `asyncio.sleep(ms / 1000)`
     BEFORE recording, so a drain timeout that cancels mid-sleep leaves
     the event unrecorded and the counter shows it as undelivered.
     """
diff --git a/tests/conformance/harness/directives.py b/tests/conformance/harness/directives.py
index b7c22e6..d3f8577 100644
--- a/tests/conformance/harness/directives.py
+++ b/tests/conformance/harness/directives.py
@@ -319,12 +319,12 @@ class ParallelBranchSpec(_AllowExtras):
 
 
 class ParallelBranchesSpec(_AllowExtras):
-    """``parallel_branches:`` block on a NodeSpec (pipeline-utilities §11).
+    """``parallel_branches:`` block on a NodeSpec.
 
     Mirrors :class:`FanOutSpec` but topology-driven: M heterogeneous
     branches, each referencing a different compiled subgraph by name
     against the case's top-level ``subgraphs:`` block. Branch insertion
-    order is preserved per §11.8.
+    order is preserved.
     """
 
     branches: dict[str, ParallelBranchSpec]
@@ -333,8 +333,8 @@ class ParallelBranchesSpec(_AllowExtras):
 
 
 class RuntimeConfigSpec(_AllowExtras):
-    """``calls_llm.config`` block — mirrors ``RuntimeConfig`` (llm-provider
-    §6). Used by observability fixtures 016-018 (request-parameter and
+    """``calls_llm.config`` block — mirrors ``RuntimeConfig``. Used by
+    observability fixtures 016-018 (request-parameter and
     extras emission) and by the GenAI semconv set.
 
     Each field maps one-to-one to ``openarmature.llm.response.RuntimeConfig``
@@ -358,7 +358,7 @@ class CallsLlmSpec(_AllowExtras):
     and stores the response (assistant content) in ``stores_response_in``.
     Used by observability fixtures to verify LLM-provider span emission.
 
-    ``config`` (proposal 0024, fixtures 016-018) carries the optional
+    ``config`` (fixtures 016-018) carries the optional
     ``RuntimeConfig`` field set for the call — temperature, max_tokens,
     top_p, seed, and a provider-specific ``extras`` bag.
     """
@@ -396,7 +396,7 @@ class NodeSpec(_ForbidExtras):
       ``error_category``.
     - ``subgraph`` — references a top-level ``subgraph``/``subgraphs``
       definition by name. Companions: ``inputs``, ``outputs`` for explicit
-      mapping (spec v0.2 §2).
+      mapping.
     - ``fan_out`` — see :class:`FanOutSpec`.
     - ``flaky`` and the four ``flaky_*`` variants — harness mocks for
       retry/checkpoint behaviours.
@@ -407,7 +407,7 @@ class NodeSpec(_ForbidExtras):
     - ``emits_log`` — fires a log record with the node's update.
     - ``also_emits_via_global_tracer`` — fires a span on the OTel global
       provider (used to verify isolation).
-    - ``middleware`` — per-node middleware list (spec v0.5 §3).
+    - ``middleware`` — per-node middleware list.
     """
 
     # Primary directives — exactly one of these must be set.
@@ -517,8 +517,8 @@ class TraceRecorderMiddleware(_AllowExtras):
 
 
 class FailureIsolationMiddleware(_AllowExtras):
-    """Canonical failure-isolation middleware (proposal 0050 §6.3,
-    fixtures 058-063). Catches an exception escaping the inner chain and
+    """Canonical failure-isolation middleware (fixtures 058-063).
+    Catches an exception escaping the inner chain and
     returns a configured degraded partial update, emitting a distinct
     ``FailureIsolatedEvent``."""
 
@@ -577,7 +577,7 @@ class MockResponse(_AllowExtras):
     Permissive shape because the body's content mirrors OpenAI's wire
     format which is wide and evolving; modelling every field would
     duplicate the OpenAI schema. The ``llm-provider`` capability's
-    spec.md §8.1 is the authoritative shape.
+    spec is the authoritative shape.
     """
 
     status: int | None = None
@@ -617,10 +617,10 @@ class ObserverSpec(_ForbidExtras):
     - ``target`` is ``outer`` (outermost graph) or a subgraph name.
     - ``behavior`` is ``record`` (capture events for assertion) or
       ``raise`` (raise to verify error isolation).
-    - ``phases`` (optional, spec v0.6 §6) — subset of ``{"started",
-      "completed"}`` for per-observer phase subscription.
-    - ``sleep_ms_per_event`` (proposal 0010 §6 Drain conformance) — the
-      slow-observer directive. An int means a constant sleep per
+    - ``phases`` (optional) — subset of ``{"started", "completed"}``
+      for per-observer phase subscription.
+    - ``sleep_ms_per_event`` — the slow-observer directive. An int
+      means a constant sleep per
       event; a dict with ``first_invocation`` / ``subsequent_invocations``
       keys selects per invocation index (used by fixture 024 to slow
       only the first invocation).
diff --git a/tests/conformance/harness/expectations.py b/tests/conformance/harness/expectations.py
index a78db84..706586e 100644
--- a/tests/conformance/harness/expectations.py
+++ b/tests/conformance/harness/expectations.py
@@ -35,7 +35,7 @@ class GraphEngineExpected(_ForbidExtras):
     """Expected block for graph-engine fixtures (001–018).
 
     Top-level keys union'd across every fixture in
-    ``spec/graph-engine/conformance/`` at v0.8.0.
+    ``spec/graph-engine/conformance/``.
     """
 
     final_state: dict[str, Any] | None = None
diff --git a/tests/conformance/harness/llm_attribute_assertions.py b/tests/conformance/harness/llm_attribute_assertions.py
index 6e76982..111c179 100644
--- a/tests/conformance/harness/llm_attribute_assertions.py
+++ b/tests/conformance/harness/llm_attribute_assertions.py
@@ -12,7 +12,7 @@
 #   attribute_truncation: {attr: {max_bytes, marker_pattern,
 #                                 utf8_valid,
 #                                 prefix_of_full_serialization}}
-"""Assertion helpers for the v0.17.0 LLM span attribute fixtures."""
+"""Assertion helpers for the LLM span attribute fixtures."""
 
 from __future__ import annotations
 
diff --git a/tests/conformance/harness/wire.py b/tests/conformance/harness/wire.py
index 0919d47..8cc7bd6 100644
--- a/tests/conformance/harness/wire.py
+++ b/tests/conformance/harness/wire.py
@@ -100,10 +100,10 @@ def assert_response_format_absent(body: Mapping[str, Any]) -> None:
 def assert_tool_choice_absent(body: Mapping[str, Any]) -> None:
     """Assert the wire body has no ``tool_choice`` key.
 
-    Per spec §8.1.1 (proposal 0025): when the caller omits
-    ``tool_choice`` from the ``complete()`` call, the wire body MUST
-    omit the field entirely so the OpenAI provider's own default
-    applies. Mirrors :func:`assert_response_format_absent`'s pattern.
+    When the caller omits ``tool_choice`` from the ``complete()`` call,
+    the wire body MUST omit the field entirely so the OpenAI provider's
+    own default applies. Mirrors
+    :func:`assert_response_format_absent`'s pattern.
     """
     if "tool_choice" in body:
         raise AssertionError(
diff --git a/tests/conformance/middleware_seam.py b/tests/conformance/middleware_seam.py
index 9c5e6fb..dd07a91 100644
--- a/tests/conformance/middleware_seam.py
+++ b/tests/conformance/middleware_seam.py
@@ -81,8 +81,8 @@ async def __call__(self, state: Any, next_: NextCall) -> Mapping[str, Any]:
 class ShortCircuitMiddleware:
     """Returns the configured partial without calling `next`.
 
-    Per spec §2: the rest of the chain — subsequent middleware and the
-    wrapped node — does not execute. The short-circuiting middleware's
+    The rest of the chain — subsequent middleware and the wrapped node
+    — does not execute. The short-circuiting middleware's
     own post-phase is also skipped (because there's no `await next`
     return point to pass through).
     """
@@ -98,8 +98,8 @@ async def __call__(self, state: Any, next_: NextCall) -> Mapping[str, Any]:
 class ErrorRecoveryMiddleware:
     """Catches any Exception from `next`; returns the configured partial.
 
-    Per spec §5: middleware MAY catch an exception and return a partial
-    update instead of re-raising. The engine treats the dispatch as a
+    Middleware MAY catch an exception and return a partial update
+    instead of re-raising. The engine treats the dispatch as a
     success (post_state populated, no error in the completed event).
     """
 
@@ -117,7 +117,7 @@ class ErrorRaiserMiddleware:
     """Raises a configured exception in the pre-phase.
 
     Verifies that middleware-raised exceptions surface as
-    ``node_exception`` per graph-engine §4.
+    ``node_exception``.
     """
 
     def __init__(self, *, message: str) -> None:
diff --git a/tests/conformance/test_checkpoint.py b/tests/conformance/test_checkpoint.py
index 516af65..d90303f 100644
--- a/tests/conformance/test_checkpoint.py
+++ b/tests/conformance/test_checkpoint.py
@@ -1,9 +1,9 @@
 """Run every spec checkpoint conformance fixture (024-031, 048-054)
 against the engine.
 
-Phase 5 / proposal-0009 scope: pipeline-utilities §10. Drives the real
-:class:`InMemoryCheckpointer` (with optional fan-out internal save
-batching per §10.11.4) through the engine's save+resume path end-to-end,
+Drives the real :class:`InMemoryCheckpointer` (with optional fan-out
+internal save batching) through the engine's save+resume path
+end-to-end,
 asserting against the fixture's ``saved_record_assertions`` (including
 ``fan_out_progress`` matchers), ``expected.checkpoint_saves``,
 ``invariants``, and resume expectations (including per-instance
@@ -17,15 +17,14 @@
 - 026 record-shape — supported.
 - 027 attempt-index-resets-on-resume — needs a resume-aware
   ``flaky_resume_aware`` test seam in the adapter; deferred.
-- 028 fan-out-atomic-restart — REMOVED in spec v0.18.0 (replaced by
-  per-instance resume contract). The fixture file no longer exists.
+- 028 fan-out-atomic-restart — REMOVED (replaced by the per-instance
+  resume contract). The fixture file no longer exists.
 - 029 subgraph-resume — supported (uses plain ``flaky``).
 - 030 checkpoint-not-found — supported.
 - 031 correlation-id-preserved-across-resume — record-level
   assertions supported here; the OTel span/log assertions are
   gated until Phase 6 lands the observability mapping.
-- 048-054 per-instance fan-out resume contract (proposal 0009) —
-  supported.
+- 048-054 per-instance fan-out resume contract — supported.
 """
 
 from __future__ import annotations
@@ -130,7 +129,7 @@ class _CapturingCheckpointer:
     in order so the harness can assert against the fixture's
     ``expected.checkpoint_saves`` block. Implements the
     :class:`Checkpointer` Protocol shape AND the optional
-    ``save_fan_out_internal`` hook (per §10.11.4 batching) so the
+    ``save_fan_out_internal`` hook (fan-out internal save batching) so the
     engine routes inner-instance saves here.
 
     ``abort_after_instance``: when set, the wrapper raises
@@ -138,8 +137,8 @@ class _CapturingCheckpointer:
     the named instance index from ``not_started`` / ``in_flight`` to
     ``completed``. Simulates a crash at that exact point — used by
     fixture 052 to test collect-mode error-record rollforward, and by
-    the ``crash_injection: {after_fan_out_instance}`` directive (proposal
-    0070). ``abort_after_node``: the same simulated crash AFTER the save
+    the ``crash_injection: {after_fan_out_instance}`` directive.
+    ``abort_after_node``: the same simulated crash AFTER the save
     that records the named node in ``completed_positions`` — the
     ``crash_injection: {after_node}`` boundary.
     """
@@ -317,7 +316,7 @@ def _build_capturing(spec: Mapping[str, Any]) -> _CapturingCheckpointer:
     The fixture's ``checkpointer`` field accepts two shapes:
     - ``"in_memory"``: default no-batching backend.
     - ``{kind: in_memory_batched, fan_out_internal_save_batching: {flush_every: N}}``:
-      the §10.11.4 batched backend with N-save flush interval.
+      the batched backend with an N-save flush interval.
 
     The fixture's fan-out node may also carry ``abort_after_instance: N``
     — a harness-level directive that simulates a crash after the named
@@ -913,8 +912,8 @@ def _assert_saved_record_from(
     :func:`_assert_saved_record` but the caller supplies the record
     directly (used for fixtures where the assertion targets the
     loaded record rather than the last in-memory save call —
-    e.g., the §10.11.4 batching case where buffered saves are
-    invisible to ``load``)."""
+    e.g., the batching case where buffered saves are invisible to
+    ``load``)."""
     if "completed_positions" in block:
         expected_positions = cast("list[Mapping[str, Any]]", block["completed_positions"])
         actual = [
diff --git a/tests/conformance/test_llm_provider.py b/tests/conformance/test_llm_provider.py
index 73dd9c9..0059053 100644
--- a/tests/conformance/test_llm_provider.py
+++ b/tests/conformance/test_llm_provider.py
@@ -4,8 +4,8 @@
 behavior in terms of OpenAI Chat Completions wire-format mock
 responses + expected ``Provider.complete()`` / ``Provider.ready()``
 outcomes. The harness drives the real :class:`OpenAIProvider` via
-``httpx.MockTransport`` so the wire-mapping path (spec §8.1) is
-exercised end-to-end — fixture 005 explicitly tests that mapping, so
+``httpx.MockTransport`` so the wire-mapping path is exercised
+end-to-end — fixture 005 explicitly tests that mapping, so
 mocking at the Provider boundary would skip what we want to verify.
 
 Fixture shapes the harness handles:
@@ -269,7 +269,7 @@ def _build_tool_choice(raw: Any) -> ToolChoice | None:
     """Translate a fixture's ``tool_choice:`` value into the
     :class:`ToolChoice` discriminated-union value.
 
-    Two YAML shapes per spec proposal 0025:
+    Two YAML shapes:
 
     - String: ``auto`` / ``required`` / ``none`` — passes through
       verbatim.
diff --git a/tests/conformance/test_observability_langfuse.py b/tests/conformance/test_observability_langfuse.py
index c9f17e5..0c10fb5 100644
--- a/tests/conformance/test_observability_langfuse.py
+++ b/tests/conformance/test_observability_langfuse.py
@@ -354,9 +354,9 @@ class _MockPromptBackend:
     - ``mock_with_langfuse_reference``: attaches the supplied
       ``langfuse_prompt_reference`` sentinel under
       ``Prompt.observability_entities['langfuse_prompt']``. Verifies
-      §8.4.4 case 1 (Generation linked to Prompt entity).
-    - ``filesystem``: no Langfuse reference attached. Verifies §8.4.4
-      case 2 (metadata-only).
+      the Generation-linked-to-Prompt-entity case.
+    - ``filesystem``: no Langfuse reference attached. Verifies the
+      metadata-only case.
     """
 
     def __init__(self, prompts: dict[str, dict[str, Any]], *, with_langfuse_reference: bool) -> None:
diff --git a/tests/conformance/test_pipeline_utilities.py b/tests/conformance/test_pipeline_utilities.py
index b488fbf..616f609 100644
--- a/tests/conformance/test_pipeline_utilities.py
+++ b/tests/conformance/test_pipeline_utilities.py
@@ -1,6 +1,6 @@
 """Run every spec pipeline-utilities conformance fixture against the engine.
 
-Phase 2 scope (proposal 0004 middleware): fixtures 001-016. Fixtures
+Phase 2 scope (middleware): fixtures 001-016. Fixtures
 017-019 (fan-out) and 020-021 (fan-out + middleware composition) skip
 via `_unsupported_directive` until Phase 3 lands the fan-out runtime.
 Fixtures 022-031 (fan-out and checkpointing) similarly skip until their
@@ -319,7 +319,7 @@ def _translate_parallel_branches_branch_middleware(
     """Walk ``spec.nodes`` for parallel_branches blocks with per-branch
     ``middleware:`` and translate each into a list of Middleware
     instances. Returned map is keyed by parallel-branches node name
-    then branch name (per spec §11.7 branch middleware) and consumed by
+    then branch name (per-branch middleware) and consumed by
     build_graph's ``parallel_branches_branch_middleware`` kwarg."""
     out: dict[str, dict[str, list[Middleware]]] = {}
     nodes = cast("dict[str, dict[str, Any]]", spec.get("nodes") or {})
@@ -849,7 +849,7 @@ def _collect_parallel_branches_errors_fields(spec: Mapping[str, Any]) -> set[str
     """Return the set of parent-state field names used as
     ``errors_field`` on any parallel_branches node in ``spec``.
 
-    Per spec §11.1 ``errors_field`` carries an implementation-defined
+    The ``errors_field`` carries an implementation-defined
     record shape; the spec only mandates ``branch_name`` + category. The
     engine's record carries additional engine-defined keys (``message``,
     ``cause_type``). Fixtures asserting against ``errors_field`` records
diff --git a/tests/conformance/test_prompt_management.py b/tests/conformance/test_prompt_management.py
index d2e6884..62aa0e4 100644
--- a/tests/conformance/test_prompt_management.py
+++ b/tests/conformance/test_prompt_management.py
@@ -69,10 +69,10 @@ def _fixture_id(path: Path) -> str:
 def _segment_from_fixture(entry: dict[str, Any]) -> Any:
     """Map one ``chat_template`` entry from a fixture YAML to an OA
     ChatSegment.  Uses ``model_construct`` to bypass construction-time
-    §11 validators — the harness exists to test render-time behavior,
+    validators — the harness exists to test render-time behavior,
     including fixtures that intentionally build prompts violating
     construction-time invariants (placeholder regex, role-block
-    compat).  Render-time enforcement (§11 spec-normative trigger)
+    compat).  Render-time enforcement (the spec-normative trigger)
     still runs; only the construction-time ergonomic-only check is
     bypassed.
 
@@ -166,7 +166,7 @@ def _message_from_fixture(entry: dict[str, Any]) -> Message:
     """Map one fixture placeholder-list entry to an OA ``Message``.
 
     Placeholder injection carries caller-supplied ``Message`` lists
-    so all four llm-provider §3 roles are valid here (``system`` /
+    so all four llm-provider roles are valid here (``system`` /
     ``user`` / ``assistant`` / ``tool``).  Unknown or misspelled
     roles raise rather than silently coerce to user — fail-closed
     posture symmetric to the Langfuse backend's mapper.
diff --git a/tests/conformance/test_state_migration.py b/tests/conformance/test_state_migration.py
index 6f9f30a..ca37460 100644
--- a/tests/conformance/test_state_migration.py
+++ b/tests/conformance/test_state_migration.py
@@ -9,12 +9,11 @@
 specifying either an ``expected`` happy-path or an
 ``expected_error`` raise.
 
-Fixture 047 (``state-migration-chain-ambiguous``, added in spec
-v0.16.0 / proposal 0018) exercises the
+Fixture 047 (``state-migration-chain-ambiguous``) exercises the
 ``expected_chain_ambiguity_error`` harness primitive that accepts
 the canonical ``checkpoint_state_migration_chain_ambiguous``
 category at EITHER build time (duplicate-pair registration) or
-resume time (multi-shortest-path detection) per spec §10.12.2's
+resume time (multi-shortest-path detection), per the
 compile-time-SHOULD / load-time-acceptable carve-out.
 
 The driver:
@@ -232,8 +231,8 @@ async def _run_one_case(case: dict[str, Any], tmp_path: Path) -> None:
     OR inside ``resume:`` (load-time detection: registration
     succeeds and BFS raises during the resume attempt). The driver
     wraps both phases in try/except so the canonical category
-    surfaces from either timing per spec §10.12.2's compile-time-
-    SHOULD / load-time-acceptable carve-out.
+    surfaces from either timing, per the compile-time-SHOULD /
+    load-time-acceptable carve-out.
     """
     state_cls = _build_state_cls(case["state"], model_name=f"Case_{case['name']}")
 

From 214abfb626ca4b7f07c7d2b65d909cde6029322e Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 11:02:18 -0700
Subject: [PATCH 09/15] Strip spec refs from test_observability docstrings

---
 tests/conformance/test_observability.py | 56 ++++++++++++-------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/tests/conformance/test_observability.py b/tests/conformance/test_observability.py
index c6b7bcb..be17996 100644
--- a/tests/conformance/test_observability.py
+++ b/tests/conformance/test_observability.py
@@ -4,14 +4,14 @@
 
 - **001-basic-trace** (Phase 6.0) — full span shape.
 - **002-subgraph-hierarchy** (PR-C) — synthetic dispatch span +
-  inner-node parenting per §4.5.
-- **003-error-status** (PR-C) — §4.2 ERROR status mapping for the
+  inner-node parenting.
+- **003-error-status** (PR-C) — ERROR status mapping for the
   ``node_exception`` case.
-- **005-llm-provider-span-nested** (Phase 6.0) — §5.5 LLM span +
-  ``disable_llm_spans`` opt-out + §6 TracerProvider isolation.
+- **005-llm-provider-span-nested** (Phase 6.0) — LLM span +
+  ``disable_llm_spans`` opt-out + TracerProvider isolation.
 - **007-retry-attempt-spans** (PR-C) — sibling attempt spans with
   per-attempt ``attempt_index`` under retry middleware.
-- **008-detached-trace-mode** (Phase 6.0) — §4.4 detached subgraph
+- **008-detached-trace-mode** (Phase 6.0) — detached subgraph
   + detached fan-out + cross-trace ``correlation_id``.
 - **009-correlation-id-cross-cutting** (Phase 6.0) — every span
   carries ``openarmature.correlation_id``; back-to-back
@@ -350,7 +350,7 @@ async def _run_fixture_001(spec: Mapping[str, Any]) -> None:
 
 
 async def _run_fixture_002(spec: Mapping[str, Any]) -> None:
-    """Spec §4.5: the subgraph wrapper synthesizes a dispatch span;
+    """The subgraph wrapper synthesizes a dispatch span;
     inner-node spans parent under it; the dispatch span parents
     under the invocation."""
     observer, exporter = _build_observer()
@@ -428,7 +428,7 @@ async def _run_fixture_002(spec: Mapping[str, Any]) -> None:
 
 
 async def _run_fixture_003(spec: Mapping[str, Any]) -> None:
-    """Spec §4.2: a node-exception failure produces an ERROR span
+    """A node-exception failure produces an ERROR span
     with the canonical category in the description, an exception
     event recorded, and the ``openarmature.error.category``
     attribute. Sibling spans before the failure stay OK; the
@@ -494,8 +494,8 @@ async def _run_fixture_003(spec: Mapping[str, Any]) -> None:
 
 
 async def _run_fixture_004(spec: Mapping[str, Any]) -> None:
-    """Spec §4.2 + spec v0.9.0 / proposal 0012: routing errors land on
-    the preceding node's ``completed`` event with ``error`` populated
+    """Routing errors land on the preceding node's ``completed`` event
+    with ``error`` populated
     (sharing the started/completed pair rather than producing a
     separate one). The OTel observer's existing
     ``_handle_completed`` ERROR-mapping path picks this up
@@ -504,9 +504,8 @@ async def _run_fixture_004(spec: Mapping[str, Any]) -> None:
     Driver verifies: the ``pick`` node's span ends ERROR with
     ``status_description == "routing_error"``, an ``exception``
     event recorded, and the ``openarmature.error.category``
-    attribute. No span for the edge function (no ``edge_spans``)
-    per §4.2's "edge logic folded into the preceding node span"
-    framing."""
+    attribute. No span for the edge function (no ``edge_spans``) —
+    edge logic is folded into the preceding node span."""
     from opentelemetry.trace import StatusCode
 
     from openarmature.graph import RuntimeGraphError
@@ -572,8 +571,8 @@ async def _run_fixture_004(spec: Mapping[str, Any]) -> None:
 
 
 async def _run_fixture_006(spec: Mapping[str, Any]) -> None:
-    """Spec §5.4 + proposal 0013 (v0.10.0): non-detached fan-out
-    instances synthesize per-instance dispatch spans nested between
+    """Non-detached fan-out instances synthesize per-instance dispatch
+    spans nested between
     the fan-out node span and the inner-node spans. The fan-out node
     span carries ``item_count`` / ``concurrency`` / ``error_policy``
     from ``NodeEvent.fan_out_config``; per-instance spans carry
@@ -826,7 +825,7 @@ def _classifier(exc: Exception, _state: Any, _transient: frozenset[str] = transi
 
 
 async def _run_fixture_011(spec: Mapping[str, Any]) -> None:
-    """Spec §8: deterministic span content is identical across two
+    """Deterministic span content is identical across two
     invocations of the same graph with the same input. The
     signature compared per-span:
     ``(name, status_code, parent_name, attrs ∖ ignored_set)``.
@@ -923,8 +922,8 @@ def _signature(
 
 
 async def _run_fixture_028(spec: Mapping[str, Any]) -> None:
-    """Proposal 0034 §3.4: caller-supplied metadata keys under
-    reserved namespaces (``openarmature.*``, ``gen_ai.*``) MUST
+    """Caller-supplied metadata keys under reserved namespaces
+    (``openarmature.*``, ``gen_ai.*``) MUST
     raise at the ``invoke()`` boundary before any work begins.
     The harness asserts:
 
@@ -1208,12 +1207,12 @@ async def _run_fixture_009_case(case: Mapping[str, Any]) -> None:
 async def _run_fixture_005(spec: Mapping[str, Any]) -> None:
     """Three sub-cases:
 
-    1. ``default`` — LLM span emits with §5 attributes, parented under
+    1. ``default`` — LLM span emits with its attributes, parented under
        the calling node.
     2. ``disable_llm_spans`` — opt-out suppresses the LLM span entirely.
     3. ``external_auto_instrumentation_active`` — second exporter on
        the OTel global provider; openarmature spans MUST NOT leak to
-       it (the load-bearing §6 TracerProvider isolation guarantee).
+       it (the load-bearing TracerProvider isolation guarantee).
     """
     cases = cast("list[dict[str, Any]]", spec["cases"])
     for case in cases:
@@ -1420,12 +1419,12 @@ def _resolve_target_for_005(case: Mapping[str, Any]) -> Any:
 
 
 async def _run_fixture_038(spec: Mapping[str, Any]) -> None:
-    """Single-case proposal-0044 fixture: a two-branch parallel-branches
-    graph where each branch's inner ``ask`` node makes an LLM call.
+    """A two-branch parallel-branches fixture where each branch's inner
+    ``ask`` node makes an LLM call.
 
     The OTel observer MUST synthesize a per-branch dispatch span between
     the parallel-branches NODE span and each branch's inner-node spans;
-    the §5.7 attribute surface (``branch_count`` + ``error_policy`` on
+    the attribute surface (``branch_count`` + ``error_policy`` on
     the NODE span, ``branch_name`` + ``parent_node_name`` on each
     dispatch span, ``branch_name`` on inner-branch leaf spans) MUST
     appear; per-branch dispatch spans MUST close before the NODE span
@@ -2069,9 +2068,8 @@ def _compile_subgraphs(spec: Mapping[str, Any]) -> dict[str, Any]:
 
 
 async def test_phase5_fixture_031_span_assertions() -> None:
-    """Spec §10.4 step 3 + step 4 + observability §3 / §5.6: every
-    span across BOTH the original and resumed runs MUST carry the
-    same ``openarmature.correlation_id``; ``invocation_id`` differs
+    """Every span across BOTH the original and resumed runs MUST carry
+    the same ``openarmature.correlation_id``; ``invocation_id`` differs
     across the two runs (each is its own invocation in the
     observability sense)."""
     fixture_path = _PIPELINE_CONFORMANCE_DIR / "031-checkpoint-correlation-id-preserved-across-resume.yaml"
@@ -2499,7 +2497,7 @@ def _build_observer_with_detached(detached_subgraphs: frozenset[str]) -> tuple[O
 
 
 async def _run_llm_payload_fixture(spec: Mapping[str, Any]) -> None:
-    """Generic driver for the ten v0.17.0 LLM-attribute fixtures.
+    """Generic driver for the ten LLM-attribute fixtures.
 
     Each fixture is single-case (GraphFixture shape) with a top-level
     ``cases:`` list of one entry; the case carries the graph + the
@@ -3323,7 +3321,7 @@ def _assert_node_completed_event_carries_error(
 ) -> None:
     """Failure-path assertion (fixture 053): the calling node's
     completed NodeEvent carries an error whose cause chain bottoms
-    out in an llm-provider §7 category matching the expectation.
+    out in an llm-provider category matching the expectation.
     The engine wraps the underlying ProviderUnavailable (etc.) in a
     NodeException; walk ``__cause__`` to reach the categorized cause.
     """
@@ -3363,8 +3361,8 @@ async def __call__(self, event: Any) -> None:
 
 
 async def _run_llm_cache_fixture(spec: Mapping[str, Any]) -> None:
-    """Run the proposal 0047 §5.5.3.1 cache-attribute fixtures (040,
-    041, 042). All three share the same simple-shape graph and assert
+    """Run the cache-attribute fixtures (040, 041, 042). All three
+    share the same simple-shape graph and assert
     on ``Response.usage`` cache fields plus the LLM provider span's
     ``openarmature.llm.cache_read.input_tokens`` /
     ``openarmature.llm.cache_creation.input_tokens`` attribute set.

From f2e7ef0e53d9ca3b4c464b9e52c705d362c125cb Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 11:03:19 -0700
Subject: [PATCH 10/15] Strip remaining spec ref from test_checkpoint docstring

---
 tests/conformance/test_checkpoint.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/conformance/test_checkpoint.py b/tests/conformance/test_checkpoint.py
index d90303f..142c524 100644
--- a/tests/conformance/test_checkpoint.py
+++ b/tests/conformance/test_checkpoint.py
@@ -8,7 +8,8 @@
 ``fan_out_progress`` matchers), ``expected.checkpoint_saves``,
 ``invariants``, and resume expectations (including per-instance
 ``instances_executed_during_resume`` / ``instances_skipped_during_resume``
-and per-instance attempt-count assertions from proposal 0009 fixtures).
+and per-instance attempt-count assertions from the per-instance resume
+fixtures).
 
 Fixture-by-fixture status:
 

From 1b3a76ec999de39997b3624d3269c67ff333bb0d Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 11:06:10 -0700
Subject: [PATCH 11/15] Strip spec refs from small unit test docstrings

---
 tests/integration/test_langfuse_sdk_adapter.py |  4 ++--
 tests/unit/test_compile_errors.py              |  4 ++--
 tests/unit/test_correlation.py                 | 18 +++++++++---------
 tests/unit/test_drain.py                       |  2 +-
 .../unit/test_failure_isolation_middleware.py  |  2 +-
 tests/unit/test_llm_provider.py                |  2 +-
 tests/unit/test_middleware.py                  | 18 +++++++++---------
 tests/unit/test_observability_metadata.py      |  6 +++---
 tests/unit/test_observer.py                    |  2 +-
 tests/unit/test_parallel_branches.py           |  2 +-
 tests/unit/test_projection.py                  |  4 ++--
 tests/unit/test_runtime_errors.py              | 12 ++++++------
 tests/unit/test_state_migration.py             | 12 ++++++------
 tests/unit/test_tool_choice.py                 | 11 +++++------
 14 files changed, 49 insertions(+), 50 deletions(-)

diff --git a/tests/integration/test_langfuse_sdk_adapter.py b/tests/integration/test_langfuse_sdk_adapter.py
index 972cda6..1b449f2 100644
--- a/tests/integration/test_langfuse_sdk_adapter.py
+++ b/tests/integration/test_langfuse_sdk_adapter.py
@@ -152,8 +152,8 @@ async def test_sdk_adapter_generation_timestamps_round_trip_through_langfuse() -
     reading the projected timestamps back from the REST API and
     asserting they reflect the back-dated values.
 
-    Catches the failure mode the v0.13.0 Langfuse migration is
-    susceptible to: if a future SDK release renames _otel_tracer,
+    Catches the failure mode the Langfuse migration is susceptible to:
+    if a future SDK release renames _otel_tracer,
     moves LangfuseGeneration, or otherwise breaks the private-API
     surface the adapter relies on, the back-dating routing fails
     silently — the Langfuse UI shows call-time timestamps instead
diff --git a/tests/unit/test_compile_errors.py b/tests/unit/test_compile_errors.py
index cf6b987..2c455e9 100644
--- a/tests/unit/test_compile_errors.py
+++ b/tests/unit/test_compile_errors.py
@@ -52,8 +52,8 @@ def test_duplicate_subgraph_name_raises_value_error() -> None:
 
 
 def test_compile_validates_subgraph_projection_mapping() -> None:
-    """Per spec v0.2.0 §2: compilation MUST fail when a subgraph-as-node
-    mapping references a field not declared in the relevant state schema."""
+    """Compilation MUST fail when a subgraph-as-node mapping references
+    a field not declared in the relevant state schema."""
 
     inner = GraphBuilder(ChildS).add_node("i", _noop).add_edge("i", END).set_entry("i").compile()
 
diff --git a/tests/unit/test_correlation.py b/tests/unit/test_correlation.py
index e0edd5a..6298c00 100644
--- a/tests/unit/test_correlation.py
+++ b/tests/unit/test_correlation.py
@@ -1,6 +1,6 @@
 """Cross-backend correlation primitives — no OTel deps.
 
-Verifies the spec observability §3 contract independently of any
+Verifies the cross-backend correlation contract independently of any
 backend mapping. Lives in the unit test root rather than under any
 backend-specific directory because correlation_id is core: it MUST be
 readable from any user code (node bodies, middleware, observers) even
@@ -42,8 +42,8 @@ async def test_caller_supplied_correlation_id_visible_inside_node() -> None:
 
 
 async def test_auto_generated_correlation_id_is_uuidv4() -> None:
-    """Per spec §3.1, when the caller does not supply a correlation_id
-    the framework MUST auto-generate a canonical 36-character UUIDv4."""
+    """When the caller does not supply a correlation_id the framework
+    MUST auto-generate a canonical 36-character UUIDv4."""
     g = GraphBuilder(_S).add_node("read", _read_correlation).add_edge("read", END).set_entry("read").compile()
     final = await g.invoke(_S())  # no caller correlation_id
     cid = final.captured
@@ -56,8 +56,8 @@ async def test_auto_generated_correlation_id_is_uuidv4() -> None:
 
 
 async def test_correlation_id_resets_between_invocations() -> None:
-    """Spec §3.1: ``Reset the context after the invocation completes
-    so subsequent invocations get fresh correlation IDs.``"""
+    """The context resets after the invocation completes so subsequent
+    invocations get fresh correlation IDs."""
     # Outside any invocation, correlation_id is None.
     assert current_correlation_id() is None
     g = GraphBuilder(_S).add_node("read", _read_correlation).add_edge("read", END).set_entry("read").compile()
@@ -89,8 +89,8 @@ async def test_correlation_id_isolated_across_concurrent_invocations() -> None:
 
 
 async def test_correlation_id_and_invocation_id_are_structurally_distinct() -> None:
-    """Spec observability §3.2: ``correlation_id`` and
-    ``invocation_id`` serve different purposes and MUST NOT be
+    """``correlation_id`` and ``invocation_id`` serve different
+    purposes and MUST NOT be
     conflated. Drive a real invocation with a checkpointer and read
     both ids from the saved record (deterministic) — plus an in-body
     cross-check via the public ContextVar readers — to verify the
@@ -155,8 +155,8 @@ def test_current_correlation_id_returns_none_outside_invocation() -> None:
 
 
 async def test_resume_preserves_correlation_id_visible_to_user_code() -> None:
-    """Spec §10.4 step 3: resume MUST preserve the original
-    correlation_id verbatim. The Phase 5 checkpoint test verifies
+    """Resume MUST preserve the original correlation_id verbatim. The
+    Phase 5 checkpoint test verifies
     this at the saved-record level; here we additionally verify it
     propagates to the ContextVar that user code reads from inside
     node bodies during the resumed invocation."""
diff --git a/tests/unit/test_drain.py b/tests/unit/test_drain.py
index 27498f5..e1b3fe2 100644
--- a/tests/unit/test_drain.py
+++ b/tests/unit/test_drain.py
@@ -6,7 +6,7 @@
 
 """Unit tests for `CompiledGraph.drain(timeout=...)` + `DrainSummary`.
 
-Per spec graph-engine §6 (amended by proposal 0010): drain accepts an
+Drain accepts an
 optional timeout, returns a `DrainSummary` with at least
 `undelivered_count` + `timeout_reached`, MUST cancel workers cleanly
 so the graph remains usable for subsequent invocations.
diff --git a/tests/unit/test_failure_isolation_middleware.py b/tests/unit/test_failure_isolation_middleware.py
index 10b84a4..1acbcd0 100644
--- a/tests/unit/test_failure_isolation_middleware.py
+++ b/tests/unit/test_failure_isolation_middleware.py
@@ -1,4 +1,4 @@
-"""Unit + integration tests for FailureIsolationMiddleware (proposal 0050 §6.3).
+"""Unit + integration tests for FailureIsolationMiddleware.
 
 Covers the middleware's catch / degrade / predicate / on_caught
 contract, the framework-emitted ``FailureIsolatedEvent`` and its field
diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py
index 11f330a..13f6e3d 100644
--- a/tests/unit/test_llm_provider.py
+++ b/tests/unit/test_llm_provider.py
@@ -522,7 +522,7 @@ def test_usage_negative_token_count_rejected_at_construction() -> None:
 async def test_complete_negative_usage_surfaces_as_invalid_response() -> None:
     """A wire response carrying a negative token count MUST surface as
     ``provider_invalid_response`` rather than silently passing through —
-    spec §6 token counts are non-negative integers."""
+    token counts are non-negative integers."""
 
     def _bad(_req: httpx.Request) -> httpx.Response:
         return httpx.Response(
diff --git a/tests/unit/test_middleware.py b/tests/unit/test_middleware.py
index efc9b33..ccb587a 100644
--- a/tests/unit/test_middleware.py
+++ b/tests/unit/test_middleware.py
@@ -239,8 +239,8 @@ def test_retry_middleware_rejects_non_config() -> None:
 
 
 async def test_error_recovery_via_catch_and_return_partial() -> None:
-    """Per spec §5: middleware MAY catch an exception and return a
-    partial update. The chain returns successfully — no exception
+    """Middleware MAY catch an exception and return a partial update.
+    The chain returns successfully — no exception
     reaches the engine."""
     inner_called = [0]
 
@@ -323,7 +323,7 @@ async def innermost(_state: Any) -> Mapping[str, Any]:
 
 
 async def test_middleware_can_call_next_repeatedly() -> None:
-    """Per spec §2: a middleware MAY call ``next`` more than once. Retry
+    """A middleware MAY call ``next`` more than once. Retry
     exercises this with N=2-3 attempts; this test pins the contract
     independently by calling ``next`` 5 times in a loop and asserting the
     inner runs exactly that many times."""
@@ -358,8 +358,8 @@ async def test_timing_callback_failure_replaces_original_exception() -> None:
     exception chaining preserves the original on ``__context__``, but the
     active exception observers see is the callback's.
 
-    Spec §6.2 says callbacks SHOULD be fast and infallible — this test
-    documents what happens if a user violates that, so a future change
+    Callbacks SHOULD be fast and infallible — this test documents what
+    happens if a user violates that, so a future change
     that wants to preserve the original (e.g., via explicit ``raise exc
     from cb_exc``) doesn't silently regress this contract.
     """
@@ -394,8 +394,8 @@ class InnerState(State):
 
 
 async def test_parent_middleware_does_not_wrap_subgraph_internal_nodes() -> None:
-    """Per spec §4: parent's middleware wraps the SubgraphNode dispatch
-    but NOT the subgraph's internal nodes. The subgraph's own middleware
+    """Parent's middleware wraps the SubgraphNode dispatch but NOT the
+    subgraph's internal nodes. The subgraph's own middleware
     is the only thing wrapping its inner nodes."""
     parent_calls: list[str] = []
     sub_calls: list[str] = []
@@ -462,8 +462,8 @@ def test_default_classifier_recognizes_transient_via_direct_category() -> None:
 
 
 def test_default_classifier_walks_cause_for_node_exception_wrappers() -> None:
-    """Per spec §6.1: a node_exception whose ``__cause__`` is a transient
-    category MUST be classified as transient."""
+    """A node_exception whose ``__cause__`` is a transient category MUST
+    be classified as transient."""
     raw = _CategorizedTransient()
     wrapper = RuntimeError("wrapped")
     wrapper.__cause__ = raw
diff --git a/tests/unit/test_observability_metadata.py b/tests/unit/test_observability_metadata.py
index cc07236..ba5825e 100644
--- a/tests/unit/test_observability_metadata.py
+++ b/tests/unit/test_observability_metadata.py
@@ -1,6 +1,6 @@
-"""Unit tests for the caller-supplied invocation surface: metadata
-(proposal 0034), the caller-supplied invocation_id (proposal 0039),
-and the reserved exact-key-name rejection (proposal 0041).
+"""Unit tests for the caller-supplied invocation surface: metadata,
+the caller-supplied invocation_id, and the reserved exact-key-name
+rejection.
 
 These tests pin the validation rules, the ContextVar lifecycle, the
 mid-invocation augmentation helper, and the per-async-context COW
diff --git a/tests/unit/test_observer.py b/tests/unit/test_observer.py
index 99bdc05..b235c0b 100644
--- a/tests/unit/test_observer.py
+++ b/tests/unit/test_observer.py
@@ -1,6 +1,6 @@
 """Unit tests for the observer delivery queue mechanics.
 
-Per spec v0.6.0 §6: delivery is strictly serial, ordered, isolates
+Delivery is strictly serial and ordered, isolates
 observer exceptions, and filters by per-observer phase subscription.
 These tests exercise the queue/worker pair in isolation — no graph
 engine — so behavior bugs surface here rather than inside fixture
diff --git a/tests/unit/test_parallel_branches.py b/tests/unit/test_parallel_branches.py
index f8f268d..507188e 100644
--- a/tests/unit/test_parallel_branches.py
+++ b/tests/unit/test_parallel_branches.py
@@ -1,4 +1,4 @@
-"""Unit tests for the parallel-branches runtime (pipeline-utilities §11).
+"""Unit tests for the parallel-branches runtime.
 
 Covers spec corner cases the conformance fixtures exercise only
 implicitly:
diff --git a/tests/unit/test_projection.py b/tests/unit/test_projection.py
index c3f03ad..bf81e32 100644
--- a/tests/unit/test_projection.py
+++ b/tests/unit/test_projection.py
@@ -29,8 +29,8 @@ class ChildNoOverlap(State):
 
 
 def test_project_in_returns_subgraph_defaults() -> None:
-    """Per spec v0.2.0 §2: default projection-in is no projection — subgraph
-    starts from its own field defaults regardless of parent state."""
+    """Default projection-in is no projection — subgraph starts from
+    its own field defaults regardless of parent state."""
 
     proj = FieldNameMatching[Parent, ChildOverlap]()
     sub = proj.project_in(Parent(shared="ignored"), ChildOverlap)
diff --git a/tests/unit/test_runtime_errors.py b/tests/unit/test_runtime_errors.py
index 51d7629..d1d6810 100644
--- a/tests/unit/test_runtime_errors.py
+++ b/tests/unit/test_runtime_errors.py
@@ -1,6 +1,6 @@
 """Runtime-error categories not exercised by the conformance suite.
 
-Spec §4 defines five runtime categories. The conformance fixtures cover
+The spec defines five runtime categories. The conformance fixtures cover
 `node_exception` (009) and `routing_error` (008) directly and reach the
 others incidentally via 001–006. These tests target the three categories no
 fixture triggers: `edge_exception`, `reducer_error`, and
@@ -101,7 +101,7 @@ async def node_a(_state: Any) -> dict[str, Any]:
 
 async def test_subgraph_projection_error_wrapped_as_node_exception() -> None:
     """Errors from a subgraph's projection (project_in / project_out) are
-    NOT spec §4 categories on their own. The engine wraps them as
+    NOT runtime-error categories on their own. The engine wraps them as
     NodeException tagged with the subgraph wrapper's name so callers see
     a uniform error contract."""
 
@@ -158,8 +158,8 @@ def project_out(
 
 
 async def test_routing_error_lands_on_preceding_node_completed_event() -> None:
-    """Per §3 step 3 (revised) + §6 (revised): a `routing_error` from a
-    conditional edge that returns an undeclared target lands on the
+    """A `routing_error` from a conditional edge that returns an
+    undeclared target lands on the
     preceding node's `completed` event with `error` populated, NOT in a
     separate event pair. The downstream node never fires events."""
     from openarmature.graph import (
@@ -218,8 +218,8 @@ def routing_to_nowhere(_state: Any) -> str | EndSentinel:
 
 
 async def test_edge_exception_lands_on_preceding_node_completed_event() -> None:
-    """Per §3 step 3 (revised) + §6 (revised): an `edge_exception` from a
-    conditional edge function raising lands on the preceding node's
+    """An `edge_exception` from a conditional edge function raising
+    lands on the preceding node's
     `completed` event with `error` populated, NOT in a separate event
     pair. The downstream node never fires events."""
     from openarmature.graph import (
diff --git a/tests/unit/test_state_migration.py b/tests/unit/test_state_migration.py
index efd100e..2d8ad6f 100644
--- a/tests/unit/test_state_migration.py
+++ b/tests/unit/test_state_migration.py
@@ -46,8 +46,8 @@ def test_registry_lists_registered_in_order() -> None:
 
 
 def test_registry_rejects_empty_to_version() -> None:
-    """Per spec §10.2 / proposal 0014, empty to_version routes the
-    chain TO the "not declared" sentinel — incoherent. Registration
+    """Empty to_version routes the chain TO the "not declared"
+    sentinel — incoherent. Registration
     MUST reject it. Empty from_version stays valid (documented
     bridging path for pre-declaration records)."""
     registry = MigrationRegistry()
@@ -142,9 +142,9 @@ def test_resolve_chain_picks_shortest_when_unique() -> None:
 
 
 def test_resolve_chain_ambiguous_shortest_paths_raises() -> None:
-    """Diamond with two distinct same-length paths is ambiguous per
-    spec §10.10 / §10.12.2 (proposal 0018). ``resolve_chain``
-    raises the canonical ``CheckpointStateMigrationChainAmbiguous``
+    """Diamond with two distinct same-length paths is ambiguous.
+    ``resolve_chain`` raises the canonical
+    ``CheckpointStateMigrationChainAmbiguous``
     directly — no boundary wrap needed at the resume site; the
     registry's exception contract is one type regardless of when
     ambiguity surfaces (register vs resolve)."""
@@ -160,7 +160,7 @@ def test_resolve_chain_ambiguous_shortest_paths_raises() -> None:
 
 
 def test_chain_ambiguous_category_string() -> None:
-    """The canonical category string per spec §10.10 (proposal 0018)."""
+    """The canonical category string."""
     exc = CheckpointStateMigrationChainAmbiguous("boom")
     assert exc.category == "checkpoint_state_migration_chain_ambiguous"
 
diff --git a/tests/unit/test_tool_choice.py b/tests/unit/test_tool_choice.py
index cad3e8c..61fd426 100644
--- a/tests/unit/test_tool_choice.py
+++ b/tests/unit/test_tool_choice.py
@@ -5,12 +5,11 @@
 
 """Unit tests for `tool_choice` validation and wire mapping.
 
-Per spec llm-provider §5 (amended by proposal 0025): `tool_choice`
-is one of `"auto"`, `"required"`, `"none"`, or a `ForceTool` record;
-violations of the three pre-send validation rules (required-with-
-empty-tools, force-specific-with-empty-tools, force-specific-with-
-name-not-in-tools) raise `ProviderInvalidRequest` (§7's existing
-category — no new error category per the proposal's framing).
+`tool_choice` is one of `"auto"`, `"required"`, `"none"`, or a
+`ForceTool` record; violations of the three pre-send validation rules
+(required-with-empty-tools, force-specific-with-empty-tools,
+force-specific-with-name-not-in-tools) raise `ProviderInvalidRequest`
+(an existing category, not a new one).
 """
 
 from __future__ import annotations

From 5e57eaab6d4fc8d3aeb59e09918ccf3e682aadc6 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 13:45:46 -0700
Subject: [PATCH 12/15] Strip spec refs from checkpoint/fan-out unit docstrings

---
 tests/unit/test_checkpoint.py | 45 +++++++++++++++++------------------
 tests/unit/test_fan_out.py    | 38 ++++++++++++++---------------
 2 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/tests/unit/test_checkpoint.py b/tests/unit/test_checkpoint.py
index e4defed..91b58d8 100644
--- a/tests/unit/test_checkpoint.py
+++ b/tests/unit/test_checkpoint.py
@@ -5,7 +5,7 @@
 fixtures. These unit tests fill gaps the conformance suite doesn't
 exercise directly: backend round-trip + durability, the canonical
 category-string contract, schema_version mismatch handling, the
-fan-out save gate, the §10.1.1 default-off behavior, and the
+fan-out save gate, the default-off behavior, and the
 subgraph-resume parent_states preservation that fixture 029 covers
 in conformance but is awaiting spec namespace clarification (see
 test_checkpoint.py's _DEFERRED_FIXTURES note).
@@ -181,7 +181,7 @@ async def test_sqlite_pickle_round_trip(tmp_path: Path) -> None:
 
 
 async def test_sqlite_json_round_trip_with_pydantic_state(tmp_path: Path) -> None:
-    """Spec §10.11: JSON mode accepts Pydantic State instances. The
+    """JSON mode accepts Pydantic State instances. The
     backend's encoder MUST walk the value tree converting BaseModel
     instances via ``model_dump(mode="json")`` before ``json.dumps`` —
     otherwise live State instances handed in by the engine raise
@@ -240,7 +240,7 @@ async def test_sqlite_durability_across_reopen(tmp_path: Path) -> None:
 
 
 async def test_sqlite_upsert_retention(tmp_path: Path) -> None:
-    """Spec §10.3.1: upsert — one row per invocation_id, overwritten on
+    """Upsert — one row per invocation_id, overwritten on
     every save. After two saves with the same id, only the second
     record is retrievable."""
     cp = SQLiteCheckpointer(tmp_path / "ck.db")
@@ -272,7 +272,7 @@ async def test_sqlite_upsert_retention(tmp_path: Path) -> None:
 async def test_sqlite_serialization_mismatch_raises(tmp_path: Path) -> None:
     """A record written with serialization=pickle MUST NOT be loadable
     by a checkpointer constructed with serialization=json (and vice
-    versa). The mismatch raises CheckpointRecordInvalid per §10.10."""
+    versa). The mismatch raises CheckpointRecordInvalid."""
     db_path = tmp_path / "ck.db"
     cp_pickle = SQLiteCheckpointer(db_path, serialization="pickle")
     record = CheckpointRecord(
@@ -314,8 +314,8 @@ async def test_schema_version_round_trips(tmp_path: Path) -> None:
 
 
 async def test_schema_version_round_trips_through_sqlite_unchanged(tmp_path: Path) -> None:
-    """Per spec §10.12 (proposal 0014), the SQLite backend no longer
-    rejects records with non-default ``schema_version`` values — that
+    """The SQLite backend no longer rejects records with non-default
+    ``schema_version`` values — that
     routing is now an engine concern at resume time. The backend
     just round-trips the version identifier as opaque data so the
     engine's migration registry has the chance to bridge it."""
@@ -368,8 +368,8 @@ def _build_simple_graph(checkpointer: Checkpointer | None = None) -> CompiledGra
 
 
 async def test_no_checkpointer_means_no_saves() -> None:
-    """§10.1.1: without a registered Checkpointer the engine never
-    calls ``save()`` — no record is produced."""
+    """Without a registered Checkpointer the engine never calls
+    ``save()`` — no record is produced."""
     compiled = _build_simple_graph(None)
     final = await compiled.invoke(_SimpleState())
     assert final.a == 1
@@ -377,8 +377,8 @@ async def test_no_checkpointer_means_no_saves() -> None:
 
 
 async def test_no_checkpointer_resume_raises_not_found() -> None:
-    """§10.1.1: ``invoke(resume_invocation=X)`` against an unregistered
-    backend raises checkpoint_not_found — the user has misconfigured
+    """``invoke(resume_invocation=X)`` against an unregistered backend
+    raises checkpoint_not_found — the user has misconfigured
     the run."""
     compiled = _build_simple_graph(None)
     with pytest.raises(CheckpointNotFound):
@@ -386,8 +386,7 @@ async def test_no_checkpointer_resume_raises_not_found() -> None:
 
 
 async def test_resume_against_empty_checkpointer_raises_not_found() -> None:
-    """§10.10: load() returning None surfaces as
-    checkpoint_not_found."""
+    """load() returning None surfaces as checkpoint_not_found."""
     cp = InMemoryCheckpointer()
     compiled = _build_simple_graph(cp)
     with pytest.raises(CheckpointNotFound):
@@ -395,8 +394,8 @@ async def test_resume_against_empty_checkpointer_raises_not_found() -> None:
 
 
 async def test_resume_with_invalid_saved_state_raises_record_invalid() -> None:
-    """§10.10: a saved record whose state-shape doesn't validate
-    against the current graph's state class MUST surface as
+    """A saved record whose state-shape doesn't validate against the
+    current graph's state class MUST surface as
     ``checkpoint_record_invalid``, not a raw pydantic ValidationError.
     Models the JSON-serialized backend path: the load returns a
     dict that the engine re-validates against ``state_cls``; an
@@ -500,8 +499,8 @@ async def _scorer(s: _ItemState) -> dict[str, int]:
 
 
 async def test_fan_out_internal_saves_fire_per_instance() -> None:
-    """Per spec §10.3 (revised by proposal 0009 / v0.18.0): fan-out
-    instance internal nodes DO produce saves. Each per-instance
+    """Fan-out instance internal nodes DO produce saves. Each
+    per-instance
     completion emits at least one save with ``fan_out_index``
     populated on the inner-node position, plus an explicit "instance
     completed" save that flips the instance's ``fan_out_progress``
@@ -592,7 +591,7 @@ async def _failing_scorer(s: _FailingItemState) -> dict[str, int]:
 
 
 async def test_fail_fast_cancellation_leaves_failed_instance_in_flight() -> None:
-    """Per §10.11.2 fail_fast cancellation contract: the failed
+    """Per the fail_fast cancellation contract: the failed
     instance's ``fan_out_progress`` state on the saved record is
     ``in_flight`` (no ``result`` recorded), and cancelled siblings
     are also ``in_flight`` or ``not_started`` — never ``completed``
@@ -678,8 +677,8 @@ async def _nested_schema_scorer(s: _NestedSchemaInnerState) -> dict[str, int]:
 
 
 async def test_nested_fan_out_records_outermost_schema_version() -> None:
-    """Per spec §10.2: a ``CheckpointRecord``'s ``schema_version`` is the
-    outermost graph state's declared version (the record represents the
+    """A ``CheckpointRecord``'s ``schema_version`` is the outermost
+    graph state's declared version (the record represents the
     whole invocation tree). For a fan-out inside a subgraph, the
     engine's ``_save_instance_completed`` / ``_save_instance_in_flight``
     helpers read from the outermost state via
@@ -766,8 +765,8 @@ async def _inner_step(_s: _InnerState) -> dict[str, Any]:
 
 
 async def test_inner_node_save_carries_parent_states() -> None:
-    """Spec §10.2: a save from inside a subgraph populates
-    ``parent_states`` with the chain of containing-graph states.
+    """A save from inside a subgraph populates ``parent_states`` with
+    the chain of containing-graph states.
     This is the contract that fixture 029 verifies in conformance —
     here we isolate the parent_states logic without depending on
     the namespace-convention question."""
@@ -824,8 +823,8 @@ async def _flaky_node(_s: _SimpleState) -> dict[str, int]:
 
 
 async def test_resume_preserves_correlation_id_and_mints_new_invocation_id() -> None:
-    """Spec §10.4 steps 3+4: resume MUST keep the original
-    correlation_id verbatim (cross-backend join key) AND mint a new
+    """Resume MUST keep the original correlation_id verbatim
+    (cross-backend join key) AND mint a new
     invocation_id (each attempt is its own invocation)."""
     _first_run_should_fail[0] = True
     cp = InMemoryCheckpointer()
diff --git a/tests/unit/test_fan_out.py b/tests/unit/test_fan_out.py
index bff1688..18cc13d 100644
--- a/tests/unit/test_fan_out.py
+++ b/tests/unit/test_fan_out.py
@@ -1,4 +1,4 @@
-"""Unit tests for the fan-out runtime (pipeline-utilities §9).
+"""Unit tests for the fan-out runtime.
 
 Covers the spec-corner cases the conformance fixtures exercise only
 implicitly:
@@ -97,7 +97,7 @@ class ItemsParentState(State):
 
 async def test_items_field_projection_doubles_each() -> None:
     """Each instance receives one item; collected results preserve input
-    order (per §9.3 / §9.4)."""
+    order."""
     inner = _build_doubler()
     builder: GraphBuilder[ItemsParentState] = GraphBuilder(ItemsParentState)
     builder.set_entry("process")
@@ -167,7 +167,7 @@ async def test_count_mode_state_reading_callable() -> None:
 
 
 async def test_count_callable_resolved_exactly_once_at_entry() -> None:
-    """Per §9.2: count callable is invoked exactly once at fan-out entry.
+    """The count callable is invoked exactly once at fan-out entry.
     A callable with side effects (counter increment) MUST be observed to
     run exactly once."""
     inner = _build_constant_one()
@@ -195,7 +195,7 @@ def counting_count(s: CountParentState) -> int:
 
 
 async def test_concurrency_callable_resolved_exactly_once_at_entry() -> None:
-    """Per §9.2: concurrency callable, like count, is invoked exactly
+    """The concurrency callable, like count, is invoked exactly
     once at fan-out entry — even with many instances (which would
     otherwise be a natural place to call it per-instance by mistake)."""
 
@@ -242,8 +242,8 @@ class InputsParentState(State):
 
 
 async def test_inputs_mapping_projects_parent_fields() -> None:
-    """Per §9.1: ``inputs`` maps parent fields onto the per-instance
-    subgraph state at entry, alongside item_field."""
+    """``inputs`` maps parent fields onto the per-instance subgraph
+    state at entry, alongside item_field."""
 
     async def compute(state: WorkerState) -> Mapping[str, Any]:
         return {"result": state.item + state.extra}
@@ -332,7 +332,7 @@ class FailFastParentState(State):
 
 
 async def test_fail_fast_propagates_first_failure_with_parent_recoverable_state() -> None:
-    """Per §9.5: the first failure raises through the fan-out as a
+    """The first failure raises through the fan-out as a
     NodeException whose recoverable_state is the parent's pre-fan-out
     snapshot, NOT the inner instance's state."""
 
@@ -424,7 +424,7 @@ class CollectParentState(State):
 
 
 async def test_collect_records_per_instance_errors() -> None:
-    """Per §9.5: collect mode runs all instances to completion; failures
+    """Collect mode runs all instances to completion; failures
     are recorded in errors_field; successes contribute to target_field."""
 
     async def maybe_fail(state: WorkerState) -> Mapping[str, Any]:
@@ -474,7 +474,7 @@ class EmptyParentState(State):
 
 
 async def test_on_empty_raise_default_raises_fan_out_empty() -> None:
-    """Per §9.1: empty fan-out with on_empty='raise' (default) raises
+    """Empty fan-out with on_empty='raise' (default) raises
     a NodeException tagged with fan_out_category='fan_out_empty'."""
     inner = _build_doubler()
     builder: GraphBuilder[EmptyParentState] = GraphBuilder(EmptyParentState)
@@ -497,7 +497,7 @@ async def test_on_empty_raise_default_raises_fan_out_empty() -> None:
 
 
 async def test_on_empty_noop_writes_count_field_zero() -> None:
-    """Per §9.1: on_empty='noop' produces a clean no-op; count_field
+    """on_empty='noop' produces a clean no-op; count_field
     captures the resolved 0."""
     inner = _build_doubler()
     builder: GraphBuilder[EmptyParentState] = GraphBuilder(EmptyParentState)
@@ -533,8 +533,8 @@ class CountFieldParentState(State):
 
 
 async def test_count_field_records_actual_count_on_success() -> None:
-    """Per §9 Configuration: count_field is written with the resolved
-    instance count after fan-in, regardless of whether on_empty fires."""
+    """count_field is written with the resolved instance count after
+    fan-in, regardless of whether on_empty fires."""
     inner = _build_doubler()
     builder: GraphBuilder[CountFieldParentState] = GraphBuilder(CountFieldParentState)
     builder.set_entry("process")
@@ -567,7 +567,7 @@ class ExtraOutputsParentState(State):
 
 
 async def test_extra_outputs_merges_additional_per_instance_fields() -> None:
-    """Per §9.3: extra_outputs collects additional non-collected fields
+    """extra_outputs collects additional non-collected fields
     from each instance and merges them via the parent's reducer."""
 
     async def compute(state: WorkerState) -> Mapping[str, Any]:
@@ -699,7 +699,7 @@ async def slow(state: WorkerState) -> Mapping[str, Any]:
 
 
 async def test_fan_in_preserves_input_order_under_random_completion_timing() -> None:
-    """Per §9.4: target_field is in instance-index order, NOT completion
+    """target_field is in instance-index order, NOT completion
     order. Run the same fan-out N times with different random sleep
     seeds; every run produces the same result list."""
     expected = list(range(20))
@@ -720,7 +720,7 @@ class _CompileTestState(State):
 
 
 def test_compile_error_count_mode_ambiguous_when_both_specified() -> None:
-    """Per spec §9: specifying both items_field AND count is a compile
+    """Specifying both items_field AND count is a compile
     error with category fan_out_count_mode_ambiguous."""
     inner = _build_doubler()
     builder: GraphBuilder[_CompileTestState] = GraphBuilder(_CompileTestState)
@@ -750,8 +750,8 @@ def test_compile_error_count_mode_ambiguous_when_neither_specified() -> None:
 
 
 def test_compile_error_field_not_list() -> None:
-    """Per spec §9: items_field must reference a list-typed parent
-    field. A non-list type is a compile error with category
+    """items_field must reference a list-typed parent field. A non-list
+    type is a compile error with category
     fan_out_field_not_list."""
     inner = _build_doubler()
     builder: GraphBuilder[_CompileTestState] = GraphBuilder(_CompileTestState)
@@ -767,8 +767,8 @@ def test_compile_error_field_not_list() -> None:
 
 
 def test_compile_error_inputs_references_undeclared_parent_field() -> None:
-    """Per spec §9: ``inputs`` mapping entries MUST refer to declared
-    fields on both sides. An undeclared parent field raises
+    """``inputs`` mapping entries MUST refer to declared fields on both
+    sides. An undeclared parent field raises
     ``mapping_references_undeclared_field`` at registration time."""
     from openarmature.graph import MappingReferencesUndeclaredField
 

From da20870f9042ad79f0658cfb55a34d127bd209e6 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 13:47:20 -0700
Subject: [PATCH 13/15] Strip spec refs from otel observer unit docstrings

---
 tests/unit/test_observability_otel.py | 61 +++++++++++++--------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/tests/unit/test_observability_otel.py b/tests/unit/test_observability_otel.py
index 2fe2b0e..c569527 100644
--- a/tests/unit/test_observability_otel.py
+++ b/tests/unit/test_observability_otel.py
@@ -7,16 +7,16 @@
 
 These tests fill the gaps the conformance harness defers:
 
-- §6 TracerProvider isolation — the load-bearing "spans don't leak
+- TracerProvider isolation — the load-bearing "spans don't leak
   into the OTel global provider" guarantee.
-- §5 attribute population on every span type.
-- §4.2 status mapping for every §4 error category.
-- §5.5 LLM provider span via the ContextVar dispatch hook (queue-
+- Attribute population on every span type.
+- Status mapping for every error category.
+- LLM provider span via the ContextVar dispatch hook (queue-
   mediated; no synchronous direct dispatch).
-- §4.4 detached trace mode key separation in the span stack.
-- §10.8 checkpoint_saved → ``openarmature.checkpoint.save`` zero-
+- Detached trace mode key separation in the span stack.
+- checkpoint_saved → ``openarmature.checkpoint.save`` zero-
   duration span.
-- §7 log bridge filter + correlation_id injection.
+- Log bridge filter + correlation_id injection.
 """
 
 from __future__ import annotations
@@ -121,7 +121,7 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
 
 
 async def test_observer_uses_private_provider_not_global() -> None:
-    """Spec §6 TracerProvider isolation: the OTelObserver MUST use a
+    """TracerProvider isolation: the OTelObserver MUST use a
     PRIVATE TracerProvider; spans MUST NOT appear on the OTel global
     provider's exporter (this is the load-bearing guarantee against
     duplicate spans from external auto-instrumentation libraries)."""
@@ -159,8 +159,8 @@ async def test_observer_uses_private_provider_not_global() -> None:
 
 
 async def test_node_span_carries_required_attributes() -> None:
-    """Spec §5.2: every node span MUST carry the four
-    ``openarmature.node.*`` base attributes."""
+    """Every node span MUST carry the four ``openarmature.node.*``
+    base attributes."""
     g, exporter = _build_linear_graph()
     await g.invoke(_LinearState(), correlation_id="test-cid")  # type: ignore[attr-defined]
     await g.drain()  # type: ignore[attr-defined]
@@ -178,8 +178,8 @@ async def test_node_span_carries_required_attributes() -> None:
 
 
 async def test_invocation_span_carries_required_attributes() -> None:
-    """Spec §5.1: invocation span MUST carry
-    ``openarmature.graph.entry_node`` + ``openarmature.graph.spec_version``."""
+    """Invocation span MUST carry ``openarmature.graph.entry_node`` +
+    ``openarmature.graph.spec_version``."""
     exporter = InMemorySpanExporter()
     observer = OTelObserver(span_processor=SimpleSpanProcessor(exporter))
     g, _ = _build_linear_graph(observer)
@@ -307,7 +307,7 @@ async def _failing_node(_s: _FailState) -> dict[str, int]:
 
 
 async def test_failing_node_span_carries_error_status() -> None:
-    """Spec §4.2: a node-exception failure produces a span with
+    """A node-exception failure produces a span with
     ERROR status, an exception event recorded, and the
     ``openarmature.error.category`` attribute on the span."""
     from opentelemetry.trace import StatusCode
@@ -340,8 +340,7 @@ async def test_failing_node_span_carries_error_status() -> None:
 
 
 async def test_checkpoint_migrate_emits_span_with_chain_metadata(tmp_path: Path) -> None:
-    """Spec §6 cross-ref in proposal 0014: a versioned resume whose
-    migration chain runs SHOULD emit an
+    """A versioned resume whose migration chain runs SHOULD emit an
     ``openarmature.checkpoint.migrate`` span carrying
     ``from_version`` / ``to_version`` (final) / ``chain_length``."""
     from openarmature.checkpoint import (
@@ -405,8 +404,8 @@ async def _noop(_s: _MigState) -> dict[str, int]:
 
 
 async def test_checkpoint_migrate_span_absent_on_version_match(tmp_path: Path) -> None:
-    """Spec §10.12.3 fast path: when the saved record's schema_version
-    equals the current state class's schema_version, the migration
+    """Fast path: when the saved record's schema_version equals the
+    current state class's schema_version, the migration
     registry is NOT consulted. The OTel observer MUST NOT emit a
     ``openarmature.checkpoint.migrate`` span in that case."""
     from openarmature.checkpoint import CheckpointRecord, SQLiteCheckpointer
@@ -454,8 +453,8 @@ async def _noop(_s: _MatchState) -> dict[str, int]:
 
 
 async def test_checkpoint_save_emits_zero_duration_span() -> None:
-    """Spec §10.8: a checkpoint save SHOULD emit a §6-style observer
-    event surfaced as a span. Our implementation emits a
+    """A checkpoint save SHOULD emit an observer event surfaced as a
+    span. Our implementation emits a
     ``openarmature.checkpoint.save`` span on every save."""
     cp = InMemoryCheckpointer()
     exporter = InMemorySpanExporter()
@@ -493,8 +492,8 @@ async def test_checkpoint_save_emits_zero_duration_span() -> None:
 
 
 async def test_active_prompt_propagates_to_llm_span_attributes() -> None:
-    """Spec prompt-management §11: when an LLM call fires inside a
-    ``with_active_prompt`` context, the OTel observer MUST surface
+    """When an LLM call fires inside a ``with_active_prompt`` context,
+    the OTel observer MUST surface
     ``openarmature.prompt.*`` attributes on the LLM-call span.
     ``with_active_prompt_group`` adds ``openarmature.prompt.group_name``."""
     from datetime import UTC, datetime
@@ -660,8 +659,8 @@ async def test_llm_span_emits_cache_creation_attribute_when_payload_carries_it()
 
 
 async def test_disable_llm_spans_skips_llm_provider_span() -> None:
-    """Spec §5.5: ``disable_llm_spans=True`` MUST suppress the
-    LLM-provider span emission while leaving all other spans intact."""
+    """``disable_llm_spans=True`` MUST suppress the LLM-provider span
+    emission while leaving all other spans intact."""
     from openarmature.graph.events import NodeEvent
 
     # We don't drive a real provider here; instead we emit a synthetic
@@ -845,8 +844,8 @@ async def test_llm_error_path_emits_error_span_from_typed_failed_event() -> None
 
 
 def test_log_record_factory_injects_correlation_id() -> None:
-    """Spec §7: every log record emitted during an invocation MUST
-    carry ``openarmature.correlation_id``. The bridge installs a
+    """Every log record emitted during an invocation MUST carry
+    ``openarmature.correlation_id``. The bridge installs a
     process-global :class:`logging.LogRecord` factory (rather than
     a logger-level filter) so the attribute lands on every record
     regardless of which logger originated it — Python's logging
@@ -1028,8 +1027,8 @@ def test_install_log_bridge_adds_handler_when_pre_attached_uses_different_provid
 
 
 def test_log_bridge_exports_records_with_correlation_id() -> None:
-    """Spec §7 end-to-end: a log record emitted on a CHILD logger
-    under ``current_correlation_id`` flows through the bridge to
+    """End-to-end: a log record emitted on a CHILD logger under
+    ``current_correlation_id`` flows through the bridge to
     the OTel ``LoggerProvider``'s exporter with
     ``openarmature.correlation_id`` populated. Child-logger emit
     is the load-bearing case — Python's logging propagates child
@@ -1106,8 +1105,8 @@ def test_log_bridge_exports_records_with_correlation_id() -> None:
 
 async def test_shared_observer_concurrent_invocations_dont_collide() -> None:
     """A single observer shared across concurrent invocations MUST
-    keep their span trees isolated. Per spec §5.1 each invocation
-    has its own ``invocation_id`` and therefore its own
+    keep their span trees isolated. Each invocation has its own
+    ``invocation_id`` and therefore its own
     ``trace_id``; with shared internal state keyed by
     ``invocation_id`` the observer no longer collides on overlapping
     namespaces, no longer closes another in-flight invocation's span
@@ -1239,7 +1238,7 @@ async def _double(s: _ChildState) -> dict[str, int]:
 
 
 async def test_concurrent_fan_out_llm_spans_parent_under_calling_instance() -> None:
-    """Spec §5.5 under concurrent fan-out: each instance's
+    """Under concurrent fan-out: each instance's
     ``openarmature.llm.complete`` span MUST parent under that
     instance's calling node, not a sibling instance's. The Phase 6.1
     calling-node identity (namespace_prefix + attempt_index +
@@ -1362,7 +1361,7 @@ async def _ask(s: _ChildState) -> dict[str, str]:
 
 
 async def test_llm_call_inside_retried_node_parents_per_attempt() -> None:
-    """Spec §5.5 under retry: when an LLM ``complete()`` call
+    """Under retry: when an LLM ``complete()`` call
     happens inside a node body wrapped with retry middleware, each
     attempt's LLM span MUST parent under THAT attempt's node span,
     not a hardcoded ``attempt_index=0``. Phase 6.1's

From 3ac562e130dc01f6ce5d2e64f8e7894921e075e8 Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 14:03:00 -0700
Subject: [PATCH 14/15] Strip phase/PR process labels from docstrings and
 comments

Remove opaque development-history tags (PR-C, PR-B, Phase 6.0/6.1) and
rephrase the "Phase N scope/deferred" mentions to preserve their
covered-vs-deferred meaning without the process vocabulary. Rename the
langfuse resume test's internal "Phase 1/2/3" step labels to "Step" to
avoid confusion with build phases.

Leaves the harness skip registry untouched: its DIRECTIVE_PHASE dict
maps directives to phase integers as functional data, and its comments
document that live mapping. Fixture-number references are kept
throughout (concrete in-repo identifiers, not spec refs).
---
 src/openarmature/graph/compiled.py            |  4 +--
 src/openarmature/graph/middleware/_core.py    |  2 +-
 src/openarmature/observability/correlation.py |  4 +--
 .../observability/otel/observer.py            |  2 +-
 tests/conformance/adapter.py                  |  2 +-
 tests/conformance/harness/__init__.py         |  4 +--
 tests/conformance/harness/directives.py       |  8 +++---
 tests/conformance/harness/expectations.py     | 13 +++++-----
 tests/conformance/harness/fixtures.py         |  2 +-
 tests/conformance/test_checkpoint.py          |  4 +--
 tests/conformance/test_conformance.py         |  2 +-
 tests/conformance/test_fixture_parsing.py     |  7 +++--
 tests/conformance/test_observability.py       | 26 +++++++++----------
 .../test_observability_langfuse.py            | 14 +++++-----
 tests/conformance/test_pipeline_utilities.py  | 20 +++++++-------
 tests/unit/test_correlation.py                |  4 +--
 tests/unit/test_middleware.py                 |  2 +-
 tests/unit/test_observability_otel.py         | 16 ++++++------
 18 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/src/openarmature/graph/compiled.py b/src/openarmature/graph/compiled.py
index f12b382..5114f31 100644
--- a/src/openarmature/graph/compiled.py
+++ b/src/openarmature/graph/compiled.py
@@ -1521,7 +1521,7 @@ async def innermost(s: Any) -> Mapping[str, Any]:
             innermost,
         )
 
-        # Spec observability §3 / Phase 6 LLM-span hook: capability
+        # Spec observability §3 LLM-span hook: capability
         # backends emitting from inside a node body (the
         # llm-provider span instrumentation in OpenAIProvider) need
         # to find the observers active for THIS invocation, which
@@ -2503,7 +2503,7 @@ async def _maybe_save_checkpoint(
         # before/after distinction for a save like there is for a
         # node attempt. The field is repurposed because a save
         # event represents "the state was persisted" rather than
-        # "the state transitioned." Phase 6 OTel mapping reads
+        # "the state transitioned." The OTel mapping reads
         # ``pre_state`` as the save's state.
         _dispatch(
             context,
diff --git a/src/openarmature/graph/middleware/_core.py b/src/openarmature/graph/middleware/_core.py
index 732a8a7..3f0d3a4 100644
--- a/src/openarmature/graph/middleware/_core.py
+++ b/src/openarmature/graph/middleware/_core.py
@@ -109,7 +109,7 @@ def compose_chain(
     ``CompiledGraph._step_function_node``, producing one closure layer
     per middleware on every node step. For typical workloads
     (single-digit middleware × hundreds of node activations) this is
-    negligible. Under heavy fan-out (Phase 3+), e.g. 10K instances × 5
+    negligible. Under heavy fan-out it adds up, e.g. 10K instances × 5
     inner nodes × 3 middlewares = 150K closure constructions per
     invocation; worth measuring with realistic workloads when the
     fan-out runtime lands. The optimization shape (cache the chain at
diff --git a/src/openarmature/observability/correlation.py b/src/openarmature/observability/correlation.py
index bb8a434..7b6660f 100644
--- a/src/openarmature/observability/correlation.py
+++ b/src/openarmature/observability/correlation.py
@@ -163,7 +163,7 @@ def validate_invocation_id(value: object) -> str:
 
 # ---------------------------------------------------------------------------
 # Active observer set — for capability backends emitting from outside the
-# engine's per-step path (llm-provider span hook in Phase 6, future
+# engine's per-step path (llm-provider span hook, future
 # Langfuse/Datadog backends, user-written instrumented capabilities).
 # ---------------------------------------------------------------------------
 
@@ -206,7 +206,7 @@ def _reset_active_observers(token: Token[tuple[SubscribedObserver, ...]]) -> Non
 # Active dispatch hook — queue-mediated event emission from outside the
 # engine's per-step path. The engine sets this ContextVar to a closure
 # over the current invocation's delivery queue + observer chain;
-# capability backends (the LLM provider span hook in Phase 6, future
+# capability backends (the LLM provider span hook, future
 # Langfuse/Datadog instrumentations) call ``current_dispatch()(event)``
 # to enqueue an event for the same delivery worker the engine uses.
 #
diff --git a/src/openarmature/observability/otel/observer.py b/src/openarmature/observability/otel/observer.py
index b1b6b20..97b8b31 100644
--- a/src/openarmature/observability/otel/observer.py
+++ b/src/openarmature/observability/otel/observer.py
@@ -2283,7 +2283,7 @@ def close_invocation(self, invocation_id: str) -> None:
         invocation in one call without needing to track ids
         externally. A first-class engine-level signal that lets
         observers auto-drain per-invocation state on completion is
-        tracked as Phase 6.1+ follow-up work in
+        tracked as follow-up work in
         ``openarmature-coord/docs/phase-6-1-conformance-fillin.md``.
         """
         inv_state = self._inv_states.pop(invocation_id, None)
diff --git a/tests/conformance/adapter.py b/tests/conformance/adapter.py
index 5171d7a..772fdd5 100644
--- a/tests/conformance/adapter.py
+++ b/tests/conformance/adapter.py
@@ -991,7 +991,7 @@ def _add_fan_out_node(
     - ``state_field_read`` — read an int from a parent state field.
     - ``queue_chunk`` — ``max(1, len(state.<field>) // chunk_size)``.
 
-    These are the only callable shapes the in-scope Phase 3 fixtures
+    These are the only callable shapes the in-scope fan-out fixtures
     use. Adding more is straightforward.
     """
     sub_name = cfg["subgraph"]
diff --git a/tests/conformance/harness/__init__.py b/tests/conformance/harness/__init__.py
index 4fb4ec6..c86e5f2 100644
--- a/tests/conformance/harness/__init__.py
+++ b/tests/conformance/harness/__init__.py
@@ -1,8 +1,8 @@
 """Conformance fixture harness — typed parsing for the four spec capabilities.
 
-Phase 0 (per the implementation plan): every fixture under
+Every fixture under
 ``openarmature-spec/spec/<capability>/conformance/`` lands as a typed pydantic
-config. Phases 1–6 add runtime interpretation under ``harness/runtime/``;
+config. Later stages add runtime interpretation under ``harness/runtime/``;
 they never re-touch parsing.
 
 Public surface:
diff --git a/tests/conformance/harness/directives.py b/tests/conformance/harness/directives.py
index d3f8577..256130a 100644
--- a/tests/conformance/harness/directives.py
+++ b/tests/conformance/harness/directives.py
@@ -1,6 +1,6 @@
 """Typed directive sub-models — the shapes referenced inside fixtures.
 
-Phase 0 typing strategy: model every key in every fixture, but use
+Typing strategy: model every key in every fixture, but use
 ``dict[str, Any]`` for genuinely polymorphic payloads (notably the inner
 ``state``/``nodes``/``edges`` of recursive subgraph definitions, and the
 update payloads themselves which are arbitrary state-shaped dicts). The
@@ -38,7 +38,7 @@ class _AllowExtras(BaseModel):
     middleware-specific params, flaky/fan-out config). Validates KNOWN
     keys' types but doesn't reject unknown ones — the spec evolves these
     payloads frequently and modelling every parameter exhaustively
-    creates churn without proportional value. The Phase 0 strictness
+    creates churn without proportional value. The strictness
     contract sits at the directive STRUCTURE level (above), not the
     parameter-bag level (here)."""
 
@@ -90,7 +90,7 @@ class EdgeSpec(_AllowExtras):
     The spec defines static (``from``/``to``) and conditional
     (``from``/``condition``) edges; observability/011 also uses a
     ``when``-shaped predicate. Schema is permissive here so all forms
-    parse — Phase 1 (engine retrofit) interprets each shape against
+    parse — the engine retrofit interprets each shape against
     the engine's edge model.
     """
 
@@ -208,7 +208,7 @@ class UpdateFromFieldSpec(_ForbidExtras):
     """
 
     # Free-form: some fixtures use ``{result: x, multiplier: 2}``, others
-    # ``{score: item}`` with no multiplier. Phase 4 (fan-out runtime) reads
+    # ``{score: item}`` with no multiplier. The fan-out runtime reads
     # whichever keys are present.
     model_config = ConfigDict(extra="allow")
 
diff --git a/tests/conformance/harness/expectations.py b/tests/conformance/harness/expectations.py
index 706586e..b3c3bb9 100644
--- a/tests/conformance/harness/expectations.py
+++ b/tests/conformance/harness/expectations.py
@@ -7,12 +7,11 @@
 fixture authors mixing keys across capabilities, and gives runtime code
 in :mod:`runtime` typed access to the assertion payload it needs.
 
-Phase 0 typing depth: TOP-LEVEL keys per capability are exhaustively
+Typing depth: TOP-LEVEL keys per capability are exhaustively
 typed (catches new directives the spec adds). The nested payload values
 underneath (e.g., individual span tree entries, observer event details)
 are kept loose as ``list[Any]`` / ``dict[str, Any]`` because the runtime
-phases that consume them are the right place to tighten — Phase 1
-will type observer-event entries, Phase 5 will type span_tree, etc.
+code that consumes them is the right place to tighten.
 """
 
 from __future__ import annotations
@@ -44,12 +43,12 @@ class GraphEngineExpected(_ForbidExtras):
     # Two shapes seen in fixtures:
     # - dict[observer_name, list[event_dict]] — most fixtures
     # - list[event_dict] flat — pipeline-utilities/011 (single-observer)
-    # Permissive ``Any`` until Phase 1 (engine retrofit) tightens.
+    # Permissive ``Any`` until the engine retrofit tightens.
     observer_events: Any = None
     delivery_order: list[dict[str, Any]] | None = None
     observer_event_invariants: dict[str, Any] | None = None
     # 020 — proposal-0012 fixture: assertions about edge-resolution
-    # failure event shapes. Permissive dict until Phase 1.
+    # failure event shapes. Permissive dict until the engine retrofit.
     # 022–024 (proposal 0010 §6 Drain) — drain-summary invariants
     # (drain_returned_within_timeout, graph_state_intact_after_timeout,
     # drain_waited_for_all_events) ride on the same field.
@@ -90,7 +89,7 @@ class LlmProviderRaisesAssertion(BaseModel):
 
     Permissive — fixtures attach assertion-specific knobs like
     ``retry_after_seconds`` (rate-limit fixture) without restructuring
-    the type. The runtime in Phase 2 validates the keys it reads.
+    the type. The runtime validates the keys it reads.
     """
 
     model_config = ConfigDict(extra="allow")
@@ -130,7 +129,7 @@ class PipelineUtilitiesExpected(_ForbidExtras):
     # Two shapes seen in fixtures:
     # - dict[observer_name, list[event_dict]] — most fixtures
     # - list[event_dict] flat — pipeline-utilities/011 (single-observer)
-    # Permissive ``Any`` until Phase 1 (engine retrofit) tightens.
+    # Permissive ``Any`` until the engine retrofit tightens.
     observer_events: Any = None
     observer_event_invariants: dict[str, Any] | None = None
     # Singular form used by 015 — assert one specific event shape.
diff --git a/tests/conformance/harness/fixtures.py b/tests/conformance/harness/fixtures.py
index c103266..311ffdf 100644
--- a/tests/conformance/harness/fixtures.py
+++ b/tests/conformance/harness/fixtures.py
@@ -1,6 +1,6 @@
 """Typed fixture root models.
 
-Per the Phase 0 plan: every YAML fixture under
+Every YAML fixture under
 ``openarmature-spec/spec/<capability>/conformance/`` lands as one of three
 typed shapes. The shape is chosen by a callable discriminator that inspects
 the raw dict's top-level keys (no tag field is present in the YAML).
diff --git a/tests/conformance/test_checkpoint.py b/tests/conformance/test_checkpoint.py
index 142c524..9b6861b 100644
--- a/tests/conformance/test_checkpoint.py
+++ b/tests/conformance/test_checkpoint.py
@@ -24,7 +24,7 @@
 - 030 checkpoint-not-found — supported.
 - 031 correlation-id-preserved-across-resume — record-level
   assertions supported here; the OTel span/log assertions are
-  gated until Phase 6 lands the observability mapping.
+  gated until the observability mapping lands.
 - 048-054 per-instance fan-out resume contract — supported.
 """
 
@@ -818,7 +818,7 @@ async def _run_one_case(spec: Mapping[str, Any], *, top_level: Mapping[str, Any]
         _assert_resume_invariants(invariants_block, final_resume, flaky_per_index_recorders)
 
     # Fixture 031: assert correlation_id preserved + invocation_id
-    # changed. Span/log assertions deferred to Phase 6 — observability
+    # changed. Span/log assertions deferred — observability
     # isn't wired yet. Skip those cleanly here.
     if "correlation_id_assertions" in resume_expected:
         cid_block = cast("Mapping[str, Any]", resume_expected["correlation_id_assertions"])
diff --git a/tests/conformance/test_conformance.py b/tests/conformance/test_conformance.py
index 899d409..c874e43 100644
--- a/tests/conformance/test_conformance.py
+++ b/tests/conformance/test_conformance.py
@@ -88,7 +88,7 @@ def _fixture_id(path: Path) -> str:
 }
 
 
-# Node directives the legacy adapter doesn't (yet) translate. Phase 1+ will
+# Node directives the legacy adapter doesn't (yet) translate. A later pass will
 # either expand the adapter or replace it with the typed harness.
 _UNSUPPORTED_NODE_DIRECTIVES = frozenset(
     {
diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py
index 63ed1a6..a4a964c 100644
--- a/tests/conformance/test_fixture_parsing.py
+++ b/tests/conformance/test_fixture_parsing.py
@@ -1,4 +1,4 @@
-"""Phase 0 exit criterion: every fixture in the spec submodule parses into a
+"""Every fixture in the spec submodule parses into a
 typed harness config, AND the parse is round-trip stable (parse → dump →
 parse produces an equal model).
 
@@ -529,9 +529,8 @@ def test_fixture_parses(case: tuple[str, Path]) -> None:
 
 @pytest.mark.parametrize("case", _FIXTURES, ids=_id)
 def test_fixture_round_trips(case: tuple[str, Path]) -> None:
-    """Parse → ``model_dump`` → re-parse → equal. Exit criterion for
-    Phase 0 per the implementation plan: catches dropped fields the user
-    intended to use later."""
+    """Parse → ``model_dump`` → re-parse → equal. Catches dropped
+    fields the user intended to use later."""
     case_id = _id(case)
     skip_if_deferred(case_id, _DEFERRED_FIXTURES)
     _, path = case
diff --git a/tests/conformance/test_observability.py b/tests/conformance/test_observability.py
index be17996..ab2c25a 100644
--- a/tests/conformance/test_observability.py
+++ b/tests/conformance/test_observability.py
@@ -2,26 +2,26 @@
 
 Driven fixtures:
 
-- **001-basic-trace** (Phase 6.0) — full span shape.
-- **002-subgraph-hierarchy** (PR-C) — synthetic dispatch span +
+- **001-basic-trace** — full span shape.
+- **002-subgraph-hierarchy** — synthetic dispatch span +
   inner-node parenting.
-- **003-error-status** (PR-C) — ERROR status mapping for the
+- **003-error-status** — ERROR status mapping for the
   ``node_exception`` case.
-- **005-llm-provider-span-nested** (Phase 6.0) — LLM span +
+- **005-llm-provider-span-nested** — LLM span +
   ``disable_llm_spans`` opt-out + TracerProvider isolation.
-- **007-retry-attempt-spans** (PR-C) — sibling attempt spans with
+- **007-retry-attempt-spans** — sibling attempt spans with
   per-attempt ``attempt_index`` under retry middleware.
-- **008-detached-trace-mode** (Phase 6.0) — detached subgraph
+- **008-detached-trace-mode** — detached subgraph
   + detached fan-out + cross-trace ``correlation_id``.
-- **009-correlation-id-cross-cutting** (Phase 6.0) — every span
+- **009-correlation-id-cross-cutting** — every span
   carries ``openarmature.correlation_id``; back-to-back
   invocations get distinct UUIDv4s.
-- **010-log-correlation** (PR-C.3) — log records emitted from
+- **010-log-correlation** — log records emitted from
   inside node bodies pick up the active node span's
   ``trace_id``/``span_id`` via the engine-side
   ``prepare_sync`` → OTel context attach pipeline; both nested
   and detached-trace cases.
-- **011-determinism** (PR-C) — deterministic span content
+- **011-determinism** — deterministic span content
   (hierarchy, names, status, attributes minus the canonical
   non-deterministic-by-design list) is identical across runs.
 
@@ -557,7 +557,7 @@ async def _run_fixture_004(spec: Mapping[str, Any]) -> None:
         assert unreachable not in by_name, f"{unreachable!r} MUST not produce a span — never reached"
 
     # Invocation span ends ERROR per the §4.2 invocation-status
-    # propagation contract (PR-C review fix).
+    # propagation contract.
     inv = by_name.get("openarmature.invocation")
     assert inv is not None
     assert inv.status.status_code == StatusCode.ERROR, (
@@ -2052,11 +2052,11 @@ def _compile_subgraphs(spec: Mapping[str, Any]) -> dict[str, Any]:
 
 
 # ---------------------------------------------------------------------------
-# Phase 5 fixture 031 — span/log assertions deferred from Phase 5
+# Fixture 031 — span/log assertions deferred from the checkpoint harness
 #
 # Lives in this file (not test_checkpoint.py) because the assertions
 # verify OTel span attributes across the original + resumed runs of
-# the same checkpoint fixture. The Phase 5 harness already covers the
+# the same checkpoint fixture. The checkpoint harness already covers the
 # record-level half (correlation_id preserved, invocation_id changes);
 # this picks up the cross-run span-attribute half.
 # ---------------------------------------------------------------------------
@@ -2172,7 +2172,7 @@ async def delete(self, invocation_id: str) -> None:
 
 
 # ---------------------------------------------------------------------------
-# Fixture 010 — log correlation (PR-C.3)
+# Fixture 010 — log correlation
 #
 # Two sub-cases. Both build the graph by hand rather than going through the
 # adapter — fixture 010's ``emits_log:`` directive isn't an adapter primitive
diff --git a/tests/conformance/test_observability_langfuse.py b/tests/conformance/test_observability_langfuse.py
index 0c10fb5..afe50cc 100644
--- a/tests/conformance/test_observability_langfuse.py
+++ b/tests/conformance/test_observability_langfuse.py
@@ -802,22 +802,22 @@ async def _run_resume_case(
 ) -> None:
     """Two-phase test flow for fixture 037 case 5.
 
-    Phase 1 — first invoke catches the expected NodeException at the
+    Step 1 — first invoke catches the expected NodeException at the
     designated node; the captured Langfuse Trace's input/output match
     ``first_run_expected.langfuse_trace``.  We snapshot the first trace's
     headline fields immediately so the ``first_trace_unchanged`` invariant
     can verify the resumed invoke leaves them untouched.
 
-    Phase 2 — resume invoke runs the same graph with
+    Step 2 — resume invoke runs the same graph with
     ``resume_invocation=first_invocation_id``, completes successfully, and
     the resumed Trace's input/output match ``resume.expected.langfuse_trace``.
 
-    Phase 3 — invariants compare the two traces (distinct trace ids,
+    Step 3 — invariants compare the two traces (distinct trace ids,
     shared correlation_id, the snapshotted first trace's fields unchanged).
     """
     from openarmature.graph.errors import RuntimeGraphError  # noqa: PLC0415
 
-    # ---- Phase 1: first invoke catches expected error
+    # ---- Step 1: first invoke catches expected error
     first_run_expected_error = cast("dict[str, Any]", case.get("first_run_expected_error") or {})
     expected_category = cast("str", first_run_expected_error.get("category", "node_exception"))
     expected_raised_from = cast("str | None", first_run_expected_error.get("raised_from"))
@@ -870,7 +870,7 @@ async def _run_resume_case(
     first_expected_trace = cast("dict[str, Any]", first_run_expected["langfuse_trace"])
     _assert_trace(first_trace, first_expected_trace, expected_invariants={})
 
-    # ---- Phase 2: resume invoke
+    # ---- Step 2: resume invoke
     resume_block = cast("dict[str, Any]", case["resume"])
     # Drop ``correlation_id`` from invoke_kwargs on resume — the engine
     # restores it from the saved record per §3.1.
@@ -883,7 +883,7 @@ async def _run_resume_case(
     await graph.drain()
 
     # Python dicts are insertion-ordered (PEP 468; guaranteed since
-    # 3.7).  Phase 1 added one trace; phase 2 added the resumed trace.
+    # 3.7).  The first invoke added one trace; the resume added another.
     # Reading by position is more deterministic than scanning by
     # not-equal — if a future engine change adds synthetic traces, the
     # scan would silently pick the wrong key, but the position-based
@@ -902,7 +902,7 @@ async def _run_resume_case(
     resume_expected_trace = cast("dict[str, Any]", resume_expected["langfuse_trace"])
     _assert_trace(resumed_trace, resume_expected_trace, expected_invariants={})
 
-    # ---- Phase 3: invariants
+    # ---- Step 3: invariants
     if resume_expected.get("first_trace_unchanged"):
         assert first_trace.input == first_trace_snapshot["input"], (
             f"first_trace_unchanged failed: input was {first_trace_snapshot['input']!r}, "
diff --git a/tests/conformance/test_pipeline_utilities.py b/tests/conformance/test_pipeline_utilities.py
index 616f609..3bd0305 100644
--- a/tests/conformance/test_pipeline_utilities.py
+++ b/tests/conformance/test_pipeline_utilities.py
@@ -1,10 +1,10 @@
 """Run every spec pipeline-utilities conformance fixture against the engine.
 
-Phase 2 scope (middleware): fixtures 001-016. Fixtures
+Middleware scope: fixtures 001-016. Fixtures
 017-019 (fan-out) and 020-021 (fan-out + middleware composition) skip
-via `_unsupported_directive` until Phase 3 lands the fan-out runtime.
+via `_unsupported_directive` until the fan-out runtime lands.
 Fixtures 022-031 (fan-out and checkpointing) similarly skip until their
-phases.
+support lands.
 
 The driver translates a fixture's `middleware:` block into actual
 middleware instances, wires up capture sinks per fixture-defined
@@ -58,8 +58,8 @@
 )
 
 
-# Phase 3 lands fan-out (proposal 0005 PU side). Checkpointing
-# (proposal 0008) comes in Phase 5; its fixtures use directives we
+# Fan-out (proposal 0005 PU side) lands later. Checkpointing
+# (proposal 0008) comes later still; its fixtures use directives we
 # don't translate yet.
 _UNSUPPORTED_NODE_DIRECTIVES = frozenset(
     {
@@ -76,9 +76,9 @@ def _load(path: Path) -> dict[str, Any]:
         return yaml.safe_load(f)
 
 
-# Phase 3 target: fan-out (proposal 0005 PU side) covers fixtures 017-023.
-# Phase 5 will pick up the checkpointing fixtures (024-031). PR-5
-# (proposal 0011) drives fixtures 032-038 through this same harness.
+# Fan-out (proposal 0005 PU side) covers fixtures 017-023.
+# The checkpointing fixtures (024-031) come later. Proposal 0011
+# drives fixtures 032-038 through this same harness.
 # State-migration fixtures 039-047 run via a dedicated runner
 # (``test_state_migration.py``); they need a separate driver because
 # the `cases:` shape carries seeded-record + migrations + resume blocks.
@@ -823,7 +823,7 @@ async def _capture_isolation(event: ObserverEvent) -> None:
 
     # Timing record assertions.
     if "timing_records" in expected:
-        # Two shapes per Phase 0 typed harness: dict-of-lists OR a flat list.
+        # Two shapes the typed harness accepts: dict-of-lists OR a flat list.
         expected_timing = expected["timing_records"]
         if isinstance(expected_timing, list):
             empty: list[TimingRecord] = []
@@ -837,7 +837,7 @@ async def _capture_isolation(event: ObserverEvent) -> None:
     # Single observer-event assertion (fixture 015 uses singular form).
     if "expected_observer_event" in expected:
         # Fixture 015 has a top-level observer attached; we'd need observer
-        # wiring just for this. For Phase 2's reduced scope, skip the
+        # wiring just for this. For this harness's reduced scope, skip the
         # singular-observer-event check — fixture 015 is gated on retry's
         # per-attempt event behavior, which we test via flaky+retry against
         # final_state/execution_order. The detailed single-event assertion
diff --git a/tests/unit/test_correlation.py b/tests/unit/test_correlation.py
index 6298c00..2e7d7e0 100644
--- a/tests/unit/test_correlation.py
+++ b/tests/unit/test_correlation.py
@@ -148,7 +148,7 @@ def test_current_correlation_id_returns_none_outside_invocation() -> None:
 
 
 # ---------------------------------------------------------------------------
-# Phase 5 / §10.4 step 3 + 4 — resume preserves correlation_id, mints new
+# §10.4 steps 3 + 4 — resume preserves correlation_id, mints new
 # invocation_id. Already covered in test_checkpoint.py at the record
 # level; here we additionally verify the user-visible ContextVar half.
 # ---------------------------------------------------------------------------
@@ -156,7 +156,7 @@ def test_current_correlation_id_returns_none_outside_invocation() -> None:
 
 async def test_resume_preserves_correlation_id_visible_to_user_code() -> None:
     """Resume MUST preserve the original correlation_id verbatim. The
-    Phase 5 checkpoint test verifies
+    conformance checkpoint test verifies
     this at the saved-record level; here we additionally verify it
     propagates to the ContextVar that user code reads from inside
     node bodies during the resumed invocation."""
diff --git a/tests/unit/test_middleware.py b/tests/unit/test_middleware.py
index ccb587a..010a541 100644
--- a/tests/unit/test_middleware.py
+++ b/tests/unit/test_middleware.py
@@ -1,6 +1,6 @@
 """Unit tests for the middleware infrastructure and canonical middleware.
 
-Covers the eight items from the Phase 2 plan:
+Covers eight middleware behaviors:
 
 1. Chain composition + ordering
 2. Short-circuit (middleware skips ``next``)
diff --git a/tests/unit/test_observability_otel.py b/tests/unit/test_observability_otel.py
index c569527..a9aa72f 100644
--- a/tests/unit/test_observability_otel.py
+++ b/tests/unit/test_observability_otel.py
@@ -907,7 +907,7 @@ def test_install_log_bridge_is_idempotent() -> None:
     already-installed one.
 
     Wrapped in ``warnings.catch_warnings("error")`` to lock in the
-    Phase 6.1 PR-B migration: this is the canonical surface where
+    logging-handler migration: this is the canonical surface where
     the deprecated ``opentelemetry.sdk._logs.LoggingHandler`` used
     to emit a ``DeprecationWarning``. Any future regression that
     re-introduces the deprecated path fires here immediately."""
@@ -1036,9 +1036,9 @@ def test_log_bridge_exports_records_with_correlation_id() -> None:
     filter-on-root placement (the prior implementation) misses
     every reasonable user's logger.
 
-    Wrapped in ``warnings.catch_warnings("error")`` so the PR-B
-    migration's "no more deprecation warning" guarantee is
-    asserted on the affirmative export path too."""
+    Wrapped in ``warnings.catch_warnings("error")`` so the
+    logging-handler migration's "no more deprecation warning"
+    guarantee is asserted on the affirmative export path too."""
     import warnings
 
     from opentelemetry.sdk._logs import LoggerProvider
@@ -1099,7 +1099,7 @@ def test_log_bridge_exports_records_with_correlation_id() -> None:
 
 
 # ---------------------------------------------------------------------------
-# Phase 6.1: concurrency-safe state scoping + §5.5 calling-node attribution
+# Concurrency-safe state scoping + §5.5 calling-node attribution
 # ---------------------------------------------------------------------------
 
 
@@ -1169,7 +1169,7 @@ async def test_shared_observer_concurrent_invocations_dont_collide() -> None:
 async def test_concurrent_fan_out_no_lifo_violation() -> None:
     """Regression check: under fan-out with multiple concurrent
     instances, started/completed events for different instances
-    interleave on the observer's call queue. The Phase 6.0
+    interleave on the observer's call queue. An earlier
     architecture used cross-event ``opentelemetry.context.attach``
     tokens that produced LIFO violations on out-of-order detach
     (suppressed by try/except guards in round-4 / round-7). Phase
@@ -1240,7 +1240,7 @@ async def _double(s: _ChildState) -> dict[str, int]:
 async def test_concurrent_fan_out_llm_spans_parent_under_calling_instance() -> None:
     """Under concurrent fan-out: each instance's
     ``openarmature.llm.complete`` span MUST parent under that
-    instance's calling node, not a sibling instance's. The Phase 6.1
+    instance's calling node, not a sibling instance's. The
     calling-node identity (namespace_prefix + attempt_index +
     fan_out_index threaded via ContextVar onto the LLM event
     payload) is what makes this attribution correct."""
@@ -1364,7 +1364,7 @@ async def test_llm_call_inside_retried_node_parents_per_attempt() -> None:
     """Under retry: when an LLM ``complete()`` call
     happens inside a node body wrapped with retry middleware, each
     attempt's LLM span MUST parent under THAT attempt's node span,
-    not a hardcoded ``attempt_index=0``. Phase 6.1's
+    not a hardcoded ``attempt_index=0``. The
     ``current_attempt_index`` ContextVar (set inside the per-attempt
     ``innermost`` scope) is what makes this work."""
     import httpx

From e191500dac731279255614b323fa43b8f3da3f4a Mon Sep 17 00:00:00 2001
From: chris-colinsky <chris@lunarcommand.xyz>
Date: Thu, 18 Jun 2026 15:08:26 -0700
Subject: [PATCH 15/15] Rephrase residual spec-authority citations in
 docstrings

The first sweep stripped structured spec references (section markers,
proposal numbers, versions) but left prose that still cited the spec
as the authority for a rule ("The spec defines/permits/mandates",
"spec-mandated", "spec-normative", "per spec"). State the behavior
directly, or move the basis to a nearby comment where load-bearing.

Structural mentions are kept: the spec/ conformance directories, the
spec parameter in the harness, spec-version reads, and descriptions of
what the conformance suite covers.
---
 src/openarmature/graph/compiled.py                | 15 +++++++--------
 src/openarmature/graph/events.py                  |  6 +++---
 src/openarmature/graph/observer.py                | 12 ++++++------
 src/openarmature/graph/parallel_branches.py       |  2 +-
 src/openarmature/llm/messages.py                  |  2 +-
 .../observability/langfuse/observer.py            |  6 +++---
 src/openarmature/observability/metadata.py        |  2 +-
 src/openarmature/observability/otel/observer.py   |  7 +++----
 src/openarmature/prompts/backends/filesystem.py   |  6 +++---
 src/openarmature/prompts/group.py                 |  2 +-
 src/openarmature/prompts/prompt.py                |  2 +-
 tests/conformance/adapter.py                      |  4 ++--
 tests/conformance/harness/directives.py           | 14 +++++++-------
 tests/conformance/harness/expectations.py         |  2 +-
 tests/conformance/test_pipeline_utilities.py      |  4 ++--
 tests/conformance/test_prompt_management.py       |  4 ++--
 tests/unit/test_correlation.py                    |  4 ++--
 tests/unit/test_fan_out.py                        |  2 +-
 tests/unit/test_parallel_branches.py              |  2 +-
 tests/unit/test_projection.py                     |  2 +-
 tests/unit/test_runtime_errors.py                 |  2 +-
 21 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/src/openarmature/graph/compiled.py b/src/openarmature/graph/compiled.py
index 5114f31..9036b96 100644
--- a/src/openarmature/graph/compiled.py
+++ b/src/openarmature/graph/compiled.py
@@ -372,18 +372,17 @@ def _restore_fan_out_progress_state(
     into the mutable per-fan-out tracking dict that ``FanOutNode``
     consults to decide which instances to skip vs re-run.
 
-    Extra-output state isn't preserved across resume — the spec models
-    ``result`` as a single accumulator entry and is silent on
+    Extra-output state isn't preserved across resume: ``result`` is
+    modeled as a single accumulator entry, with nothing recorded for
     ``extra_outputs``. Reconstructing them would require either
-    serializing them on the record (a spec change) or recomputing them
+    serializing them on the record (a record-format change) or recomputing them
     (defeating the point of skip-on-resume). Fixtures don't exercise
     ``extra_outputs`` on the resume path; if a future workload needs
     them, surface as a follow-on.
 
     ``result_is_error`` is read verbatim from the saved record's
-    explicit field. The earlier structural-pattern heuristic is gone
-    — the spec mandates the
-    explicit field as the authoritative discriminator because the
+    explicit field. The earlier structural-pattern heuristic is gone:
+    the explicit field is the authoritative discriminator because the
     user's state schema can legitimately contain values that match
     the engine's canonical error-record shape, and a heuristic would
     misclassify them.
@@ -1412,7 +1411,7 @@ async def _step_function_node(
         (``node_exception`` / ``reducer_error`` /
         ``state_validation_error``) stay inline in ``innermost`` —
         those errors short-circuit before edge eval can run, so the
-        spec's "before the failure propagates" MUST is preserved by
+        "before the failure propagates" requirement is preserved by
         the inline dispatch.
 
         Returns a :class:`_StepResult` carrying the merged state +
@@ -2375,7 +2374,7 @@ async def _maybe_save_checkpoint(
 
         Atomicity contract: the save-call site below
         completes the "produce contribution + record into accumulator
-        + save" sequence the spec mandates. ``FanOutNode.run_with_context``
+        + save" sequence. ``FanOutNode.run_with_context``
         flips an instance's state to ``completed`` and stashes its
         ``result`` BEFORE invoking the save that durably records the
         transition. A crash between that state mutation and the save
diff --git a/src/openarmature/graph/events.py b/src/openarmature/graph/events.py
index 8542651..09b3b50 100644
--- a/src/openarmature/graph/events.py
+++ b/src/openarmature/graph/events.py
@@ -536,8 +536,8 @@ class LlmCompletionEvent:
       lifetime, unique within the run. Distinct from
       ``response_id``.
     - ``caller_invocation_metadata``: optional snapshot of caller-
-      supplied invocation metadata at LLM-call time. Spec-defined as
-      OPTIONAL; the python OpenAIProvider populates it by default so
+      supplied invocation metadata at LLM-call time. OPTIONAL; the
+      Python OpenAIProvider populates it by default so
       the bundled OTel/Langfuse observers can emit the
       ``openarmature.user.<key>`` span-attribute family without an
       extra opt-in. Pass ``populate_caller_metadata=False`` to suppress
@@ -625,7 +625,7 @@ class LlmFailedEvent:
       ``provider_unsupported_content_block``,
       ``structured_output_invalid``). Always present.
     - ``error_type``: OPTIONAL impl-level / vendor-specific error
-      type or code. Two acceptable styles per spec:
+      type or code. Two acceptable styles:
       vendor error code (e.g. ``"rate_limit_exceeded"``) OR
       upstream exception class name (e.g. ``"RateLimitError"``).
       ``None`` when no impl-side type is available.
diff --git a/src/openarmature/graph/observer.py b/src/openarmature/graph/observer.py
index d1d9ea7..ce5da36 100644
--- a/src/openarmature/graph/observer.py
+++ b/src/openarmature/graph/observer.py
@@ -22,7 +22,7 @@
 - `_dispatch`: enqueues an event for the worker to deliver.
 - `deliver_loop`: the worker coroutine. Reads items from the queue and
   calls each observer in order, filtering by subscribed phase and
-  isolating exceptions via `warnings.warn` per spec.
+  isolating exceptions via `warnings.warn`.
 """
 
 from __future__ import annotations
@@ -344,7 +344,7 @@ class DrainSummary:
     delivered to every subscribed observer before cancellation, and
     `timeout_reached is True`.
 
-    The spec-mandated minimum is these two fields. Implementations MAY
+    These two fields are the required minimum. Implementations MAY
     extend the shape with diagnostic detail (per-observer counts,
     sampled event metadata) in subsequent versions; this version ships
     the minimum.
@@ -376,8 +376,8 @@ class _FanOutInstanceState:
     - ``result_is_error`` distinguishes success contributions
       (``False``) from collect-mode error contributions (``True``).
       Internal flag — not exposed on the public
-      ``FanOutInstanceProgress`` shape because the spec presents
-      ``result`` as a single typed entry per the parent state schema.
+      ``FanOutInstanceProgress`` shape because ``result`` is exposed
+      as a single typed entry per the parent state schema.
       ``FanOutNode.run_with_context`` consults this on resume to
       route the rolled-forward contribution through the
       ``errors_field`` bucket rather than ``target_field``.
@@ -385,8 +385,8 @@ class _FanOutInstanceState:
       ``extra_outputs`` mapping (parent-field -> sub-field) so that
       per-instance resume preserves the FULL per-instance contribution
       (not just the ``target_field`` slice). Internal — not exposed on
-      the public ``FanOutInstanceProgress`` shape because the spec
-      describes ``result`` as a single accumulator entry.
+      the public ``FanOutInstanceProgress`` shape because ``result``
+      is a single accumulator entry.
     - ``completed_inner_positions`` accumulates ``NodePosition`` entries
       from inner nodes that complete inside this instance's subgraph
       execution. Captures the instance's progress for observational
diff --git a/src/openarmature/graph/parallel_branches.py b/src/openarmature/graph/parallel_branches.py
index 2b6ff98..8f8aac7 100644
--- a/src/openarmature/graph/parallel_branches.py
+++ b/src/openarmature/graph/parallel_branches.py
@@ -62,7 +62,7 @@
 class BranchSpec[ChildT: State]:
     """One entry in a :class:`ParallelBranchesNode`'s branch mapping.
 
-    Branches are heterogeneous: each spec MAY reference a different
+    Branches are heterogeneous: each branch may reference a different
     compiled subgraph with a different state schema. ``inputs`` /
     ``outputs`` follow the same shape as subgraph projection
     mappings.
diff --git a/src/openarmature/llm/messages.py b/src/openarmature/llm/messages.py
index f52ba2e..778391d 100644
--- a/src/openarmature/llm/messages.py
+++ b/src/openarmature/llm/messages.py
@@ -79,7 +79,7 @@ class ForceTool(BaseModel):
 
     Use the record form of the `tool_choice` discriminated union when
     you need the model to call a specific tool by name. ``type`` is the
-    spec-level discriminator (``"tool"``); the wire mapping renames it
+    discriminator (``"tool"``); the wire mapping renames it
     to ``"function"`` for the OpenAI body. The
     ``name`` MUST match a ``Tool.name`` in the supplied ``tools``
     list; ``validate_tool_choice`` enforces this at pre-send time and
diff --git a/src/openarmature/observability/langfuse/observer.py b/src/openarmature/observability/langfuse/observer.py
index 61f0de5..587641f 100644
--- a/src/openarmature/observability/langfuse/observer.py
+++ b/src/openarmature/observability/langfuse/observer.py
@@ -164,9 +164,9 @@ def _subgraph_identity_at(event: NodeEvent, depth: int) -> str:
     given 1-based namespace depth, or the empty string when no
     identity is tracked at that depth.
 
-    The empty-string fallback matches the spec's "if the
-    implementation tracks one" clause for implementations / direct
-    ``SubgraphNode(...)`` callers that don't wire an identity through.
+    The empty-string fallback is the "no identity tracked" case, for
+    implementations / direct ``SubgraphNode(...)`` callers that don't
+    wire an identity through.
     Conformance fixtures 031/032/033 lock identity as the required
     value; the empty-string path keeps direct callers conformant but
     failing those fixtures.
diff --git a/src/openarmature/observability/metadata.py b/src/openarmature/observability/metadata.py
index 10f775d..bb856ea 100644
--- a/src/openarmature/observability/metadata.py
+++ b/src/openarmature/observability/metadata.py
@@ -27,7 +27,7 @@
 
 - Keys MUST be strings.
 - Keys MUST NOT start with ``openarmature.`` or ``gen_ai.`` (reserved
-  for spec-normative attribute namespaces; collisions would silently
+  attribute namespaces; collisions would silently
   overwrite OA-emitted state at the observer layer).
 - Keys MUST NOT exactly match a reserved OA-emitted top-level metadata
   key name (the Langfuse set plus ``invocation_id``) for the same
diff --git a/src/openarmature/observability/otel/observer.py b/src/openarmature/observability/otel/observer.py
index 97b8b31..35a2929 100644
--- a/src/openarmature/observability/otel/observer.py
+++ b/src/openarmature/observability/otel/observer.py
@@ -186,10 +186,9 @@ def _subgraph_identity_at(event: NodeEvent, depth: int) -> str:
     given 1-based namespace depth, or the empty string when no
     identity is tracked at that depth.
 
-    The empty-string fallback matches the spec's "if the implementation
-    tracks one" clause for callers using
-    ``SubgraphNode(name=..., compiled=...)`` without supplying
-    ``subgraph_identity``.
+    The empty-string fallback is the "no identity tracked" case, for
+    callers using ``SubgraphNode(name=..., compiled=...)`` without
+    supplying ``subgraph_identity``.
     """
     # Spec observability §5.3 (coord thread
     # clarify-subgraph-name-semantics).
diff --git a/src/openarmature/prompts/backends/filesystem.py b/src/openarmature/prompts/backends/filesystem.py
index 6b555a4..8277056 100644
--- a/src/openarmature/prompts/backends/filesystem.py
+++ b/src/openarmature/prompts/backends/filesystem.py
@@ -21,9 +21,9 @@ class FilesystemPromptBackend:
     - ``layout="per-label"`` (default): ``<root>/<label>/<name>.j2``.
       The ``label`` subdirectory keeps name-collisions across labels
       distinct (e.g., ``prompts/production/greeting.j2`` and
-      ``prompts/staging/greeting.j2``). The spec permits filesystem
-      backends to interpret label as "a subdirectory or filename
-      suffix"; this is the subdirectory variant.
+      ``prompts/staging/greeting.j2``). A filesystem backend may
+      interpret label as a subdirectory or filename suffix; this is
+      the subdirectory variant.
     - ``layout="flat"``: ``<root>/<name>.j2``. The same template
       is returned regardless of which label was requested; the
       Prompt's ``label`` field is the requested label verbatim.
diff --git a/src/openarmature/prompts/group.py b/src/openarmature/prompts/group.py
index 4af28eb..c4977f1 100644
--- a/src/openarmature/prompts/group.py
+++ b/src/openarmature/prompts/group.py
@@ -21,7 +21,7 @@ class PromptGroup(BaseModel):
         group_name: Stable identifier for this group pattern.
         members: Ordered sequence of at least two PromptResult
             instances. Order matches the application's intended call
-            sequence; the spec does not require sequential execution.
+            sequence; sequential execution is not required.
     """
 
     model_config = ConfigDict(extra="forbid")
diff --git a/src/openarmature/prompts/prompt.py b/src/openarmature/prompts/prompt.py
index f92e39c..c2ef368 100644
--- a/src/openarmature/prompts/prompt.py
+++ b/src/openarmature/prompts/prompt.py
@@ -183,7 +183,7 @@ class _PromptBase(BaseModel):
             into ``provider.complete(config=...)`` without translation.
         observability_entities: Optional backend-keyed references to
             first-class entities the prompt has been registered as in
-            observability backends.  Spec-normative key:
+            observability backends.  Recognized key:
             ``langfuse_prompt`` (the Langfuse SDK Prompt-entity ref).
         metadata: Optional backend-supplied metadata.
     """
diff --git a/tests/conformance/adapter.py b/tests/conformance/adapter.py
index 772fdd5..e5c9fe9 100644
--- a/tests/conformance/adapter.py
+++ b/tests/conformance/adapter.py
@@ -504,7 +504,7 @@ def _make_flaky_fn(
       031): the engine's first invoke aborts on this node; the
       resumed invoke succeeds. No retry middleware is wrapped around
       these nodes (any wrapping would bypass the resume path), so
-      "fail-once-then-succeed" matches the spec contract directly.
+      "fail-once-then-succeed" matches the resume contract directly.
     """
     sequence = list(flaky.get("failure_sequence", []))
     success_update = dict(flaky.get("success_update", {}))
@@ -694,7 +694,7 @@ def _projection_for(node_spec: Mapping[str, Any]) -> ProjectionStrategy[State, S
     """Pick the projection strategy declared on a subgraph node spec.
 
     `inputs:` and/or `outputs:` in the YAML → `ExplicitMapping`. Both absent →
-    the spec's default `FieldNameMatching`.
+    the default `FieldNameMatching`.
     """
 
     inputs = node_spec.get("inputs")
diff --git a/tests/conformance/harness/directives.py b/tests/conformance/harness/directives.py
index 256130a..cfe0696 100644
--- a/tests/conformance/harness/directives.py
+++ b/tests/conformance/harness/directives.py
@@ -28,7 +28,7 @@
 class _ForbidExtras(BaseModel):
     """Strict — used for the structural skeleton (state schema, node primary
     directive set, edges, observer registration, middleware config split).
-    Catches new directives the spec adds at the load-bearing places."""
+    Catches new fixture directives at the load-bearing places."""
 
     model_config = ConfigDict(extra="forbid")
 
@@ -36,8 +36,8 @@ class _ForbidExtras(BaseModel):
 class _AllowExtras(BaseModel):
     """Permissive — used for payload-shape models (mock LLM responses,
     middleware-specific params, flaky/fan-out config). Validates KNOWN
-    keys' types but doesn't reject unknown ones — the spec evolves these
-    payloads frequently and modelling every parameter exhaustively
+    keys' types but doesn't reject unknown ones — these payloads evolve
+    frequently and modelling every parameter exhaustively
     creates churn without proportional value. The strictness
     contract sits at the directive STRUCTURE level (above), not the
     parameter-bag level (here)."""
@@ -87,8 +87,8 @@ class StateSchema(_ForbidExtras):
 class EdgeSpec(_AllowExtras):
     """One edge in a graph definition.
 
-    The spec defines static (``from``/``to``) and conditional
-    (``from``/``condition``) edges; observability/011 also uses a
+    Edges come in static (``from``/``to``) and conditional
+    (``from``/``condition``) forms; observability/011 also uses a
     ``when``-shaped predicate. Schema is permissive here so all forms
     parse — the engine retrofit interprets each shape against
     the engine's edge model.
@@ -576,8 +576,8 @@ class MockResponse(_AllowExtras):
 
     Permissive shape because the body's content mirrors OpenAI's wire
     format which is wide and evolving; modelling every field would
-    duplicate the OpenAI schema. The ``llm-provider`` capability's
-    spec is the authoritative shape.
+    duplicate the OpenAI schema. The OpenAI wire format is the
+    authoritative shape.
     """
 
     status: int | None = None
diff --git a/tests/conformance/harness/expectations.py b/tests/conformance/harness/expectations.py
index b3c3bb9..fd682b9 100644
--- a/tests/conformance/harness/expectations.py
+++ b/tests/conformance/harness/expectations.py
@@ -8,7 +8,7 @@
 in :mod:`runtime` typed access to the assertion payload it needs.
 
 Typing depth: TOP-LEVEL keys per capability are exhaustively
-typed (catches new directives the spec adds). The nested payload values
+typed (catches new fixture directives). The nested payload values
 underneath (e.g., individual span tree entries, observer event details)
 are kept loose as ``list[Any]`` / ``dict[str, Any]`` because the runtime
 code that consumes them is the right place to tighten.
diff --git a/tests/conformance/test_pipeline_utilities.py b/tests/conformance/test_pipeline_utilities.py
index 3bd0305..3f697fb 100644
--- a/tests/conformance/test_pipeline_utilities.py
+++ b/tests/conformance/test_pipeline_utilities.py
@@ -850,10 +850,10 @@ def _collect_parallel_branches_errors_fields(spec: Mapping[str, Any]) -> set[str
     ``errors_field`` on any parallel_branches node in ``spec``.
 
     The ``errors_field`` carries an implementation-defined
-    record shape; the spec only mandates ``branch_name`` + category. The
+    record shape; only ``branch_name`` + category are required. The
     engine's record carries additional engine-defined keys (``message``,
     ``cause_type``). Fixtures asserting against ``errors_field`` records
-    use subset semantics — assert the spec-mandated keys are present
+    use subset semantics — assert the required keys are present
     with the expected values, ignore the rest.
     """
     out: set[str] = set()
diff --git a/tests/conformance/test_prompt_management.py b/tests/conformance/test_prompt_management.py
index 62aa0e4..e917d0f 100644
--- a/tests/conformance/test_prompt_management.py
+++ b/tests/conformance/test_prompt_management.py
@@ -72,7 +72,7 @@ def _segment_from_fixture(entry: dict[str, Any]) -> Any:
     validators — the harness exists to test render-time behavior,
     including fixtures that intentionally build prompts violating
     construction-time invariants (placeholder regex, role-block
-    compat).  Render-time enforcement (the spec-normative trigger)
+    compat).  Render-time enforcement (the normative trigger)
     still runs; only the construction-time ergonomic-only check is
     bypassed.
 
@@ -546,7 +546,7 @@ def _assert_capture_attrs(capture_name: str, actual: Any, expected: dict[str, An
 
 def _message_to_dict_for_compare(message: Message) -> dict[str, Any]:
     """Dump a Message to a plain dict for structural equality against
-    a fixture YAML expected value.  Mirrors the spec's documented
+    a fixture YAML expected value.  Mirrors the documented
     Message shape: ``{role, content}`` with optional extras."""
     dumped = message.model_dump(exclude_none=True)
     # Normalize content-blocks shape: drop pydantic internal
diff --git a/tests/unit/test_correlation.py b/tests/unit/test_correlation.py
index 2e7d7e0..f690c1d 100644
--- a/tests/unit/test_correlation.py
+++ b/tests/unit/test_correlation.py
@@ -34,8 +34,8 @@ async def _read_correlation(state: _S) -> dict[str, str]:
 
 async def test_caller_supplied_correlation_id_visible_inside_node() -> None:
     """User code in a node body can read the supplied correlation_id
-    via :func:`current_correlation_id` — the spec's mandated
-    cross-backend join key surface."""
+    via :func:`current_correlation_id`, the cross-backend join key
+    surface."""
     g = GraphBuilder(_S).add_node("read", _read_correlation).add_edge("read", END).set_entry("read").compile()
     final = await g.invoke(_S(), correlation_id="my-business-request-42")
     assert final.captured == "my-business-request-42"
diff --git a/tests/unit/test_fan_out.py b/tests/unit/test_fan_out.py
index 18cc13d..7307efe 100644
--- a/tests/unit/test_fan_out.py
+++ b/tests/unit/test_fan_out.py
@@ -1,6 +1,6 @@
 """Unit tests for the fan-out runtime.
 
-Covers the spec-corner cases the conformance fixtures exercise only
+Covers the edge cases the conformance fixtures exercise only
 implicitly:
 
 - items_field projection
diff --git a/tests/unit/test_parallel_branches.py b/tests/unit/test_parallel_branches.py
index 507188e..3637563 100644
--- a/tests/unit/test_parallel_branches.py
+++ b/tests/unit/test_parallel_branches.py
@@ -1,6 +1,6 @@
 """Unit tests for the parallel-branches runtime.
 
-Covers spec corner cases the conformance fixtures exercise only
+Covers edge cases the conformance fixtures exercise only
 implicitly:
 
 - compile-time empty-branches rejection
diff --git a/tests/unit/test_projection.py b/tests/unit/test_projection.py
index bf81e32..f83d074 100644
--- a/tests/unit/test_projection.py
+++ b/tests/unit/test_projection.py
@@ -106,7 +106,7 @@ def test_explicit_mapping_outputs_projects_only_named_pairs() -> None:
 
 
 def test_explicit_mapping_outputs_absent_falls_back_to_field_name_matching() -> None:
-    """`outputs=None` (absent) falls back to spec default field-name matching;
+    """`outputs=None` (absent) falls back to the default field-name matching;
     `outputs={}` (present, empty) projects nothing."""
 
     sub_final = ChildEM(input=1, result=2, note="from-child")
diff --git a/tests/unit/test_runtime_errors.py b/tests/unit/test_runtime_errors.py
index d1d6810..94f00ef 100644
--- a/tests/unit/test_runtime_errors.py
+++ b/tests/unit/test_runtime_errors.py
@@ -1,6 +1,6 @@
 """Runtime-error categories not exercised by the conformance suite.
 
-The spec defines five runtime categories. The conformance fixtures cover
+There are five runtime-error categories. The conformance fixtures cover
 `node_exception` (009) and `routing_error` (008) directly and reach the
 others incidentally via 001–006. These tests target the three categories no
 fixture triggers: `edge_exception`, `reducer_error`, and