Skip to content

Commit 6fd75aa

Browse files
Bump spec conformance pin to v0.84.0
Advance the spec submodule to v0.84.0 and defer every conformance fixture the jump pulls in for proposals not yet implemented (0062, 0077-0089), at both the parser and run layers, so the suite is green at the new baseline. Mechanical: no proposal implemented, no src behavior changed.

- spec submodule + the three pin constants (pyproject, __init__, test_smoke) move 0.70.1 -> 0.84.0.
- conformance.toml: spec_pin -> v0.84.0 + 14 not-yet proposal rows.
- The deferral dicts in the parser-level harness and the four per-capability runners gain the new fixtures, each reason naming its proposal; AGENTS.md regenerated.
- 008's detached fan-out-instance error-status case (proposal 0061, implemented) is deferred pending a harness-wiring fix tracked separately; the mechanism stays verified by the subgraph case.
1 parent e10fc07 commit 6fd75aa

11 files changed

Lines changed: 338 additions & 19 deletions

conformance.toml

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232

3333
[manifest]
3434
implementation = "openarmature-python"
35-
spec_pin = "v0.70.1"
35+
spec_pin = "v0.84.0"
3636

3737
# Status values:
3838
# implemented — shipped behavior matches the proposal's contract
@@ -764,3 +764,96 @@ note = "ParallelBranchesNode gains two additive branch forms. (1) Inline-callabl
764764
status = "implemented"
765765
since = "0.15.0"
766766
note = "The model's output tool calls get an output-side home on the openarmature.llm.complete span. observability §5.5.10 adds the UNGATED identity projections openarmature.llm.output.tool_calls.count / .names / .ids (the class of openarmature.llm.model / attempt_index; emitted only on a tool-calling completion, omitted entirely otherwise -- not count=0); .names and .ids are index-aligned in request order, .count equals their length. §5.5.1 adds the GATED openarmature.llm.output.tool_calls, the full [{id, name, arguments}] serialization (reusing the §5.5.5 input tool-call encoding) carrying the arguments, suppressed under disable_provider_payload and subject to the truncation contract. graph-engine §6: LlmCompletionEvent gains an output_tool_calls field (the ToolCall records, populated unconditionally). python carries the field on BOTH the terminal LlmCompletionEvent (spec-conformance + the Langfuse/consumer path) and the python-internal per-attempt LlmRetryAttemptEvent, and the OTel observer renders the span attributes from the per-attempt event (the LLM-span source since 0050) -- mirroring how output_content already works. OA-namespace, no gen_ai.* mirror (the attempt_index precedent). Langfuse request-side mapping is OUT OF SCOPE (proposal defers it as future work); no Langfuse change. Fixtures 085 (two calls -> count/names/ids), 086 (no calls -> family absent), 087 (payload-gating: identity survives off, gated full present only on)."
767+
768+
# Spec v0.71.0 (proposal 0062). LLM completion streaming -- an opt-in
769+
# stream flag on complete() emitting a per-chunk LlmTokenEvent plus the
770+
# §6 streaming-assembly reassembly into the atomic Response. Python has
771+
# not yet shipped streaming; v0.16.0 leaves it not-yet. llm-provider
772+
# fixtures 059/060 and observability fixtures 111-118 defer with it.
773+
[proposals."0062"]
774+
status = "not-yet"
775+
776+
# Spec v0.72.0 (proposal 0077). Retrieval-provider TEI wire mapping
777+
# (§8 / §8.1) plus the input_type embedding knob (§2 / §3). Python has
778+
# not yet shipped the retrieval wire mappings; not-yet. retrieval-
779+
# provider fixtures 013-017 defer with it.
780+
[proposals."0077"]
781+
status = "not-yet"
782+
783+
# Spec v0.73.0 (proposal 0078). Retrieval-provider Jina hosted wire
784+
# mapping (§8.2). Not-yet; retrieval-provider fixtures 018-022 defer
785+
# with it.
786+
[proposals."0078"]
787+
status = "not-yet"
788+
789+
# Spec v0.74.0 (proposal 0079). Retrieval-provider OpenAI-compatible
790+
# embeddings wire mapping (§8.3). Not-yet; retrieval-provider fixtures
791+
# 023-027 defer with it.
792+
[proposals."0079"]
793+
status = "not-yet"
794+
795+
# Spec v0.75.0 (proposal 0080). PromptGroup arity enforcement
796+
# (prompt-management §10 / §11 -- construct-time raise plus the new
797+
# prompt_group_invalid error category). Not-yet; prompt-management
798+
# fixture 035 defers with it.
799+
[proposals."0080"]
800+
status = "not-yet"
801+
802+
# Spec v0.76.0 (proposal 0081). Conformance-adapter value-matcher
803+
# vocabulary (§5.10) -- ratifies the fixture matcher tokens already in
804+
# use. Not-yet (no fixture changes); awaits a conformance-pin batch.
805+
[proposals."0081"]
806+
status = "not-yet"
807+
808+
# Spec v0.77.0 (proposal 0082). Structured-output failure diagnostics
809+
# (graph-engine §6 -- LlmFailedEvent response-side surface for
810+
# structured_output_invalid + llm-provider §7 finish_reason / usage).
811+
# Not-yet; llm-provider fixtures 022/023 and observability fixtures
812+
# 120-125 defer with it.
813+
[proposals."0082"]
814+
status = "not-yet"
815+
816+
# Spec v0.78.0 (proposal 0083). Per-prompt token-budget observability
817+
# (prompt-management §3 / graph-engine §6 / observability §5.5.15).
818+
# Not-yet; observability fixtures 126-131 defer with it.
819+
[proposals."0083"]
820+
status = "not-yet"
821+
822+
# Spec v0.81.0 (proposal 0084). Nested-fan-out span lineage chain
823+
# (graph-engine §6 fan_out_index_chain / branch_name_chain +
824+
# observability §4 / §5.5 lineage-resolved parent). Not-yet; graph-
825+
# engine fixture 039 and observability fixtures 132-134 (+ a new case
826+
# on 008) defer with it.
827+
[proposals."0084"]
828+
status = "not-yet"
829+
830+
# Spec v0.80.0 (proposal 0085). Nested-fan-out checkpoint resume
831+
# lineage (pipeline-utilities §10.11 enclosing_fan_out_lineage). Not-
832+
# yet; pipeline-utilities fixture 076 defers with it.
833+
[proposals."0085"]
834+
status = "not-yet"
835+
836+
# Spec v0.79.0 (proposal 0086). Service-wide default cache_ttl_seconds
837+
# on PromptManager (prompt-management §6). Not-yet; prompt-management
838+
# fixture 036 defers with it.
839+
[proposals."0086"]
840+
status = "not-yet"
841+
842+
# Spec v0.82.0 (proposal 0087). Conformance-adapter within-node
843+
# directive execution order (§8.3). Not-yet; observability fixture 135
844+
# defers with it.
845+
[proposals."0087"]
846+
status = "not-yet"
847+
848+
# Spec v0.83.0 (proposal 0088). Langfuse parallel-branches mapping
849+
# parity (observability §8.4.8). Not-yet; observability fixture 136
850+
# defers with it.
851+
[proposals."0088"]
852+
status = "not-yet"
853+
854+
# Spec v0.84.0 (proposal 0089). Embedding / rerank typed-event output
855+
# (graph-engine §6 EmbeddingEvent.output_vectors / RerankEvent.
856+
# output_results + observability output mappings). Not-yet;
857+
# observability fixtures 137/138 defer with it (and re-source 083/108).
858+
[proposals."0089"]
859+
status = "not-yet"

openarmature-spec

Submodule openarmature-spec updated 160 files

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
6363
openarmature = "openarmature.cli:main"
6464

6565
[tool.openarmature]
66-
spec_version = "0.70.1"
66+
spec_version = "0.84.0"
6767

6868
[dependency-groups]
6969
dev = [

src/openarmature/AGENTS.md

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# OpenArmature — Agent documentation
22

3-
*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.70.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
3+
*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.84.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
44

55
## TL;DR
66

@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:
1010

1111
## Capability contracts
1212

13-
_Sourced from openarmature-spec v0.70.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
13+
_Sourced from openarmature-spec v0.84.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
1414

1515
### Capability: `graph-engine`
1616

@@ -221,7 +221,7 @@ and is invisible to nodes that don't opt into middleware.
221221

222222
**Middleware.** An async callable with the shape:
223223

224-
```
224+
```python
225225
async def middleware(state, next) -> partial_update
226226
```
227227

@@ -262,13 +262,14 @@ second, and so on, with the original node at the inner end.
262262

263263
For a chain `[m1, m2, m3]` wrapping node `n`, execution proceeds:
264264

265-
```
266-
m1 sees state, calls next(s) ────► m2 sees state, calls next(s) ────► m3 sees state, calls next(s)
267-
268-
269-
n(state) → partial_update
270-
271-
m1 returns partial_update ◄──── m2 returns partial_update ◄──── m3 returns partial_update
265+
```mermaid
266+
flowchart LR
267+
m1["m1 sees state,<br/>calls next(s)"] --> m2["m2 sees state,<br/>calls next(s)"]
268+
m2 --> m3["m3 sees state,<br/>calls next(s)"]
269+
m3 --> n["n(state) produces<br/>partial_update"]
270+
n --> r3["m3 returns<br/>partial_update"]
271+
r3 --> r2["m2 returns<br/>partial_update"]
272+
r2 --> r1["m1 returns<br/>partial_update"]
272273
```
273274

274275
Each middleware's return value flows back through the previous layer's `next` call return.
@@ -283,7 +284,7 @@ The two phases are tied to a single position in the chain: if `m1` is outermost,
283284
runs first AND `m1`'s post-phase runs last. Pre-order and post-order are not configured
284285
independently. Concretely, a middleware function carries both phases:
285286

286-
```
287+
```python
287288
async def my_middleware(state, next):
288289
# ── pre-node phase: runs on the way IN ──
289290
started_at = time.time()

src/openarmature/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
"""
2626

2727
__version__ = "0.15.0"
28-
__spec_version__ = "0.70.1"
28+
__spec_version__ = "0.84.0"
2929
# Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
3030
# package-registry name for this implementation. Surfaces on every
3131
# OTel invocation span as ``openarmature.implementation.name`` and on

tests/conformance/test_fixture_parsing.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,78 @@ def _id(case: tuple[str, Path]) -> str:
557557
"observability/110-otel-callable-branch-span": (
558558
"Cross-capability parser doesn't model final_state + span_tree together; runs in test_observability"
559559
),
560+
# ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -----------------------
561+
# New fixtures whose directive shapes the cross-capability parser doesn't
562+
# model, for proposals deferred to their own later v0.16.0 PRs. Each runs
563+
# (or stays accounted for) in its capability runner once that proposal lands.
564+
# Proposal 0062 (LLM completion streaming, v0.71.0) -- the stream-flag
565+
# llm-provider wire fixtures + the per-chunk LlmTokenEvent observability
566+
# fixtures. (117 parses cleanly and is accounted for in test_observability.)
567+
"llm-provider/059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
568+
"llm-provider/060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
569+
"observability/111-llm-token-event-dispatch-on-stream": "Proposal 0062 streaming; not implemented",
570+
"observability/112-llm-token-event-absent-without-stream": "Proposal 0062 streaming; not implemented",
571+
"observability/113-streamed-tool-call-reassembles-no-token-events": (
572+
"Proposal 0062 streaming; not implemented"
573+
),
574+
"observability/114-llm-token-event-then-failure-mid-stream": "Proposal 0062 streaming; not implemented",
575+
"observability/115-llm-token-event-call-id-links-to-completion": (
576+
"Proposal 0062 streaming; not implemented"
577+
),
578+
"observability/116-llm-token-event-call-level-retry-one-call-id": (
579+
"Proposal 0062 streaming; not implemented"
580+
),
581+
"observability/118-llm-token-event-reasoning-delta-kind": "Proposal 0062 streaming; not implemented",
582+
# Proposal 0075 (callable branches) coverage round-out fixture 119
583+
# (v0.73.1); the cross-capability parser doesn't model its graph-style
584+
# shape (cf. 110). Accounted for in test_observability.
585+
"observability/119-otel-callable-branch-attempt-index-under-node-retry": (
586+
"Proposal 0075 callable-branch coverage round-out; harness shape not modelled"
587+
),
588+
# Proposal 0082 (structured-output failure diagnostics, v0.77.0) -- the
589+
# LlmFailedEvent response-side directive shape.
590+
"observability/120-llm-failure-event-structured-output-truncation": (
591+
"Proposal 0082 structured-output failure diagnostics; not implemented"
592+
),
593+
"observability/121-llm-failure-event-structured-output-schema-mismatch": (
594+
"Proposal 0082 structured-output failure diagnostics; not implemented"
595+
),
596+
"observability/122-llm-failure-event-response-side-null-on-non-body-failure": (
597+
"Proposal 0082 structured-output failure diagnostics; not implemented"
598+
),
599+
# Proposal 0083 (per-prompt token-budget observability, v0.78.0) -- the
600+
# token_budget directive shape + budget-exceeded expectations.
601+
"observability/126-token-budget-input-exceeded": "Proposal 0083 token-budget; not implemented",
602+
"observability/127-token-budget-total-exceeded": "Proposal 0083 token-budget; not implemented",
603+
"observability/128-token-budget-under-budget-no-warning": "Proposal 0083 token-budget; not implemented",
604+
"observability/129-token-budget-absent-unchanged": "Proposal 0083 token-budget; not implemented",
605+
"observability/130-langfuse-token-budget-warning-level": "Proposal 0083 token-budget; not implemented",
606+
"observability/131-token-budget-on-structured-output-failure": (
607+
"Proposal 0083 token-budget; not implemented"
608+
),
609+
# Proposal 0084 (nested-fan-out span lineage, v0.81.0) -- the
610+
# lineage-chain directive shapes. (132 parses cleanly; accounted for in
611+
# test_observability.)
612+
"observability/133-otel-nested-fan-out-orphan-llm-fallback": (
613+
"Proposal 0084 nested-fan-out span lineage; not implemented"
614+
),
615+
"observability/134-langfuse-nested-fan-out-parent-resolution": (
616+
"Proposal 0084 nested-fan-out span lineage; not implemented"
617+
),
618+
# Proposal 0087 (within-node directive execution order, v0.82.0).
619+
"observability/135-within-node-directive-execution-order": (
620+
"Proposal 0087 within-node directive execution order; not implemented"
621+
),
622+
# Proposal 0089 (embedding / rerank typed-event output, v0.84.0) -- the
623+
# rerank failure observation directive shape (rerank capability unshipped).
624+
"observability/138-langfuse-rerank-failure-observation": (
625+
"Proposal 0089 rerank failure observation; rerank capability not implemented"
626+
),
627+
# Proposal 0086 (PromptManager default cache_ttl_seconds, v0.79.0) -- the
628+
# manager default-cache-ttl directive shape.
629+
"prompt-management/036-prompt-manager-default-cache-ttl": (
630+
"Proposal 0086 default cache_ttl_seconds; not implemented"
631+
),
560632
}
561633

562634

tests/conformance/test_llm_provider.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,23 @@
112112
"056-call-level-retry-transient": ("per-attempt LLM spans; see test_observability_otel.py"),
113113
"057-call-level-retry-exhaustion": ("per-attempt LLM spans; see test_observability_otel.py"),
114114
"058-call-level-retry-non-transient-no-retry": ("per-attempt LLM spans; see test_observability_otel.py"),
115+
# ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -------------------
116+
# Proposal 0082 (structured-output failure diagnostics, spec v0.77.0)
117+
# extended fixtures 022/023 to additionally assert the now-required
118+
# finish_reason + usage on the structured_output_invalid error; python
119+
# does not yet carry those response-side fields on the error, so the
120+
# extended fixtures defer until a later v0.16.0 PR lands 0082.
121+
"022-structured-output-parse-failure": (
122+
"Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
123+
),
124+
"023-structured-output-validation-failure": (
125+
"Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
126+
),
127+
# Proposal 0062 (LLM completion streaming, spec v0.71.0) -- the stream
128+
# flag on complete() + SSE wire handling + the streaming-unsupported
129+
# rejection. Unimplemented until a later v0.16.0 PR.
130+
"059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
131+
"060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
115132
}
116133

117134

0 commit comments

Comments
 (0)