Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 99 additions & 1 deletion conformance.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

[manifest]
implementation = "openarmature-python"
spec_pin = "v0.70.1"
spec_pin = "v0.84.0"

# Status values:
# implemented — shipped behavior matches the proposal's contract
Expand Down Expand Up @@ -764,3 +764,101 @@ note = "ParallelBranchesNode gains two additive branch forms. (1) Inline-callabl
status = "implemented"
since = "0.15.0"
note = "The model's output tool calls get an output-side home on the openarmature.llm.complete span. observability §5.5.10 adds the UNGATED identity projections openarmature.llm.output.tool_calls.count / .names / .ids (the class of openarmature.llm.model / attempt_index; emitted only on a tool-calling completion, omitted entirely otherwise -- not count=0); .names and .ids are index-aligned in request order, .count equals their length. §5.5.1 adds the GATED openarmature.llm.output.tool_calls, the full [{id, name, arguments}] serialization (reusing the §5.5.5 input tool-call encoding) carrying the arguments, suppressed under disable_provider_payload and subject to the truncation contract. graph-engine §6: LlmCompletionEvent gains an output_tool_calls field (the ToolCall records, populated unconditionally). python carries the field on BOTH the terminal LlmCompletionEvent (spec-conformance + the Langfuse/consumer path) and the python-internal per-attempt LlmRetryAttemptEvent, and the OTel observer renders the span attributes from the per-attempt event (the LLM-span source since 0050) -- mirroring how output_content already works. OA-namespace, no gen_ai.* mirror (the attempt_index precedent). Langfuse request-side mapping is OUT OF SCOPE (proposal defers it as future work); no Langfuse change. Fixtures 085 (two calls -> count/names/ids), 086 (no calls -> family absent), 087 (payload-gating: identity survives off, gated full present only on)."

# Spec v0.71.0 (proposal 0062). LLM completion streaming -- an opt-in
# stream flag on complete() emitting a per-chunk LlmTokenEvent plus the
# §6 streaming-assembly reassembly into the atomic Response. Python has
# not yet shipped streaming; this pin bump leaves it not-yet. llm-provider
# fixtures 059/060 and observability fixtures 111-118 defer with it.
[proposals."0062"]
status = "not-yet"

# Spec v0.72.0 (proposal 0077). Retrieval-provider TEI wire mapping
# (§8 / §8.1) plus the input_type embedding knob (§2 / §3). Python has
# not yet shipped the retrieval wire mappings; not-yet. retrieval-
# provider fixtures 013-017 defer with it.
[proposals."0077"]
status = "not-yet"

# Spec v0.73.0 (proposal 0078). Retrieval-provider Jina hosted wire
# mapping (§8.2). Not-yet; retrieval-provider fixtures 018-022 defer
# with it.
[proposals."0078"]
status = "not-yet"

# Spec v0.74.0 (proposal 0079). Retrieval-provider OpenAI-compatible
# embeddings wire mapping (§8.3). Not-yet; retrieval-provider fixtures
# 023-027 defer with it.
[proposals."0079"]
status = "not-yet"

# Spec v0.75.0 (proposal 0080). PromptGroup arity enforcement
# (prompt-management §10 / §11 -- construct-time raise plus the new
# prompt_group_invalid error category). Not-yet; prompt-management
# fixture 035 defers with it.
[proposals."0080"]
status = "not-yet"

# Spec v0.76.0 (proposal 0081). Conformance-adapter value-matcher
# vocabulary (§5.10) -- ratifies the fixture matcher tokens already in
# use. Descriptive (no fixture changes); the one tightening with teeth
# (<any-string> = non-empty) is already enforced by the adapter
# (tests/conformance/test_observability.py _value_matches rejects the
# empty string). No module-level change required; matches the
# conformance-adapter textual-only precedent (0055 / 0071).
[proposals."0081"]
status = "textual-only"
since = "0.16.0"

# Spec v0.77.0 (proposal 0082). Structured-output failure diagnostics
# (graph-engine §6 -- LlmFailedEvent response-side surface for
# structured_output_invalid + llm-provider §7 finish_reason / usage).
# Not-yet; llm-provider fixtures 022/023 and observability fixtures
# 120-125 defer with it.
[proposals."0082"]
status = "not-yet"

# Spec v0.78.0 (proposal 0083). Per-prompt token-budget observability
# (prompt-management §3 / graph-engine §6 / observability §5.5.15).
# Not-yet; observability fixtures 126-131 defer with it.
[proposals."0083"]
status = "not-yet"

# Spec v0.81.0 (proposal 0084). Nested-fan-out span lineage chain
# (graph-engine §6 fan_out_index_chain / branch_name_chain +
# observability §4 / §5.5 lineage-resolved parent). Not-yet; graph-
# engine fixture 039 and observability fixtures 132-134 defer with it.
[proposals."0084"]
status = "not-yet"

# Spec v0.80.0 (proposal 0085). Nested-fan-out checkpoint resume
# lineage (pipeline-utilities §10.11 enclosing_fan_out_lineage).
[proposals."0085"]
status = "partial"
since = "0.16.0"
note = "The SAVE-side enclosing_fan_out_lineage keying (pipeline-utilities §10.11) shipped in #194: a fan-out instance's checkpoint tracking key carries the enclosing fan-out instance lineage in the in-memory dict and through the checkpoint projection / lookup / cleanup / restore, so concurrent outer instances no longer collide. partial because the RESUME consume-side is not yet shipped: a fan-out nested inside an outer instance re-runs rather than skipping on resume, since the saved record format carries no lineage (tracked as a follow-up). pipeline-utilities fixture 076 is not collected by the test_pipeline_utilities.py _LAST_DRIVEN_FIXTURE number gate (it is not deferred); the resume consume-side plus its fixture wiring land in a later PR."

# Spec v0.79.0 (proposal 0086). Service-wide default cache_ttl_seconds
# on PromptManager (prompt-management §6). Not-yet; prompt-management
# fixture 036 defers with it.
[proposals."0086"]
status = "not-yet"

# Spec v0.82.0 (proposal 0087). Conformance-adapter within-node
# directive execution order (§8.3). Not-yet; observability fixture 135
# defers with it.
[proposals."0087"]
status = "not-yet"

# Spec v0.83.0 (proposal 0088). Langfuse parallel-branches mapping
# parity (observability §8.4.8). Not-yet; observability fixture 136
# defers with it.
[proposals."0088"]
status = "not-yet"

# Spec v0.84.0 (proposal 0089). Embedding / rerank typed-event output
# (graph-engine §6 EmbeddingEvent.output_vectors / RerankEvent.
# output_results + observability output mappings). Not-yet;
# observability fixtures 137/138 defer with it (and re-source 083/108).
[proposals."0089"]
status = "not-yet"
2 changes: 1 addition & 1 deletion openarmature-spec
Submodule openarmature-spec updated 160 files
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
openarmature = "openarmature.cli:main"

[tool.openarmature]
spec_version = "0.70.1"
spec_version = "0.84.0"

[dependency-groups]
dev = [
Expand Down
23 changes: 12 additions & 11 deletions src/openarmature/AGENTS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# OpenArmature — Agent documentation

*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.70.1). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
*This is the agent guide bundled with the openarmature Python package, version 0.15.0 (spec v0.84.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*

## TL;DR

Expand All @@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:

## Capability contracts

_Sourced from openarmature-spec v0.70.1. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
_Sourced from openarmature-spec v0.84.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md` verbatim — including additions from accepted proposals that this Python implementation may not yet ship. For per-proposal implementation status (implemented / partial / textual-only / not-yet), see the `conformance.toml` manifest at the repo root. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._

### Capability: `graph-engine`

Expand Down Expand Up @@ -221,7 +221,7 @@ and is invisible to nodes that don't opt into middleware.

**Middleware.** An async callable with the shape:

```
```python
async def middleware(state, next) -> partial_update
```

Expand Down Expand Up @@ -262,13 +262,14 @@ second, and so on, with the original node at the inner end.

For a chain `[m1, m2, m3]` wrapping node `n`, execution proceeds:

```
m1 sees state, calls next(s) ────► m2 sees state, calls next(s) ────► m3 sees state, calls next(s)
n(state) → partial_update
m1 returns partial_update ◄──── m2 returns partial_update ◄──── m3 returns partial_update
```mermaid
flowchart LR
m1["m1 sees state,<br/>calls next(s)"] --> m2["m2 sees state,<br/>calls next(s)"]
m2 --> m3["m3 sees state,<br/>calls next(s)"]
m3 --> n["n(state) produces<br/>partial_update"]
n --> r3["m3 returns<br/>partial_update"]
r3 --> r2["m2 returns<br/>partial_update"]
r2 --> r1["m1 returns<br/>partial_update"]
```

Each middleware's return value flows back through the previous layer's `next` call return.
Expand All @@ -283,7 +284,7 @@ The two phases are tied to a single position in the chain: if `m1` is outermost,
runs first AND `m1`'s post-phase runs last. Pre-order and post-order are not configured
independently. Concretely, a middleware function carries both phases:

```
```python
async def my_middleware(state, next):
# ── pre-node phase: runs on the way IN ──
started_at = time.time()
Expand Down
2 changes: 1 addition & 1 deletion src/openarmature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"""

__version__ = "0.15.0"
__spec_version__ = "0.70.1"
__spec_version__ = "0.84.0"
# Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
# package-registry name for this implementation. Surfaces on every
# OTel invocation span as ``openarmature.implementation.name`` and on
Expand Down
72 changes: 72 additions & 0 deletions tests/conformance/test_fixture_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,78 @@ def _id(case: tuple[str, Path]) -> str:
"observability/110-otel-callable-branch-span": (
"Cross-capability parser doesn't model final_state + span_tree together; runs in test_observability"
),
# ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -----------------------
# New fixtures whose directive shapes the cross-capability parser doesn't
# model, for proposals deferred to their own later v0.16.0 PRs. Each runs
# (or stays accounted for) in its capability runner once that proposal lands.
# Proposal 0062 (LLM completion streaming, v0.71.0) -- the stream-flag
# llm-provider wire fixtures + the per-chunk LlmTokenEvent observability
# fixtures. (117 parses cleanly and is accounted for in test_observability.)
"llm-provider/059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
"llm-provider/060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
"observability/111-llm-token-event-dispatch-on-stream": "Proposal 0062 streaming; not implemented",
"observability/112-llm-token-event-absent-without-stream": "Proposal 0062 streaming; not implemented",
"observability/113-streamed-tool-call-reassembles-no-token-events": (
"Proposal 0062 streaming; not implemented"
),
"observability/114-llm-token-event-then-failure-mid-stream": "Proposal 0062 streaming; not implemented",
"observability/115-llm-token-event-call-id-links-to-completion": (
"Proposal 0062 streaming; not implemented"
),
"observability/116-llm-token-event-call-level-retry-one-call-id": (
"Proposal 0062 streaming; not implemented"
),
"observability/118-llm-token-event-reasoning-delta-kind": "Proposal 0062 streaming; not implemented",
# Proposal 0075 (callable branches) coverage round-out fixture 119
# (v0.73.1); the cross-capability parser doesn't model its graph-style
# shape (cf. 110). Accounted for in test_observability.
"observability/119-otel-callable-branch-attempt-index-under-node-retry": (
"Proposal 0075 callable-branch coverage round-out; harness shape not modelled"
),
# Proposal 0082 (structured-output failure diagnostics, v0.77.0) -- the
# LlmFailedEvent response-side directive shape.
"observability/120-llm-failure-event-structured-output-truncation": (
"Proposal 0082 structured-output failure diagnostics; not implemented"
),
"observability/121-llm-failure-event-structured-output-schema-mismatch": (
"Proposal 0082 structured-output failure diagnostics; not implemented"
),
"observability/122-llm-failure-event-response-side-null-on-non-body-failure": (
"Proposal 0082 structured-output failure diagnostics; not implemented"
),
# Proposal 0083 (per-prompt token-budget observability, v0.78.0) -- the
# token_budget directive shape + budget-exceeded expectations.
"observability/126-token-budget-input-exceeded": "Proposal 0083 token-budget; not implemented",
"observability/127-token-budget-total-exceeded": "Proposal 0083 token-budget; not implemented",
"observability/128-token-budget-under-budget-no-warning": "Proposal 0083 token-budget; not implemented",
"observability/129-token-budget-absent-unchanged": "Proposal 0083 token-budget; not implemented",
"observability/130-langfuse-token-budget-warning-level": "Proposal 0083 token-budget; not implemented",
"observability/131-token-budget-on-structured-output-failure": (
"Proposal 0083 token-budget; not implemented"
),
# Proposal 0084 (nested-fan-out span lineage, v0.81.0) -- the
# lineage-chain directive shapes. (132 parses cleanly; accounted for in
# test_observability.)
"observability/133-otel-nested-fan-out-orphan-llm-fallback": (
"Proposal 0084 nested-fan-out span lineage; not implemented"
),
"observability/134-langfuse-nested-fan-out-parent-resolution": (
"Proposal 0084 nested-fan-out span lineage; not implemented"
),
# Proposal 0087 (within-node directive execution order, v0.82.0).
"observability/135-within-node-directive-execution-order": (
"Proposal 0087 within-node directive execution order; not implemented"
),
# Proposal 0089 (embedding / rerank typed-event output, v0.84.0) -- the
# rerank failure observation directive shape (rerank capability unshipped).
"observability/138-langfuse-rerank-failure-observation": (
"Proposal 0089 rerank failure observation; rerank capability not implemented"
),
# Proposal 0086 (PromptManager default cache_ttl_seconds, v0.79.0) -- the
# manager default-cache-ttl directive shape.
"prompt-management/036-prompt-manager-default-cache-ttl": (
"Proposal 0086 default cache_ttl_seconds; not implemented"
),
}


Expand Down
21 changes: 21 additions & 0 deletions tests/conformance/test_llm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,27 @@
"056-call-level-retry-transient": ("per-attempt LLM spans; see test_observability_otel.py"),
"057-call-level-retry-exhaustion": ("per-attempt LLM spans; see test_observability_otel.py"),
"058-call-level-retry-non-transient-no-retry": ("per-attempt LLM spans; see test_observability_otel.py"),
# ----- v0.16.0 spec-pin bump (v0.70.1 -> v0.84.0) -------------------
# Proposal 0082 (structured-output failure diagnostics, spec v0.77.0)
# extended fixtures 022/023 to additionally assert the now-required
# finish_reason + usage on the structured_output_invalid error. The
# base structured_output_invalid mapping still has coverage in
# tests/unit/test_structured_output.py::
# test_pydantic_validation_failure_wraps_in_structured_output_invalid;
# only 0082's additive finish_reason / usage-on-error response-side
# fields are unimplemented, which is why the conformance fixtures defer
# until a later v0.16.0 PR lands 0082.
"022-structured-output-parse-failure": (
"Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
),
"023-structured-output-validation-failure": (
"Proposal 0082 finish_reason/usage on structured_output_invalid; not implemented"
),
# Proposal 0062 (LLM completion streaming, spec v0.71.0) -- the stream
# flag on complete() + SSE wire handling + the streaming-unsupported
# rejection. Unimplemented until a later v0.16.0 PR.
"059-openai-streaming-wire": "Proposal 0062 streaming; not implemented",
"060-stream-unsupported-mapping-rejects": "Proposal 0062 streaming; not implemented",
}


Expand Down
Loading