Extend LlmCompletionEvent with proposal 0057 fields (#141)

chris-colinsky · web-flow · commit cf76c536cba1 · 2026-06-07T19:12:32.000-07:00
Bump spec pin to v0.51.0 and extend the typed LLM completion event
with request-side fields per accepted proposal 0057: input_messages,
output_content, request_params, request_extras, active_prompt,
active_prompt_group, call_id, and response_model. Rename request_id
to response_id to align with the proposal's response-side naming.

OpenAI provider now populates the new fields at emission. Image
bytes in input_messages stay redacted via the existing serializer.

Observers (OTel, Langfuse) and fixtures 060-068 land in follow-up
PRs in this cycle; conformance.toml marks 0057 implemented since
the typed event contract is satisfied.
diff --git a/conformance.toml b/conformance.toml
@@ -32,7 +32,7 @@
 
 [manifest]
 implementation = "openarmature-python"
-spec_pin = "v0.46.0"
+spec_pin = "v0.51.0"
 
 # Status values:
 #   implemented   — shipped behavior matches the proposal's contract
@@ -202,6 +202,21 @@ status = "not-yet"
 [proposals."0020"]
 status = "not-yet"
 
+# Spec (proposal 0021).  Suspension capability — async-pause +
+# resume primitive (``suspend()`` + ``resume()``) layering on the
+# graph engine.  Python has not yet shipped suspension; v0.13.0
+# leaves the capability not-yet-implemented.
+[proposals."0021"]
+status = "not-yet"
+
+# Spec v0.49.0 (proposal 0022).  Harness capability — abstract
+# contract for wrapping the engine in deployment runtimes
+# (HTTP / event-bus / queue / CLI / streaming).  Python has not
+# yet shipped a harness binding; v0.13.0 leaves the capability
+# not-yet-implemented.  Composes with 0056 (chat sub-spec).
+[proposals."0022"]
+status = "not-yet"
+
 [proposals."0042"]
 status = "implemented"
 since = "0.11.0"
@@ -462,3 +477,35 @@ since = "0.12.0"
 [proposals."0054"]
 status = "implemented"
 since = "0.12.0"
+
+# Spec v0.47.0 (proposal 0055).  Conformance-adapter capability —
+# descriptive ratification of the existing fixture / directive
+# system. No code change; python's adapter is already structured
+# per the spec text by virtue of having grown alongside the
+# fixtures since proposal 0001.  Matches the Textual impl-tracking
+# precedent (0019 / 0026 / 0030 / 0051 / 0053).
+[proposals."0055"]
+status = "textual-only"
+since = "0.13.0"
+
+# Spec v0.48.0 (proposal 0056).  Harness-chat capability — new
+# harness sub-spec ratifying the chat-loop deployment shape
+# (ChatMessage, conversation-history convention, send() callable,
+# send_streaming() forward-looking surface, error-bucket → user-
+# facing-reply mapping).  Python does not yet ship a chat-harness
+# binding; v0.13.0 leaves the capability not-yet-implemented.
+[proposals."0056"]
+status = "not-yet"
+
+# Spec v0.51.0 (proposal 0057).  LlmCompletionEvent field-set
+# extension — eight additive fields on the typed event variant
+# (``response_model`` among them) plus the ``request_id`` →
+# ``response_id`` rename.  Python lands the field-set extension
+# + rename + provider population in v0.13.0 PR 3a; OTel + Langfuse
+# observers continue driving their §5.5 surface off the sentinel
+# NodeEvent pair through this PR (observer migration to type
+# discrimination is queued for follow-up PRs 3b / 3c against the
+# same v0.13.0 release).
+[proposals."0057"]
+status = "implemented"
+since = "0.13.0"
diff --git a/openarmature-spec b/openarmature-spec
@@ -1 +1 @@
-Subproject commit 0264dc21d49b26aa96f33c5fd55f935b634e5f44
+Subproject commit b2045e1beed234ef6620943e13b2c5caecb66e6e
diff --git a/pyproject.toml b/pyproject.toml
@@ -63,7 +63,7 @@ Specification = "https://github.com/LunarCommand/openarmature-spec"
 openarmature = "openarmature.cli:main"
 
 [tool.openarmature]
-spec_version = "0.46.0"
+spec_version = "0.51.0"
 
 [dependency-groups]
 dev = [
diff --git a/src/openarmature/AGENTS.md b/src/openarmature/AGENTS.md
@@ -1,6 +1,6 @@
 # OpenArmature — Agent documentation
 
-*This is the agent guide bundled with the openarmature Python package, version 0.12.0 (spec v0.46.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
+*This is the agent guide bundled with the openarmature Python package, version 0.12.0 (spec v0.51.0). For the full docs site see [openarmature.ai](https://openarmature.ai). For the canonical spec text see [openarmature.org/capabilities](https://openarmature.org/capabilities/). For project-specific conventions for the code you're editing, see the host project's `AGENTS.md` or `CLAUDE.md`.*
 
 ## TL;DR
 
@@ -10,7 +10,7 @@ OpenArmature is a workflow framework for LLM pipelines and tool-calling agents:
 
 ## Capability contracts
 
-_Sourced from openarmature-spec v0.46.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
+_Sourced from openarmature-spec v0.51.0. Each entry below reproduces §1 (Purpose) and §2 (Concepts) of the capability's `spec.md`. For the full spec text (execution model, error semantics, determinism, observer hooks, etc.) see the linked docs site._
 
 ### Capability: `graph-engine`
 
diff --git a/src/openarmature/__init__.py b/src/openarmature/__init__.py
@@ -25,7 +25,7 @@
 """
 
 __version__ = "0.12.0"
-__spec_version__ = "0.46.0"
+__spec_version__ = "0.51.0"
 # Proposal 0052 (spec observability §5.1 / §8.4.1): canonical
 # package-registry name for this implementation. Surfaces on every
 # OTel invocation span as ``openarmature.implementation.name`` and on
diff --git a/src/openarmature/graph/events.py b/src/openarmature/graph/events.py
@@ -450,6 +450,22 @@ class InvocationCompletedEvent:
 #
 # Field naming matches the spec-canonical names verbatim per the spec
 # Q5 ack — Python snake_case happens to match the spec table 1:1.
+#
+# Spec proposal 0057 (v0.51.0) extension: introduces 8 additive
+# fields (input_messages, output_content, request_params,
+# request_extras, active_prompt, active_prompt_group, call_id,
+# response_model) and renames request_id → response_id to match the
+# response-side data the field carries. Inline image bytes in
+# input_messages MUST be redacted per observability §5.5.5 before
+# population — the provider reuses _serialize_messages_for_payload
+# which already enforces the redaction. The three payload-bearing
+# fields (input_messages, output_content, request_extras) are
+# populated unconditionally on the typed event per §5.5.7; observer-
+# side privacy gates (OTel disable_llm_payload, Langfuse equivalents)
+# apply at rendering, symmetric with the §5.5.1 span attribute path.
+# Custom queryable observers (per observability §9) own their own
+# redaction posture — gating belongs at rendering with the consumer's
+# awareness.
 @dataclass(frozen=True)
 class LlmCompletionEvent:
     """A typed LLM provider call event delivered to observers.
@@ -473,17 +489,55 @@ class LlmCompletionEvent:
     - ``branch_name``: parallel-branches branch name when the
       calling node ran inside a branch; ``None`` otherwise.
     - ``provider``: provider identifier; matches ``gen_ai.system``.
-    - ``model``: the model identifier the call targeted.
-    - ``request_id``: provider-returned response id; ``None`` when
+    - ``model``: the model identifier the call targeted (the
+      request-side bound model; distinct from ``response_model``).
+    - ``response_id``: provider-returned response id; ``None`` when
+      the provider didn't return one.
+    - ``response_model``: provider-returned model identifier;
+      distinct from ``model`` (the provider may return a more
+      specific identifier than the one requested). ``None`` when
       the provider didn't return one.
-    - ``usage``: token-accounting record per ``Response.usage``
-      shape. Reuses the existing ``openarmature.llm.response.Usage``
-      class. ``None`` when the call returned no usage at all.
+    - ``usage``: token-accounting record reusing the existing
+      ``openarmature.llm.response.Usage`` class. ``None`` when the
+      call returned no usage at all.
     - ``latency_ms``: wall-clock latency measured at the adapter
       boundary, in milliseconds. ``None`` when latency was not
       measured.
     - ``finish_reason``: the call's finish reason; ``None`` when
       the call did not complete normally.
+    - ``input_messages``: the message list the call was made with,
+      serialized to the plain-dict shape. Non-nullable; empty list
+      when the call had no history. Inline image bytes are
+      redacted before population (see the comment block above for
+      the redaction contract).
+    - ``output_content``: the assistant message's content string
+      from the response. ``None`` on tool-call-only responses
+      (the structured-response and tool-call paths are mutually
+      exclusive at the response level).
+    - ``request_params``: the GenAI request-parameter set the
+      caller supplied. Absence-is-meaningful: only caller-supplied
+      keys appear; empty mapping when none supplied. Keys are the
+      cross-vendor parameter names without the ``gen_ai.request.``
+      prefix (e.g. ``temperature``, ``max_tokens``).
+    - ``request_extras``: the ``RuntimeConfig`` extras pass-
+      through bag in native mapping form (not JSON-encoded).
+      Empty mapping when no extras supplied.
+    - ``active_prompt``: 5-field identity snapshot of the active
+      ``PromptResult`` at LLM-call time (``name`` / ``version`` /
+      ``label`` / ``template_hash`` / ``rendered_hash``).
+      ``None`` when the call ran outside any prompt-context
+      binding. Typed as ``Any`` because the prompts package
+      imports State indirectly; observer-side narrowing reads
+      the attribute names directly.
+    - ``active_prompt_group``: ``{group_name}`` snapshot when the
+      call ran inside a ``PromptGroup`` context; ``None``
+      otherwise. Same ``Any`` typing rationale as
+      ``active_prompt``.
+    - ``call_id``: per-call disambiguator minted by the
+      implementation. Always present, freshly minted per
+      ``provider.complete()`` call, stable for the call's
+      lifetime, unique within the run. Distinct from
+      ``response_id``.
     - ``caller_invocation_metadata``: optional snapshot of caller-
       supplied invocation metadata at LLM-call time. Populated
       only when the provider's opt-in flag is set (per-language
@@ -499,13 +553,26 @@ class LlmCompletionEvent:
     branch_name: str | None
     provider: str
     model: str
-    request_id: str | None
+    response_id: str | None
+    response_model: str | None
     # Usage is a string-typed forward reference per the TYPE_CHECKING
     # import above — keeps the runtime import direction graph → llm
     # off the module-load path while preserving pyright resolution.
     usage: "Usage | None"
     latency_ms: float | None
     finish_reason: str | None
+    # Proposal 0057 (spec v0.51.0) additive request-side fields.
+    # Non-nullable for input_messages / request_params /
+    # request_extras — absence is represented as empty list / empty
+    # mapping, not None. output_content stays nullable for tool-
+    # call-only assistant messages.
+    input_messages: list[dict[str, Any]]
+    output_content: str | None
+    request_params: Mapping[str, Any]
+    request_extras: Mapping[str, Any]
+    active_prompt: Any
+    active_prompt_group: Any
+    call_id: str
     caller_invocation_metadata: Mapping[str, AttributeValue] | None = None
 
 
diff --git a/src/openarmature/llm/providers/openai.py b/src/openarmature/llm/providers/openai.py
@@ -525,20 +525,46 @@ async def complete(
             # observers filtering on the sentinel namespace see the
             # NodeEvent pair above. Failure path doesn't reach here.
             dispatch(
-                self._build_llm_completion_event(response, latency_ms),
+                self._build_llm_completion_event(
+                    response,
+                    latency_ms,
+                    call_id=call_id,
+                    input_messages=serialized_messages,
+                    request_params=request_params,
+                    request_extras=request_extras,
+                    active_prompt=active_prompt,
+                    active_prompt_group=active_prompt_group,
+                ),
             )
         return response
 
-    def _build_llm_completion_event(self, response: Response, latency_ms: float) -> LlmCompletionEvent:
+    def _build_llm_completion_event(
+        self,
+        response: Response,
+        latency_ms: float,
+        *,
+        call_id: str,
+        input_messages: list[dict[str, Any]],
+        request_params: dict[str, Any],
+        request_extras: dict[str, Any],
+        active_prompt: Any,
+        active_prompt_group: Any,
+    ) -> LlmCompletionEvent:
         """Construct the typed LlmCompletionEvent for the success path.
 
         Sources identity / scoping fields from the calling-node
-        ContextVars and outcome fields from the response. The calling-
-        node namespace is the FULL namespace tuple (not the legacy
-        sentinel pseudo-namespace); node_name is the last element of
-        the namespace (the user-defined node that issued the call).
-        Outside any node body (namespace empty), node_name is the
-        empty string.
+        ContextVars and outcome fields from the response. Request-side
+        fields (per proposal 0057) are passed through from the
+        provider's complete() local state — serialized message list,
+        the gen_ai.request.* parameter mapping, the RuntimeConfig
+        extras, the prompt-context snapshots taken at dispatch time,
+        and the call-id minted at the call's start.
+
+        The calling-node namespace is the FULL namespace tuple (not
+        the legacy sentinel pseudo-namespace); node_name is the last
+        element of the namespace (the user-defined node that issued
+        the call). Outside any node body (namespace empty), node_name
+        is the empty string.
         """
 
         namespace = current_namespace_prefix()
@@ -560,6 +586,14 @@ def _build_llm_completion_event(self, response: Response, latency_ms: float) ->
             # frozen view; if a node body mutates metadata after the
             # snapshot, the event still carries the at-emission view.
             caller_metadata = dict(current_invocation_metadata())
+        # ``output_content`` is None on tool-call-only assistant
+        # messages. Per llm-provider §6 the tool-call path and the
+        # structured-content path are mutually exclusive at the
+        # response level, and provider.complete() leaves
+        # AssistantMessage.content as the empty string on the
+        # tool-call path. ``or None`` projects that (and any other
+        # falsy content) to None per the typed-event contract.
+        output_content = response.message.content or None
         return LlmCompletionEvent(
             invocation_id=invocation_id,
             correlation_id=current_correlation_id(),
@@ -570,10 +604,18 @@ def _build_llm_completion_event(self, response: Response, latency_ms: float) ->
             branch_name=current_branch_name(),
             provider=self._genai_system,
             model=self.model,
-            request_id=response.response_id,
+            response_id=response.response_id,
+            response_model=response.response_model,
             usage=response.usage,
             latency_ms=latency_ms,
             finish_reason=response.finish_reason,
+            input_messages=input_messages,
+            output_content=output_content,
+            request_params=request_params,
+            request_extras=request_extras,
+            active_prompt=active_prompt,
+            active_prompt_group=active_prompt_group,
+            call_id=call_id,
             caller_invocation_metadata=caller_metadata,
         )
 
diff --git a/tests/conformance/test_fixture_parsing.py b/tests/conformance/test_fixture_parsing.py
@@ -334,6 +334,37 @@ def _id(case: tuple[str, Path]) -> str:
     "observability/056-llm-completion-event-strict-serial-ordering": (
         "Proposal 0049 typed LLM completion event; queued for v0.13.0"
     ),
+    # Proposal 0057 (LlmCompletionEvent field-set extension, v0.51.0)
+    # — fixtures 060-068 share the same ``typed_observers`` directive
+    # shape as 050-056 and inherit the same parser-deferral status
+    # pending the harness model's typed-event-collector schema work.
+    "observability/060-llm-completion-event-input-messages-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/061-llm-completion-event-output-content-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/062-llm-completion-event-request-params-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/063-llm-completion-event-request-extras-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/064-llm-completion-event-active-prompt-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/065-llm-completion-event-active-prompt-null": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/066-llm-completion-event-active-prompt-group-populated": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/067-llm-completion-event-call-id-always-present-and-distinct": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
+    "observability/068-llm-completion-event-response-model-distinct-from-request": (
+        "Proposal 0057 typed event request-side fields; queued for v0.13.0"
+    ),
     # Proposal 0050 (failure-isolation middleware + call-level retry,
     # v0.42.0) — llm-provider fixtures 056-058 (call-level retry) and
     # pipeline-utilities fixtures 058-063 (failure-isolation
diff --git a/tests/conformance/test_typed_event_harness.py b/tests/conformance/test_typed_event_harness.py
@@ -51,10 +51,18 @@ def _make_typed_event(**overrides: Any) -> LlmCompletionEvent:
         "branch_name": None,
         "provider": "openai",
         "model": "gpt-test",
-        "request_id": "req-1",
+        "response_id": "req-1",
+        "response_model": None,
         "usage": Usage(prompt_tokens=14, completion_tokens=4, total_tokens=18),
         "latency_ms": 42.0,
         "finish_reason": "stop",
+        "input_messages": [],
+        "output_content": None,
+        "request_params": {},
+        "request_extras": {},
+        "active_prompt": None,
+        "active_prompt_group": None,
+        "call_id": "cc-1",
         "caller_invocation_metadata": None,
     }
     base.update(overrides)
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
@@ -9,7 +9,7 @@
 
 def test_package_versions() -> None:
     assert openarmature.__version__ == "0.12.0"
-    assert openarmature.__spec_version__ == "0.46.0"
+    assert openarmature.__spec_version__ == "0.51.0"
 
 
 def test_spec_version_matches_pyproject() -> None:
diff --git a/tests/unit/test_llm_provider.py b/tests/unit/test_llm_provider.py