LunarCommand
diff --git a/‎src/openarmature/graph/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/openarmature/graph/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/openarmature/graph/events.py‎
Lines changed: 81 additions & 1 deletion b/‎src/openarmature/graph/events.py‎
Lines changed: 81 additions & 1 deletion
diff --git a/‎src/openarmature/graph/observer.py‎
Lines changed: 16 additions & 5 deletions b/‎src/openarmature/graph/observer.py‎
Lines changed: 16 additions & 5 deletions
diff --git a/‎src/openarmature/llm/providers/openai.py‎
Lines changed: 96 additions & 3 deletions b/‎src/openarmature/llm/providers/openai.py‎
Lines changed: 96 additions & 3 deletions
@@ -38,6 +38,7 @@
 from .events import (
     InvocationCompletedEvent,
     InvocationStartedEvent,
+    LlmCompletionEvent,
     MetadataAugmentationEvent,
     NodeEvent,
 )
@@ -84,6 +85,7 @@
     "GraphError",
     "InvocationCompletedEvent",
     "InvocationStartedEvent",
+    "LlmCompletionEvent",
     "MappingReferencesUndeclaredField",
     "MetadataAugmentationEvent",
     "Middleware",
 
@@ -16,13 +16,21 @@
 from collections.abc import Mapping
 from dataclasses import dataclass, field
 from types import MappingProxyType
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
 from openarmature.observability.metadata import AttributeValue
 
 from .errors import RuntimeGraphError
 from .state import State
 
+# TYPE_CHECKING import — the runtime Usage class lives in the llm
+# package, which transitively imports from graph.events (the
+# OpenAI provider imports NodeEvent). Using a TYPE_CHECKING import
+# plus a string annotation on LlmCompletionEvent.usage avoids the
+# circular runtime import while keeping pyright type-safe.
+if TYPE_CHECKING:
+    from openarmature.llm.response import Usage
+
 # Sentinel empty metadata mapping for events constructed without a
 # live caller-metadata snapshot (test helpers, synthetic events).
 # Read-only proxy keeps the default allocation-free.
@@ -430,10 +438,82 @@ class InvocationCompletedEvent:
     correlation_id: str | None
 
 
+# Spec: realizes proposal 0049's first spec-normatively-typed event
+# variant on the observer event union (graph-engine §6 +
+# observability §5.5.7). Dispatched on every LLM provider call that
+# returns a structured response, alongside the calling node's
+# NodeEvent pair. Failure cases (provider exceptions, malformed
+# responses) flow through the existing exception path and do NOT
+# emit this variant. Not subject to the §6 ``phases`` subscription
+# filter (matches MetadataAugmentationEvent / InvocationStartedEvent
+# / InvocationCompletedEvent treatment).
+#
+# Field naming matches the spec-canonical names verbatim per the spec
+# Q5 ack — Python snake_case happens to match the spec table 1:1.
+@dataclass(frozen=True)
+class LlmCompletionEvent:
+    """A typed LLM provider call event delivered to observers.
+
+    Carries identity, scoping, and outcome data for an LLM call as
+    structured fields. Observer code filters by type discrimination
+    (``isinstance(event, LlmCompletionEvent)``) rather than by
+    string-matching the sentinel namespace, as the legacy NodeEvent
+    pattern requires.
+
+    Field set:
+
+    - ``invocation_id``: the outer invocation's identifier.
+    - ``correlation_id``: cross-backend correlation id when present.
+    - ``node_name``: the user-defined node that issued the call.
+    - ``namespace``: the calling node's namespace tuple (NOT the
+      legacy sentinel namespace).
+    - ``attempt_index``: retry-attempt index (0 on first attempt).
+    - ``fan_out_index``: fan-out instance index when the calling
+      node ran inside a fan-out instance; ``None`` otherwise.
+    - ``branch_name``: parallel-branches branch name when the
+      calling node ran inside a branch; ``None`` otherwise.
+    - ``provider``: provider identifier; matches ``gen_ai.system``.
+    - ``model``: the model identifier the call targeted.
+    - ``request_id``: provider-returned response id; ``None`` when
+      the provider didn't return one.
+    - ``usage``: token-accounting record per ``Response.usage``
+      shape. Reuses the existing ``openarmature.llm.response.Usage``
+      class. ``None`` when the call returned no usage at all.
+    - ``latency_ms``: wall-clock latency measured at the adapter
+      boundary, in milliseconds. ``None`` when latency was not
+      measured.
+    - ``finish_reason``: the call's finish reason; ``None`` when
+      the call did not complete normally.
+    - ``caller_invocation_metadata``: optional snapshot of caller-
+      supplied invocation metadata at LLM-call time. Defaults to
+      ``None``; set only when the emitter opts in. ``frozen=True``
+      does not freeze the mapping itself — treat it as read-only.
+    """
+
+    invocation_id: str
+    correlation_id: str | None
+    node_name: str
+    namespace: tuple[str, ...]
+    attempt_index: int
+    fan_out_index: int | None
+    branch_name: str | None
+    provider: str
+    model: str
+    request_id: str | None
+    # Usage is a string-typed forward reference per the TYPE_CHECKING
+    # import above — keeps the runtime import direction graph → llm
+    # off the module-load path while preserving pyright resolution.
+    usage: "Usage | None"
+    latency_ms: float | None
+    finish_reason: str | None
+    caller_invocation_metadata: Mapping[str, AttributeValue] | None = None
+
+
 __all__ = [
     "FanOutEventConfig",
     "InvocationCompletedEvent",
     "InvocationStartedEvent",
+    "LlmCompletionEvent",
     "MetadataAugmentationEvent",
     "NodeEvent",
     "ParallelBranchesEventConfig",
 
@@ -37,17 +37,28 @@
 from .events import (
     InvocationCompletedEvent,
     InvocationStartedEvent,
+    LlmCompletionEvent,
     MetadataAugmentationEvent,
     NodeEvent,
 )
 from .state import State
 
 # Union of every event variant an Observer may receive. NodeEvent is
-# the original §6 started/completed/checkpoint shape; the other three
-# are side-channel events (proposal 0040 for augmentation; proposal
-# 0043 for invocation-boundary trace.input/output sourcing) that
-# bypass the phase filter and reach every subscribed observer.
-ObserverEvent = NodeEvent | MetadataAugmentationEvent | InvocationStartedEvent | InvocationCompletedEvent
+# the original §6 started/completed/checkpoint shape; the other
+# variants are side-channel events that bypass the phase filter and
+# reach every subscribed observer — MetadataAugmentationEvent
+# (proposal 0040 mid-invocation metadata augmentation),
+# InvocationStartedEvent / InvocationCompletedEvent (proposal 0043
+# trace.input/output sourcing), and LlmCompletionEvent (proposal
+# 0049 typed LLM provider call event, dispatched on every successful
+# LLM completion alongside the calling node's NodeEvent pair).
+ObserverEvent = (
+    NodeEvent
+    | MetadataAugmentationEvent
+    | InvocationStartedEvent
+    | InvocationCompletedEvent
+    | LlmCompletionEvent
+)
 
 
 class Observer(Protocol):
 
@@ -53,25 +53,28 @@
 import hashlib
 import json
 import re
+import time
 import uuid
-from collections.abc import Sequence
+from collections.abc import Mapping, Sequence
 from typing import Any, Literal, cast
 from urllib.parse import urlparse
 
 import httpx
 import jsonschema
 from pydantic import BaseModel, ValidationError
 
-from openarmature.graph.events import NodeEvent
+from openarmature.graph.events import LlmCompletionEvent, NodeEvent
 from openarmature.observability.correlation import (
     current_attempt_index,
     current_branch_name,
+    current_correlation_id,
     current_dispatch,
     current_fan_out_index,
+    current_invocation_id,
     current_namespace_prefix,
 )
 from openarmature.observability.llm_event import LlmEventPayload
-from openarmature.observability.metadata import current_invocation_metadata
+from openarmature.observability.metadata import AttributeValue, current_invocation_metadata
 
 # ``current_prompt_group`` / ``current_prompt_result`` are imported
 # lazily inside :meth:`OpenAIProvider.complete` to avoid a module-load
@@ -157,6 +160,7 @@ def __init__(
         force_prompt_augmentation_fallback: bool = False,
         genai_system: str = "openai",
         readiness_probe: Literal["models", "chat_completions", "both"] = "chat_completions",
+        populate_caller_metadata: bool = False,
     ) -> None:
         self.base_url = _validate_and_normalize_base_url(base_url)
         self.model = model
@@ -189,6 +193,14 @@ def __init__(
                 f"readiness_probe must be one of {sorted(_VALID_READINESS_PROBES)} (got {readiness_probe!r})"
             )
         self._readiness_probe = readiness_probe
+        # Proposal 0049's caller_invocation_metadata field is OPTIONAL
+        # on the typed LlmCompletionEvent: default absent, populated
+        # only when the consumer opts in. The per-language opt-in
+        # mechanism is a constructor knob here, so the provider can
+        # decide at emission time without engine-level observer introspection.
+        # Off by default to avoid bloating every event with potentially-
+        # large metadata snapshots when nothing downstream consumes them.
+        self._populate_caller_metadata = populate_caller_metadata
         self._headers: dict[str, str] = {"Content-Type": "application/json"}
         if api_key is not None:
             self._headers["Authorization"] = f"Bearer {api_key}"
@@ -443,10 +455,30 @@ async def complete(
                 )
             )
 
+        # Wall-clock latency measured at the adapter boundary per
+        # proposal 0049's LlmCompletionEvent.latency_ms contract. The
+        # boundary spans from "just before _do_complete is called" to
+        # "_do_complete returns with a parsed Response in hand" —
+        # covers HTTP setup, request emission, provider compute,
+        # response receive, AND response parsing into the typed
+        # Response. The spec text "wall-clock latency of the LLM call
+        # measured at the adapter boundary" is silent on whether
+        # parsing is included; including it matches the operator's
+        # mental model of "how long until I had a usable answer"
+        # better than just-the-HTTP-call. perf_counter is the monotonic
+        # high-resolution clock for elapsed-time measurements.
+        adapter_start = time.perf_counter()
         try:
             response = await self._do_complete(body, schema_dict, schema_class)
         except Exception as exc:
             if dispatch is not None:
+                # Failure path: only the sentinel NodeEvent pair fires.
+                # Per proposal 0049 §3 (alternative 3): LlmCompletionEvent
+                # is completion-only; failures flow through the
+                # llm-provider §7 exception path. The error continues
+                # to surface through the existing observer chain via
+                # the sentinel NodeEvent's error_type / error_category
+                # fields on LlmEventPayload.
                 dispatch(
                     _make_llm_event(
                         "completed",
@@ -462,8 +494,14 @@ async def complete(
                     )
                 )
             raise
+        latency_ms = (time.perf_counter() - adapter_start) * 1000.0
 
         if dispatch is not None:
+            # Sentinel NodeEvent pair stays during the dual-emit window
+            # per proposal 0049 §5.5.7 SHOULD-emit-both transition. The
+            # window stays open through v0.13.0 with the sentinel
+            # emission removed in v0.15.0 (CHANGELOG callout pinned to
+            # the v0.13.0 release notes).
             dispatch(
                 _make_llm_event(
                     "completed",
@@ -482,8 +520,63 @@ async def complete(
                     active_prompt_group=active_prompt_group,
                 )
             )
+            # The new typed LlmCompletionEvent — observers filtering via
+            # isinstance(event, LlmCompletionEvent) receive this; legacy
+            # observers filtering on the sentinel namespace see the
+            # NodeEvent pair above. Failure path doesn't reach here.
+            dispatch(
+                self._build_llm_completion_event(response, latency_ms),
+            )
         return response
 
+    def _build_llm_completion_event(self, response: Response, latency_ms: float) -> LlmCompletionEvent:
+        """Construct the typed LlmCompletionEvent for the success path.
+
+        Sources identity / scoping fields from the calling-node
+        ContextVars and outcome fields from the response. The calling-
+        node namespace is the FULL namespace tuple (not the legacy
+        sentinel pseudo-namespace); node_name is the last element of
+        the namespace (the user-defined node that issued the call).
+        Outside any node body (namespace empty), node_name is the
+        empty string.
+        """
+
+        namespace = current_namespace_prefix()
+        node_name = namespace[-1] if namespace else ""
+        # invocation_id is normally always present once invoke() entry
+        # has run, but the LLM provider can be exercised in test
+        # fixtures outside an invocation. Spec proposal 0049's field
+        # table types invocation_id as a non-nullable string, so we
+        # fall back to empty string rather than None to keep the event
+        # constructable. Downstream observers using invocation_id as a
+        # correlation key should treat "" as "not in an invocation"
+        # and either skip or special-case those events; collisions
+        # across multiple out-of-invocation calls are theoretically
+        # possible but not a path production code should hit.
+        invocation_id = current_invocation_id() or ""
+        caller_metadata: Mapping[str, AttributeValue] | None = None
+        if self._populate_caller_metadata:
+            # Shallow-copy via dict() so the event keeps a point-in-time
+            # snapshot; the copy is a plain mutable dict, not a frozen
+            # view, so consumers should treat it as read-only.
+            caller_metadata = dict(current_invocation_metadata())
+        return LlmCompletionEvent(
+            invocation_id=invocation_id,
+            correlation_id=current_correlation_id(),
+            node_name=node_name,
+            namespace=namespace,
+            attempt_index=current_attempt_index(),
+            fan_out_index=current_fan_out_index(),
+            branch_name=current_branch_name(),
+            provider=self._genai_system,
+            model=self.model,
+            request_id=response.response_id,
+            usage=response.usage,
+            latency_ms=latency_ms,
+            finish_reason=response.finish_reason,
+            caller_invocation_metadata=caller_metadata,
+        )
+
     async def _do_complete(
         self,
         body: dict[str, Any],