Skip to content

Commit 3307b5b

Browse files
Add LlmCompletionEvent typed event variant (#137)
Define LlmCompletionEvent as a new typed variant on the observer event union per proposal 0049. Fields carry the LLM call's identity, scoping, and outcome data as a structured record; observers filter via isinstance() rather than the impl-current sentinel-namespace string match. The Usage field reuses the existing openarmature.llm.response.Usage class via a TYPE_CHECKING import and string forward reference, sidestepping the runtime cycle through openarmature.llm. The OpenAI provider emits the typed event alongside the existing sentinel NodeEvent pair during the dual-emit transition window per spec section 5.5.7's SHOULD-emit-both framing. Failure paths emit only the sentinel pair; the typed event is completion-only per the proposal's v1 scope. Wall-clock latency is measured at the adapter boundary with time.perf_counter(). The opt-in caller_invocation_metadata field is wired via a new populate_caller_metadata constructor kwarg on OpenAIProvider per spec Q2 ack (default False; snapshot via dict() when True). The OTel and Langfuse observers gain an isinstance early-return branch for LlmCompletionEvent to stay Protocol-compatible with the widened ObserverEvent union. Migration to type-discrimination filtering of the typed event lands in a subsequent PR; this PR keeps observer behavior unchanged (they continue driving spans / generations off the sentinel NodeEvent pair). Foundation for the OTel and Langfuse observer migrations to type-discrimination filtering in subsequent PRs of the v0.13.0 LLM hardening cycle.
1 parent dd804e2 commit 3307b5b

8 files changed

Lines changed: 569 additions & 16 deletions

File tree

src/openarmature/graph/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from .events import (
3939
InvocationCompletedEvent,
4040
InvocationStartedEvent,
41+
LlmCompletionEvent,
4142
MetadataAugmentationEvent,
4243
NodeEvent,
4344
)
@@ -84,6 +85,7 @@
8485
"GraphError",
8586
"InvocationCompletedEvent",
8687
"InvocationStartedEvent",
88+
"LlmCompletionEvent",
8789
"MappingReferencesUndeclaredField",
8890
"MetadataAugmentationEvent",
8991
"Middleware",

src/openarmature/graph/events.py

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,21 @@
1616
from collections.abc import Mapping
1717
from dataclasses import dataclass, field
1818
from types import MappingProxyType
19-
from typing import Any, Literal
19+
from typing import TYPE_CHECKING, Any, Literal
2020

2121
from openarmature.observability.metadata import AttributeValue
2222

2323
from .errors import RuntimeGraphError
2424
from .state import State
2525

26+
# TYPE_CHECKING import — the runtime Usage class lives in the llm
27+
# package, which transitively imports from graph.events (the
28+
# OpenAI provider imports NodeEvent). Using a TYPE_CHECKING import
29+
# plus a string annotation on LlmCompletionEvent.usage avoids the
30+
# circular runtime import while keeping pyright type-safe.
31+
if TYPE_CHECKING:
32+
from openarmature.llm.response import Usage
33+
2634
# Sentinel empty metadata mapping for events constructed without a
2735
# live caller-metadata snapshot (test helpers, synthetic events).
2836
# Read-only proxy keeps the default allocation-free.
@@ -430,10 +438,82 @@ class InvocationCompletedEvent:
430438
correlation_id: str | None
431439

432440

441+
# Spec: realizes proposal 0049's first spec-normatively-typed event
442+
# variant on the observer event union (graph-engine §6 +
443+
# observability §5.5.7). Dispatched on every LLM provider call that
444+
# returns a structured response, alongside the calling node's
445+
# NodeEvent pair. Failure cases (provider exceptions, malformed
446+
# responses) flow through the existing exception path and do NOT
447+
# emit this variant. Not subject to the §6 ``phases`` subscription
448+
# filter (matches MetadataAugmentationEvent / InvocationStartedEvent
449+
# / InvocationCompletedEvent treatment).
450+
#
451+
# Field naming matches the spec-canonical names verbatim per the spec
452+
# Q5 ack — Python snake_case happens to match the spec table 1:1.
453+
@dataclass(frozen=True)
454+
class LlmCompletionEvent:
455+
"""A typed LLM provider call event delivered to observers.
456+
457+
Carries identity, scoping, and outcome data for an LLM call as
458+
structured fields. Observer code filters by type discrimination
459+
(``isinstance(event, LlmCompletionEvent)``) rather than by the
460+
impl-current sentinel-namespace string match the legacy
461+
NodeEvent pattern uses.
462+
463+
Field set:
464+
465+
- ``invocation_id``: the outer invocation's identifier.
466+
- ``correlation_id``: cross-backend correlation id when present.
467+
- ``node_name``: the user-defined node that issued the call.
468+
- ``namespace``: the calling node's namespace tuple (NOT the
469+
legacy sentinel namespace).
470+
- ``attempt_index``: retry-attempt index (0 on first attempt).
471+
- ``fan_out_index``: fan-out instance index when the calling
472+
node ran inside a fan-out instance; ``None`` otherwise.
473+
- ``branch_name``: parallel-branches branch name when the
474+
calling node ran inside a branch; ``None`` otherwise.
475+
- ``provider``: provider identifier; matches ``gen_ai.system``.
476+
- ``model``: the model identifier the call targeted.
477+
- ``request_id``: provider-returned response id; ``None`` when
478+
the provider didn't return one.
479+
- ``usage``: token-accounting record per ``Response.usage``
480+
shape. Reuses the existing ``openarmature.llm.response.Usage``
481+
class. ``None`` when the call returned no usage at all.
482+
- ``latency_ms``: wall-clock latency measured at the adapter
483+
boundary, in milliseconds. ``None`` when latency was not
484+
measured.
485+
- ``finish_reason``: the call's finish reason; ``None`` when
486+
the call did not complete normally.
487+
- ``caller_invocation_metadata``: optional snapshot of caller-
488+
supplied invocation metadata at LLM-call time. Populated
489+
only when the provider's opt-in flag is set (per-language
490+
mechanism); default ``None``.
491+
"""
492+
493+
invocation_id: str
494+
correlation_id: str | None
495+
node_name: str
496+
namespace: tuple[str, ...]
497+
attempt_index: int
498+
fan_out_index: int | None
499+
branch_name: str | None
500+
provider: str
501+
model: str
502+
request_id: str | None
503+
# Usage is a string-typed forward reference per the TYPE_CHECKING
504+
# import above — keeps the runtime import direction graph → llm
505+
# off the module-load path while preserving pyright resolution.
506+
usage: "Usage | None"
507+
latency_ms: float | None
508+
finish_reason: str | None
509+
caller_invocation_metadata: Mapping[str, AttributeValue] | None = None
510+
511+
433512
__all__ = [
434513
"FanOutEventConfig",
435514
"InvocationCompletedEvent",
436515
"InvocationStartedEvent",
516+
"LlmCompletionEvent",
437517
"MetadataAugmentationEvent",
438518
"NodeEvent",
439519
"ParallelBranchesEventConfig",

src/openarmature/graph/observer.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,28 @@
3737
from .events import (
3838
InvocationCompletedEvent,
3939
InvocationStartedEvent,
40+
LlmCompletionEvent,
4041
MetadataAugmentationEvent,
4142
NodeEvent,
4243
)
4344
from .state import State
4445

4546
# Union of every event variant an Observer may receive. NodeEvent is
46-
# the original §6 started/completed/checkpoint shape; the other three
47-
# are side-channel events (proposal 0040 for augmentation; proposal
48-
# 0043 for invocation-boundary trace.input/output sourcing) that
49-
# bypass the phase filter and reach every subscribed observer.
50-
ObserverEvent = NodeEvent | MetadataAugmentationEvent | InvocationStartedEvent | InvocationCompletedEvent
47+
# the original §6 started/completed/checkpoint shape; the other
48+
# variants are side-channel events that bypass the phase filter and
49+
# reach every subscribed observer — MetadataAugmentationEvent
50+
# (proposal 0040 mid-invocation metadata augmentation),
51+
# InvocationStartedEvent / InvocationCompletedEvent (proposal 0043
52+
# trace.input/output sourcing), and LlmCompletionEvent (proposal
53+
# 0049 typed LLM provider call event, dispatched on every successful
54+
# LLM completion alongside the calling node's NodeEvent pair).
55+
ObserverEvent = (
56+
NodeEvent
57+
| MetadataAugmentationEvent
58+
| InvocationStartedEvent
59+
| InvocationCompletedEvent
60+
| LlmCompletionEvent
61+
)
5162

5263

5364
class Observer(Protocol):

src/openarmature/llm/providers/openai.py

Lines changed: 96 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,25 +53,28 @@
5353
import hashlib
5454
import json
5555
import re
56+
import time
5657
import uuid
57-
from collections.abc import Sequence
58+
from collections.abc import Mapping, Sequence
5859
from typing import Any, Literal, cast
5960
from urllib.parse import urlparse
6061

6162
import httpx
6263
import jsonschema
6364
from pydantic import BaseModel, ValidationError
6465

65-
from openarmature.graph.events import NodeEvent
66+
from openarmature.graph.events import LlmCompletionEvent, NodeEvent
6667
from openarmature.observability.correlation import (
6768
current_attempt_index,
6869
current_branch_name,
70+
current_correlation_id,
6971
current_dispatch,
7072
current_fan_out_index,
73+
current_invocation_id,
7174
current_namespace_prefix,
7275
)
7376
from openarmature.observability.llm_event import LlmEventPayload
74-
from openarmature.observability.metadata import current_invocation_metadata
77+
from openarmature.observability.metadata import AttributeValue, current_invocation_metadata
7578

7679
# ``current_prompt_group`` / ``current_prompt_result`` are imported
7780
# lazily inside :meth:`OpenAIProvider.complete` to avoid a module-load
@@ -157,6 +160,7 @@ def __init__(
157160
force_prompt_augmentation_fallback: bool = False,
158161
genai_system: str = "openai",
159162
readiness_probe: Literal["models", "chat_completions", "both"] = "chat_completions",
163+
populate_caller_metadata: bool = False,
160164
) -> None:
161165
self.base_url = _validate_and_normalize_base_url(base_url)
162166
self.model = model
@@ -189,6 +193,14 @@ def __init__(
189193
f"readiness_probe must be one of {sorted(_VALID_READINESS_PROBES)} (got {readiness_probe!r})"
190194
)
191195
self._readiness_probe = readiness_probe
196+
# Proposal 0049's caller_invocation_metadata field is OPTIONAL
197+
# on the typed LlmCompletionEvent: default absent, populated
198+
# only when the consumer opts in. The per-language opt-in
199+
# mechanism is constructor-knob here so the provider can decide
200+
# at emission time without engine-level observer introspection.
201+
# Off by default to avoid bloating every event with potentially-
202+
# large metadata snapshots when nothing downstream consumes them.
203+
self._populate_caller_metadata = populate_caller_metadata
192204
self._headers: dict[str, str] = {"Content-Type": "application/json"}
193205
if api_key is not None:
194206
self._headers["Authorization"] = f"Bearer {api_key}"
@@ -443,10 +455,30 @@ async def complete(
443455
)
444456
)
445457

458+
# Wall-clock latency measured at the adapter boundary per
459+
# proposal 0049's LlmCompletionEvent.latency_ms contract. The
460+
# boundary spans from "just before _do_complete is called" to
461+
# "_do_complete returns with a parsed Response in hand" —
462+
# covers HTTP setup, request emission, provider compute,
463+
# response receive, AND response parsing into the typed
464+
# Response. The spec text "wall-clock latency of the LLM call
465+
# measured at the adapter boundary" is silent on whether
466+
# parsing is included; including it matches the operator's
467+
# mental model of "how long until I had a usable answer"
468+
# better than just-the-HTTP-call. perf_counter is the monotonic
469+
# high-resolution clock for elapsed-time measurements.
470+
adapter_start = time.perf_counter()
446471
try:
447472
response = await self._do_complete(body, schema_dict, schema_class)
448473
except Exception as exc:
449474
if dispatch is not None:
475+
# Failure path: only the sentinel NodeEvent pair fires.
476+
# Per proposal 0049 §3 (alternative 3): LlmCompletionEvent
477+
# is completion-only; failures flow through the
478+
# llm-provider §7 exception path. The error continues
479+
# to surface through the existing observer chain via
480+
# the sentinel NodeEvent's error_type / error_category
481+
# fields on LlmEventPayload.
450482
dispatch(
451483
_make_llm_event(
452484
"completed",
@@ -462,8 +494,14 @@ async def complete(
462494
)
463495
)
464496
raise
497+
latency_ms = (time.perf_counter() - adapter_start) * 1000.0
465498

466499
if dispatch is not None:
500+
# Sentinel NodeEvent pair stays during the dual-emit window
501+
# per proposal 0049 §5.5.7 SHOULD-emit-both transition. The
502+
# window stays open through v0.13.0 with the sentinel
503+
# emission removed in v0.15.0 (CHANGELOG callout pinned to
504+
# the v0.13.0 release notes).
467505
dispatch(
468506
_make_llm_event(
469507
"completed",
@@ -482,8 +520,63 @@ async def complete(
482520
active_prompt_group=active_prompt_group,
483521
)
484522
)
523+
# The new typed LlmCompletionEvent — observers filtering via
524+
# isinstance(event, LlmCompletionEvent) receive this; legacy
525+
# observers filtering on the sentinel namespace see the
526+
# NodeEvent pair above. Failure path doesn't reach here.
527+
dispatch(
528+
self._build_llm_completion_event(response, latency_ms),
529+
)
485530
return response
486531

532+
def _build_llm_completion_event(self, response: Response, latency_ms: float) -> LlmCompletionEvent:
533+
"""Construct the typed LlmCompletionEvent for the success path.
534+
535+
Sources identity / scoping fields from the calling-node
536+
ContextVars and outcome fields from the response. The calling-
537+
node namespace is the FULL namespace tuple (not the legacy
538+
sentinel pseudo-namespace); node_name is the last element of
539+
the namespace (the user-defined node that issued the call).
540+
Outside any node body (namespace empty), node_name is the
541+
empty string.
542+
"""
543+
544+
namespace = current_namespace_prefix()
545+
node_name = namespace[-1] if namespace else ""
546+
# invocation_id is normally always present once invoke() entry
547+
# has run, but the LLM provider can be exercised in test
548+
# fixtures outside an invocation. Spec proposal 0049's field
549+
# table types invocation_id as a non-nullable string, so we
550+
# fall back to empty string rather than None to keep the event
551+
# constructable. Downstream observers using invocation_id as a
552+
# correlation key should treat "" as "not in an invocation"
553+
# and either skip or special-case those events; collisions
554+
# across multiple out-of-invocation calls are theoretically
555+
# possible but not a path production code should hit.
556+
invocation_id = current_invocation_id() or ""
557+
caller_metadata: Mapping[str, AttributeValue] | None = None
558+
if self._populate_caller_metadata:
559+
# Snapshot via dict() so downstream consumers see a stable
560+
# frozen view; if a node body mutates metadata after the
561+
# snapshot, the event still carries the at-emission view.
562+
caller_metadata = dict(current_invocation_metadata())
563+
return LlmCompletionEvent(
564+
invocation_id=invocation_id,
565+
correlation_id=current_correlation_id(),
566+
node_name=node_name,
567+
namespace=namespace,
568+
attempt_index=current_attempt_index(),
569+
fan_out_index=current_fan_out_index(),
570+
branch_name=current_branch_name(),
571+
provider=self._genai_system,
572+
model=self.model,
573+
request_id=response.response_id,
574+
usage=response.usage,
575+
latency_ms=latency_ms,
576+
finish_reason=response.finish_reason,
577+
caller_invocation_metadata=caller_metadata,
578+
)
579+
487580
async def _do_complete(
488581
self,
489582
body: dict[str, Any],

0 commit comments

Comments
 (0)