Skip to content

Commit 342f297

Browse files
Add retrieval-provider embedding capability
Add the embedding surface of the new retrieval-provider capability: an EmbeddingProvider protocol, the EmbeddingResponse / EmbeddingUsage / EmbeddingRuntimeConfig types, and an OpenAIEmbeddingProvider reference impl posting to an OpenAI-compatible /v1/embeddings. Typed EmbeddingEvent / EmbeddingFailedEvent join the observer event union, dispatched on every embed(); the bundled OTel and Langfuse observers safely skip them for now (the embedding span and Embedding observation are a follow-up). The provider rejects malformed responses (missing, empty, or non-numeric vectors and non-permutation indices), guards a trailing /v1 on base_url, and offers a configurable readiness probe defaulting to a universal one-input embed so it works against backends without a /v1/models catalog (e.g. TEI's OpenAI surface). Conformance fixtures 001-005 pass; unit tests cover the guard, the readiness modes, and the observer safe-handling. conformance.toml keeps 0059 not-yet; the observability rendering (074-083), the cross-provider helper cleanups, and the docs are v0.16.0 follow-ups.
1 parent 620b010 commit 342f297

12 files changed

Lines changed: 1236 additions & 93 deletions

File tree

src/openarmature/graph/events.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
if TYPE_CHECKING:
3333
from openarmature.llm.messages import ToolCall
3434
from openarmature.llm.response import Usage
35+
from openarmature.retrieval.response import EmbeddingUsage
3536

3637
# Sentinel empty metadata mapping for events constructed without a
3738
# live caller-metadata snapshot (test helpers, synthetic events).
@@ -749,6 +750,125 @@ class LlmRetryAttemptEvent:
749750
output_tool_calls: list["ToolCall"] = field(default_factory=list["ToolCall"])
750751

751752

753+
# Spec: realizes graph-engine §6 + observability §5.5.9 -- the typed
754+
# EmbeddingEvent / EmbeddingFailedEvent pair (proposal 0059,
755+
# retrieval-provider capability). Dispatched on the observer delivery
756+
# queue per EmbeddingProvider.embed() call: the success variant after the
757+
# response is parsed + validated, the failure variant alongside a raised
758+
# §7 category exception (mutually exclusive per call). Scalar
759+
# fan_out_index / branch_name only; the lineage chains arrive uniformly
760+
# across the provider events with proposal 0084 (v0.81.0). input_strings /
761+
# request_extras are payload-bearing, populated unconditionally; observer-
762+
# side privacy gates (OTel disable_provider_payload, Langfuse equivalents)
763+
# apply at rendering, symmetric with LlmCompletionEvent.
764+
@dataclass(frozen=True)
765+
class EmbeddingEvent:
766+
"""A typed embedding provider call event delivered to observers.
767+
768+
Carries identity, scoping, and outcome data for a successful
769+
``EmbeddingProvider.embed()`` call. Observer code filters by type
770+
discrimination (``isinstance(event, EmbeddingEvent)``).
771+
772+
The identity / scoping / request-side fields mirror
773+
``LlmCompletionEvent``'s convention; the outcome fields are
774+
embedding-specific:
775+
776+
- ``input_strings``: the input strings the call was made with;
777+
non-nullable, populated unconditionally (privacy gating is
778+
observer-side at rendering).
779+
- ``input_count``: ``len(input_strings)``; a convenience field.
780+
- ``dimensions``: the output vector dimensionality from the response;
781+
``None`` when the response surfaced no determinate dimensionality.
782+
- ``response_model`` / ``response_id``: the provider-returned model
783+
and response identifiers; ``None`` when the provider returned none.
784+
- ``usage``: the embedding token record; ``None`` when the call
785+
returned no usage.
786+
- ``request_params``: the embedding request parameters the caller
787+
supplied (e.g. ``dimensions``). Absence-is-meaningful: only supplied
788+
keys appear; an empty mapping when none.
789+
- ``request_extras``: the runtime-config extras pass-through bag.
790+
- ``active_prompt`` / ``active_prompt_group``: prompt-context
791+
snapshots at embed-call time; ``None`` outside a binding.
792+
- ``call_id``: a per-call disambiguator, always present, freshly
793+
minted per ``embed()`` call.
794+
"""
795+
796+
invocation_id: str
797+
correlation_id: str | None
798+
node_name: str
799+
namespace: tuple[str, ...]
800+
attempt_index: int
801+
fan_out_index: int | None
802+
branch_name: str | None
803+
provider: str
804+
model: str
805+
response_id: str | None
806+
response_model: str | None
807+
# EmbeddingUsage is a string-typed forward reference per the
808+
# TYPE_CHECKING import -- keeps the runtime import direction
809+
# graph -> retrieval off the module-load path.
810+
usage: "EmbeddingUsage | None"
811+
latency_ms: float | None
812+
input_strings: list[str]
813+
input_count: int
814+
dimensions: int | None
815+
request_params: Mapping[str, Any]
816+
request_extras: Mapping[str, Any]
817+
active_prompt: Any
818+
active_prompt_group: Any
819+
call_id: str
820+
caller_invocation_metadata: Mapping[str, AttributeValue] | None = None
821+
822+
823+
# Spec: the failure sibling of EmbeddingEvent (proposal 0059). Dispatched
824+
# whenever EmbeddingProvider.embed() raises a §7 category exception --
825+
# covers both the provider-caught path and the pre-send validation raise
826+
# (provider_invalid_request on an empty input list). Dispatched ALONGSIDE
827+
# the exception, not in place of it; mutually exclusive with EmbeddingEvent
828+
# on the same call. The response-side fields are absent (no response).
829+
@dataclass(frozen=True)
830+
class EmbeddingFailedEvent:
831+
"""A typed embedding provider call failure event delivered to observers.
832+
833+
Carries identity, scoping, and failure-context data for an ``embed()``
834+
call that raised a retrieval-provider category exception. Observer code
835+
filters by type discrimination
836+
(``isinstance(event, EmbeddingFailedEvent)``).
837+
838+
The identity / scoping / request-side field set mirrors
839+
``EmbeddingEvent``; the response-side fields are absent. Failure-
840+
specific fields:
841+
842+
- ``error_category``: the error category the call raised (one of the
843+
embedding-applicable provider categories). Always present.
844+
- ``error_type``: an optional impl-level / vendor-specific type or
845+
code; ``None`` when unavailable.
846+
- ``error_message``: a human-readable message; always present (the
847+
empty string when the exception carried no message).
848+
"""
849+
850+
invocation_id: str
851+
correlation_id: str | None
852+
node_name: str
853+
namespace: tuple[str, ...]
854+
attempt_index: int
855+
fan_out_index: int | None
856+
branch_name: str | None
857+
provider: str
858+
model: str
859+
latency_ms: float | None
860+
input_strings: list[str]
861+
request_params: Mapping[str, Any]
862+
request_extras: Mapping[str, Any]
863+
active_prompt: Any
864+
active_prompt_group: Any
865+
call_id: str
866+
error_category: str
867+
error_message: str
868+
error_type: str | None = None
869+
caller_invocation_metadata: Mapping[str, AttributeValue] | None = None
870+
871+
752872
# Spec: realizes pipeline-utilities §6.3 failure-isolation middleware
753873
# (proposal 0050). Emitted by FailureIsolationMiddleware when it
754874
# catches an exception escaping the inner chain and substitutes a
@@ -905,6 +1025,8 @@ class ToolCallFailedEvent:
9051025

9061026

9071027
__all__ = [
1028+
"EmbeddingEvent",
1029+
"EmbeddingFailedEvent",
9081030
"FailureIsolatedEvent",
9091031
"FanOutEventConfig",
9101032
"InvocationCompletedEvent",

src/openarmature/graph/observer.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
from typing import Any, Literal, Protocol
3636

3737
from .events import (
38+
EmbeddingEvent,
39+
EmbeddingFailedEvent,
3840
FailureIsolatedEvent,
3941
InvocationCompletedEvent,
4042
InvocationStartedEvent,
@@ -63,7 +65,9 @@
6365
# retry to drive the per-attempt OTel span surface),
6466
# and FailureIsolatedEvent (proposal 0050 §6.3 framework-emitted event,
6567
# dispatched by FailureIsolationMiddleware when it catches an exception
66-
# escaping the inner chain and substitutes a degraded partial update).
68+
# escaping the inner chain and substitutes a degraded partial update);
69+
# and EmbeddingEvent / EmbeddingFailedEvent (proposal 0059 typed embedding
70+
# provider call events, dispatched on every EmbeddingProvider.embed()).
6771
ObserverEvent = (
6872
NodeEvent
6973
| MetadataAugmentationEvent
@@ -75,6 +79,8 @@
7579
| FailureIsolatedEvent
7680
| ToolCallEvent
7781
| ToolCallFailedEvent
82+
| EmbeddingEvent
83+
| EmbeddingFailedEvent
7884
)
7985

8086

src/openarmature/observability/correlation.py

Lines changed: 29 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737

3838
if TYPE_CHECKING:
3939
from openarmature.graph.events import (
40+
EmbeddingEvent,
41+
EmbeddingFailedEvent,
4042
FailureIsolatedEvent,
4143
InvocationCompletedEvent,
4244
InvocationStartedEvent,
@@ -50,6 +52,25 @@
5052
)
5153
from openarmature.graph.observer import SubscribedObserver
5254

55+
# The event-record union the engine's serial dispatch worker accepts.
56+
# Defined once and reused across the ContextVar + the get/set/reset
57+
# accessors so a new capability event widens the contract in one place.
58+
_DispatchEvent = (
59+
NodeEvent
60+
| MetadataAugmentationEvent
61+
| InvocationStartedEvent
62+
| InvocationCompletedEvent
63+
| LlmCompletionEvent
64+
| LlmFailedEvent
65+
| LlmRetryAttemptEvent
66+
| FailureIsolatedEvent
67+
| ToolCallEvent
68+
| ToolCallFailedEvent
69+
| EmbeddingEvent
70+
| EmbeddingFailedEvent
71+
)
72+
_DispatchFn = Callable[[_DispatchEvent], None]
73+
5374

5475
# ---------------------------------------------------------------------------
5576
# Correlation ID (observability spec §3.1)
@@ -221,44 +242,12 @@ def _reset_active_observers(token: Token[tuple[SubscribedObserver, ...]]) -> Non
221242
# ---------------------------------------------------------------------------
222243

223244

224-
_active_dispatch_var: ContextVar[
225-
Callable[
226-
[
227-
NodeEvent
228-
| MetadataAugmentationEvent
229-
| InvocationStartedEvent
230-
| InvocationCompletedEvent
231-
| LlmCompletionEvent
232-
| LlmFailedEvent
233-
| LlmRetryAttemptEvent
234-
| FailureIsolatedEvent
235-
| ToolCallEvent
236-
| ToolCallFailedEvent
237-
],
238-
None,
239-
]
240-
| None
241-
] = ContextVar("openarmature.active_dispatch", default=None)
242-
243-
244-
def current_dispatch() -> (
245-
Callable[
246-
[
247-
NodeEvent
248-
| MetadataAugmentationEvent
249-
| InvocationStartedEvent
250-
| InvocationCompletedEvent
251-
| LlmCompletionEvent
252-
| LlmFailedEvent
253-
| LlmRetryAttemptEvent
254-
| FailureIsolatedEvent
255-
| ToolCallEvent
256-
| ToolCallFailedEvent
257-
],
258-
None,
259-
]
260-
| None
261-
):
245+
_active_dispatch_var: ContextVar[_DispatchFn | None] = ContextVar(
246+
"openarmature.active_dispatch", default=None
247+
)
248+
249+
250+
def current_dispatch() -> _DispatchFn | None:
262251
"""Return the engine's dispatch callable for the current invocation,
263252
or ``None`` outside any invocation.
264253
@@ -272,65 +261,13 @@ def current_dispatch() -> (
272261
return _active_dispatch_var.get()
273262

274263

275-
def _set_active_dispatch(
276-
dispatch: Callable[
277-
[
278-
NodeEvent
279-
| MetadataAugmentationEvent
280-
| InvocationStartedEvent
281-
| InvocationCompletedEvent
282-
| LlmCompletionEvent
283-
| LlmFailedEvent
284-
| LlmRetryAttemptEvent
285-
| FailureIsolatedEvent
286-
| ToolCallEvent
287-
| ToolCallFailedEvent
288-
],
289-
None,
290-
],
291-
) -> Token[
292-
Callable[
293-
[
294-
NodeEvent
295-
| MetadataAugmentationEvent
296-
| InvocationStartedEvent
297-
| InvocationCompletedEvent
298-
| LlmCompletionEvent
299-
| LlmFailedEvent
300-
| LlmRetryAttemptEvent
301-
| FailureIsolatedEvent
302-
| ToolCallEvent
303-
| ToolCallFailedEvent
304-
],
305-
None,
306-
]
307-
| None
308-
]:
264+
def _set_active_dispatch(dispatch: _DispatchFn) -> Token[_DispatchFn | None]:
309265
"""Set the engine's dispatch callable in scope. Internal —
310266
engine-only."""
311267
return _active_dispatch_var.set(dispatch)
312268

313269

314-
def _reset_active_dispatch(
315-
token: Token[
316-
Callable[
317-
[
318-
NodeEvent
319-
| MetadataAugmentationEvent
320-
| InvocationStartedEvent
321-
| InvocationCompletedEvent
322-
| LlmCompletionEvent
323-
| LlmFailedEvent
324-
| LlmRetryAttemptEvent
325-
| FailureIsolatedEvent
326-
| ToolCallEvent
327-
| ToolCallFailedEvent
328-
],
329-
None,
330-
]
331-
| None
332-
],
333-
) -> None:
270+
def _reset_active_dispatch(token: Token[_DispatchFn | None]) -> None:
334271
_active_dispatch_var.reset(token)
335272

336273

src/openarmature/observability/langfuse/observer.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
from typing import Any, cast
3131

3232
from openarmature.graph.events import (
33+
EmbeddingEvent,
34+
EmbeddingFailedEvent,
3335
FailureIsolatedEvent,
3436
InvocationCompletedEvent,
3537
InvocationStartedEvent,
@@ -443,6 +445,8 @@ async def __call__(
443445
| FailureIsolatedEvent
444446
| ToolCallEvent
445447
| ToolCallFailedEvent
448+
| EmbeddingEvent
449+
| EmbeddingFailedEvent
446450
),
447451
) -> None:
448452
if isinstance(event, InvocationStartedEvent):
@@ -484,6 +488,12 @@ async def __call__(
484488
if isinstance(event, MetadataAugmentationEvent):
485489
self._handle_metadata_augmentation(event)
486490
return
491+
# Proposal 0059 embedding events: the bundled Langfuse Embedding
492+
# observation is a follow-up. Until it lands the events are safely
493+
# ignored here rather than falling through to the NodeEvent phase
494+
# dispatch (which would AttributeError on the absent ``phase``).
495+
if isinstance(event, EmbeddingEvent | EmbeddingFailedEvent):
496+
return
487497
if event.phase == "started":
488498
self._open_started_observation(event)
489499
elif event.phase == "completed":

src/openarmature/observability/otel/observer.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@
100100
from opentelemetry.trace.propagation import set_span_in_context
101101

102102
from openarmature.graph.events import (
103+
EmbeddingEvent,
104+
EmbeddingFailedEvent,
103105
FailureIsolatedEvent,
104106
InvocationCompletedEvent,
105107
InvocationStartedEvent,
@@ -755,6 +757,8 @@ async def __call__(
755757
| FailureIsolatedEvent
756758
| ToolCallEvent
757759
| ToolCallFailedEvent
760+
| EmbeddingEvent
761+
| EmbeddingFailedEvent
758762
),
759763
) -> None:
760764
# Proposal 0043 invocation-boundary events: OTel has no
@@ -782,6 +786,13 @@ async def __call__(
782786
# consumers, so the OTel observer ignores them here.
783787
if isinstance(event, LlmCompletionEvent | LlmFailedEvent):
784788
return
789+
# Proposal 0059 embedding events: the bundled OTel embedding span
790+
# (openarmature.embedding.complete) is a follow-up. Until it lands
791+
# the events are safely ignored here rather than falling through to
792+
# the NodeEvent phase dispatch (which would AttributeError on the
793+
# absent ``phase`` field).
794+
if isinstance(event, EmbeddingEvent | EmbeddingFailedEvent):
795+
return
785796
# Proposal 0063 tool-execution observability: emit the
786797
# openarmature.tool.call span from the typed tool events.
787798
# Independent of disable_llm_spans (that flag is scoped to LLM

0 commit comments

Comments
 (0)