Skip to content

Commit 48db64c

Browse files
Wire fixture 084 and extend conformance harness
Activate observability fixture 084 (Langfuse session/user promotion): cases 2/3/4 (not session-bound, userId present additively, userId absent) run now; the session-bound cases 1/5 defer per-case until the sessions capability (0020) supplies a session_id source. Extend the fixture-parsing models for 084's shapes: ObservabilityExpected gains langfuse_trace / langfuse_traces (and the matching discriminator keys), and CaseSpec.invocations widens to int | list to carry case 5's multi-invocation specs alongside the existing run-count usage.
1 parent 6effe81 commit 48db64c

3 files changed

Lines changed: 86 additions & 1 deletion

File tree

tests/conformance/harness/expectations.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ class ObservabilityExpected(_ForbidExtras):
197197
determinism_check: dict[str, Any] | None = None
198198
# Multi-invocation fixtures (009 cross-cutting, 011 determinism).
199199
invocation_count: int | None = None
200+
# Langfuse Trace-level expectations (proposal 0064, fixture 084): the
201+
# single-trace shape (sessionId / userId / metadata) and the
202+
# multi-invocation grouping case's per-trace list.
203+
langfuse_trace: dict[str, Any] | None = None
204+
langfuse_traces: list[dict[str, Any]] | None = None
200205

201206

202207
# ---------------------------------------------------------------------------
@@ -240,6 +245,9 @@ class ObservabilityExpected(_ForbidExtras):
240245
"no_llm_provider_span",
241246
"determinism_check",
242247
"invocation_count",
248+
# proposal 0064 (fixture 084) Langfuse Trace-level expectations
249+
"langfuse_trace",
250+
"langfuse_traces",
243251
}
244252
)
245253

tests/conformance/harness/fixtures.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,9 @@ class CaseSpec(BaseModel):
134134
caller_correlation_id: str | None = None
135135
# observability — mock LLM responses + per-case run config.
136136
mock_llm: list[MockResponse] | None = None
137-
invocations: int | None = None
137+
# ``int`` = a run-count (fixtures 009 / 011); ``list`` = per-invocation
138+
# specs for a multi-invocation fixture (proposal 0064 fixture 084 case 5).
139+
invocations: int | list[dict[str, Any]] | None = None
138140

139141

140142
# ---------------------------------------------------------------------------

tests/conformance/test_observability.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,11 @@ def _reset_otel_global_tracer_provider(restore_to: object) -> None:
153153
# (v0.61.0) resolves the detached-invocation-span shape case 2
154154
# presupposed — the whole fixture was unwired pending that.
155155
"058-implementation-attribution-otel",
156+
# v0.62.0 — proposal 0064 (Langfuse trace.sessionId / trace.userId
157+
# population). Cases 2/3/4 (not session-bound + userId promotion)
158+
# run; session-bound cases 1/5 defer until the sessions capability
159+
# (0020) supplies openarmature.session_id.
160+
"084-langfuse-session-user-promotion",
156161
}
157162
)
158163

@@ -246,6 +251,8 @@ async def test_observability_fixture(fixture_path: Path) -> None:
246251
await _run_fixture_056(spec)
247252
elif fixture_id == "058-implementation-attribution-otel":
248253
await _run_fixture_058(spec)
254+
elif fixture_id == "084-langfuse-session-user-promotion":
255+
await _run_fixture_084(spec)
249256
elif fixture_id in {
250257
"012-otel-llm-payload-default-off",
251258
"013-otel-llm-payload-enabled",
@@ -2175,6 +2182,74 @@ async def delete(self, invocation_id: str) -> None:
21752182
)
21762183

21772184

2185+
# ---------------------------------------------------------------------------
2186+
# Fixture 084 — Langfuse session/user promotion (proposal 0064)
2187+
# ---------------------------------------------------------------------------
2188+
2189+
2190+
async def _run_fixture_084(spec: Mapping[str, Any]) -> None:
    """Run fixture 084: Langfuse session/user promotion (proposal 0064).

    For every case that is not deferred, the case's graph is built and
    compiled, a ``LangfuseObserver`` backed by an ``InMemoryLangfuseClient``
    is attached, and the graph is invoked once (passing the case's
    ``caller_metadata`` as ``metadata`` when the case supplies it). The single
    recorded trace is then compared with ``expected.langfuse_trace``:
    ``sessionId``, ``userId``, and each concrete metadata key.

    Args:
        spec: Parsed fixture mapping; ``spec["cases"]`` is the list of cases.

    Raises:
        AssertionError: On the first mismatch, re-raised with the case name
            prepended to the message.
    """
    from openarmature.observability.langfuse import (  # noqa: PLC0415
        InMemoryLangfuseClient,
        LangfuseObserver,
    )

    def _is_placeholder(value: object) -> bool:
        # Angle-bracketed strings in the fixture are placeholders, not
        # concrete values, so they are excluded from the equality checks.
        return isinstance(value, str) and value.startswith("<") and value.endswith(">")

    # Proposal 0064 §8.4.1. The session-bound cases (1 + 5) pass session_id
    # at invoke(), which relies on the sessions capability (proposal 0020,
    # §5.6) to surface openarmature.session_id. That is unimplemented in
    # python until v0.19.0, so trace.sessionId has no source yet and those
    # cases are skipped per-case. Cases 2/3/4 (not session-bound + the userId
    # promotion) are exercised now.
    skipped_case_names = frozenset(
        (
            "session_bound_sets_trace_session_id",
            "multi_invocation_shared_session_groups",
        )
    )

    for case in cast("list[dict[str, Any]]", spec["cases"]):
        case_name = cast("str", case["name"])
        if case_name in skipped_case_names:
            continue
        try:
            lf_client = InMemoryLangfuseClient()
            lf_observer = LangfuseObserver(client=lf_client)
            node_trace: list[str] = []
            built = build_graph(case, trace=node_trace)
            graph = built.builder.compile()
            graph.attach_observer(lf_observer)
            start_state = built.initial_state(case.get("initial_state", {}))

            # Forward ``metadata`` only when the case actually declares
            # caller_metadata; otherwise invoke with the state alone.
            caller_metadata = cast("dict[str, Any] | None", case.get("caller_metadata"))
            invoke_kwargs: dict[str, Any] = (
                {} if caller_metadata is None else {"metadata": caller_metadata}
            )
            await graph.invoke(start_state, **invoke_kwargs)
            await graph.drain()
            lf_observer.shutdown()

            recorded = lf_client.traces
            assert len(recorded) == 1, f"expected 1 trace, got {len(recorded)}"
            lf_trace = next(iter(recorded.values()))
            expected = cast("dict[str, Any]", case["expected"]["langfuse_trace"])

            # trace.sessionId stays unset for the runnable cases (no session
            # source until 0020).
            want_session = expected.get("sessionId")
            got_session = lf_trace.session_id
            assert got_session == want_session, (
                f"sessionId: got {got_session!r}, expected {want_session!r}"
            )

            # trace.userId is promoted from the userId caller key (case 3)
            # and unset otherwise (cases 2/4).
            want_user = expected.get("userId")
            got_user = lf_trace.user_id
            assert got_user == want_user, (
                f"userId: got {got_user!r}, expected {want_user!r}"
            )

            # Additive promotion + unaffected metadata: each concrete
            # (non-placeholder) expected metadata key must also appear in
            # the trace metadata with the same value.
            expected_md = cast("dict[str, Any]", expected.get("metadata") or {})
            for key, want in expected_md.items():
                if _is_placeholder(want):
                    continue
                got = lf_trace.metadata.get(key)
                assert got == want, f"metadata.{key}: got {got!r}, expected {want!r}"
        except AssertionError as e:
            raise AssertionError(f"case {case_name!r}: {e}") from e
2251+
2252+
21782253
# ---------------------------------------------------------------------------
21792254
# Fixture 010 — log correlation
21802255
#

0 commit comments

Comments
 (0)