Skip to content

Commit 2a15009

Browse files
jsonbailey and claude committed
fix: populate AgentGraphResult.evaluations and clean up remaining review items
- Fix AgentGraphResult.evaluations type from Optional[List[Any]] to Optional[List[JudgeResult]]
- Populate evaluations in both LangGraph and OpenAI runners with all judge results
- Remove stray `if tracker:` guard in OpenAI _handle_handoff (tracker is always set)
- Add comment documenting why output_text is empty at handoff time in OpenAI runner
- flush() now returns List[JudgeResult] instead of None

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 0bd44d0 commit 2a15009

4 files changed

Lines changed: 18 additions & 5 deletions

File tree

packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_agent_graph_runner.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -330,7 +330,7 @@ async def run(self, input: Any) -> AgentGraphResult:
330330
output = extract_last_message_content(messages)
331331

332332
# Flush per-node metrics to LD trackers
333-
await handler.flush(self._graph, pending_eval_tasks)
333+
all_eval_results = await handler.flush(self._graph, pending_eval_tasks)
334334

335335
tracker.track_path(handler.path)
336336
tracker.track_duration(duration)
@@ -341,6 +341,7 @@ async def run(self, input: Any) -> AgentGraphResult:
341341
output=output,
342342
raw=result,
343343
metrics=LDAIMetrics(success=True),
344+
evaluations=all_eval_results,
344345
)
345346

346347
except Exception as exc:

packages/ai-providers/server-ai-langchain/src/ldai_langchain/langgraph_callback_handler.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from langchain_core.callbacks import BaseCallbackHandler
66
from langchain_core.outputs import ChatGeneration, LLMResult
77
from ldai.agent_graph import AgentGraphDefinition
8+
from ldai.providers.types import JudgeResult
89
from ldai.tracker import TokenUsage
910

1011
from ldai_langchain.langchain_helper import get_ai_usage_from_response
@@ -188,7 +189,9 @@ def on_tool_end(
188189
# Flush
189190
# ------------------------------------------------------------------
190191

191-
async def flush(self, graph: AgentGraphDefinition, eval_tasks=None) -> None:
192+
async def flush(
193+
self, graph: AgentGraphDefinition, eval_tasks=None
194+
) -> List[JudgeResult]:
192195
"""
193196
Emit all collected per-node metrics to the LaunchDarkly trackers.
194197
@@ -198,8 +201,10 @@ async def flush(self, graph: AgentGraphDefinition, eval_tasks=None) -> None:
198201
:param eval_tasks: Optional dict mapping node key to a list of awaitables that
199202
return judge evaluation results. Multiple tasks arise when a node is visited
200203
more than once (e.g. in a graph with cycles).
204+
:return: All judge results collected across all nodes.
201205
"""
202206
node_trackers: Dict[str, Any] = {}
207+
all_eval_results: List[JudgeResult] = []
203208
for node_key in self._path:
204209
if node_key in node_trackers:
205210
continue
@@ -229,6 +234,9 @@ async def flush(self, graph: AgentGraphDefinition, eval_tasks=None) -> None:
229234

230235
for eval_task in eval_tasks.get(node_key, []):
231236
results = await eval_task
237+
all_eval_results.extend(results)
232238
for r in results:
233239
if r.success:
234240
config_tracker.track_judge_result(r)
241+
242+
return all_eval_results

packages/ai-providers/server-ai-openai/src/ldai_openai/openai_agent_graph_runner.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,10 @@ async def run(self, input: Any) -> AgentGraphResult:
9191
root_agent = self._build_agents(path, state, tracker)
9292
result = await Runner.run(root_agent, input_str)
9393
self._flush_final_segment(state, result, input_str)
94+
all_eval_results = []
9495
for node_tracker, eval_task in state.pending_eval_tasks:
9596
eval_results = await eval_task
97+
all_eval_results.extend(eval_results)
9698
for r in eval_results:
9799
if r.success:
98100
node_tracker.track_judge_result(r)
@@ -111,6 +113,7 @@ async def run(self, input: Any) -> AgentGraphResult:
111113
output=str(result.final_output),
112114
raw=result,
113115
metrics=LDAIMetrics(success=True, usage=token_usage),
116+
evaluations=all_eval_results,
114117
)
115118
except Exception as exc:
116119
if isinstance(exc, ImportError):
@@ -251,8 +254,7 @@ def _handle_handoff(
251254
) -> None:
252255
path.append(tgt)
253256
state.last_node_key = tgt
254-
if tracker:
255-
tracker.track_handoff_success(src, tgt)
257+
tracker.track_handoff_success(src, tgt)
256258

257259
now_ns = time.perf_counter_ns()
258260
duration_ms = (now_ns - state.last_handoff_ns) // 1_000_000
@@ -275,6 +277,8 @@ def _handle_handoff(
275277

276278
src_node = self._graph.get_node(src)
277279
if src_node is not None:
280+
# The OpenAI Agents SDK does not expose the agent's text output at
281+
# handoff time via RunContextWrapper, so output_text is empty here.
278282
eval_task = src_node.get_config().evaluator.evaluate(input_str, '')
279283
state.pending_eval_tasks.append((config_tracker, eval_task))
280284

packages/sdk/server-ai/src/ldai/providers/types.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,4 +110,4 @@ class AgentGraphResult:
110110
output: str
111111
raw: Any
112112
metrics: LDAIMetrics
113-
evaluations: Optional[List[Any]] = None
113+
evaluations: Optional[List[JudgeResult]] = None

0 commit comments

Comments
 (0)