11"""Tests for ManagedAgent."""
22
3+ import asyncio
34import pytest
5+ from typing import List
46from unittest .mock import AsyncMock , MagicMock
57
68from ldai import LDAIClient , ManagedAgent
9+ from ldai .evaluator import Evaluator
710from ldai .managed_agent import ManagedAgent
811from ldai .models import AIAgentConfig , AIAgentConfigDefault , ModelConfig , ProviderConfig
912from ldai .providers import AgentResult
10- from ldai .providers .types import LDAIMetrics , ManagedResult
11- from ldai .tracker import LDAIMetricSummary
13+ from ldai .providers .types import JudgeResult , LDAIMetrics , ManagedResult
14+ from ldai .tracker import LDAIConfigTracker , LDAIMetricSummary
1215
1316from ldclient import Config , Context , LDClient
1417from ldclient .integrations .test_data import TestData
@@ -20,6 +23,23 @@ def _make_summary(success: bool = True) -> LDAIMetricSummary:
2023 return summary
2124
2225
def _make_noop_evaluator_config() -> MagicMock:
    """Return a mocked AIAgentConfig wired to a mocked tracker and a noop evaluator.

    The tracker's ``track_metrics_of_async`` resolves to a canned successful
    ``AgentResult`` and ``get_summary`` returns ``_make_summary(True)``.
    ``create_tracker()`` on the returned config always hands back that tracker.
    """
    canned_result = AgentResult(
        output="Test response",
        raw=None,
        metrics=LDAIMetrics(success=True, usage=None),
    )

    tracker = MagicMock(spec=LDAIConfigTracker)
    tracker.track_metrics_of_async = AsyncMock(return_value=canned_result)
    tracker.get_summary = MagicMock(return_value=_make_summary(True))

    config = MagicMock(spec=AIAgentConfig)
    config.create_tracker = MagicMock(return_value=tracker)
    config.evaluator = Evaluator.noop()
    return config
41+
42+
2343@pytest .fixture
2444def td () -> TestData :
2545 td = TestData .data_source ()
@@ -61,17 +81,7 @@ class TestManagedAgentRun:
6181 @pytest .mark .asyncio
6282 async def test_run_delegates_to_agent_runner (self ):
6383 """Should delegate run() to the underlying AgentRunner and return ManagedResult."""
64- mock_config = MagicMock (spec = AIAgentConfig )
65- mock_tracker = MagicMock ()
66- mock_tracker .track_metrics_of_async = AsyncMock (
67- return_value = AgentResult (
68- output = "Test response" ,
69- raw = None ,
70- metrics = LDAIMetrics (success = True , usage = None ),
71- )
72- )
73- mock_tracker .get_summary = MagicMock (return_value = _make_summary (True ))
74- mock_config .create_tracker = MagicMock (return_value = mock_tracker )
84+ mock_config = _make_noop_evaluator_config ()
7585 mock_runner = MagicMock ()
7686 mock_runner .run = AsyncMock (
7787 return_value = AgentResult (
@@ -88,13 +98,16 @@ async def test_run_delegates_to_agent_runner(self):
8898 assert result .content == "Test response"
8999 assert result .metrics .success is True
90100 mock_config .create_tracker .assert_called_once ()
91- mock_tracker .track_metrics_of_async .assert_called_once ()
101+ mock_config .create_tracker .return_value .track_metrics_of_async .assert_called_once ()
102+ # evaluations should be present (from noop evaluator)
103+ if result .evaluations is not None :
104+ await result .evaluations
92105
93106 @pytest .mark .asyncio
94107 async def test_run_uses_create_tracker_for_fresh_tracker (self ):
95108 """Should use create_tracker() factory for a fresh tracker per invocation."""
96109 mock_config = MagicMock (spec = AIAgentConfig )
97- fresh_tracker = MagicMock ()
110+ fresh_tracker = MagicMock (spec = LDAIConfigTracker )
98111 fresh_tracker .track_metrics_of_async = AsyncMock (
99112 return_value = AgentResult (
100113 output = "Fresh tracker response" ,
@@ -104,6 +117,7 @@ async def test_run_uses_create_tracker_for_fresh_tracker(self):
104117 )
105118 fresh_tracker .get_summary = MagicMock (return_value = _make_summary (True ))
106119 mock_config .create_tracker = MagicMock (return_value = fresh_tracker )
120+ mock_config .evaluator = Evaluator .noop ()
107121
108122 mock_runner = MagicMock ()
109123
@@ -114,6 +128,8 @@ async def test_run_uses_create_tracker_for_fresh_tracker(self):
114128 assert result .content == "Fresh tracker response"
115129 mock_config .create_tracker .assert_called_once ()
116130 fresh_tracker .track_metrics_of_async .assert_called_once ()
131+ if result .evaluations is not None :
132+ await result .evaluations
117133
118134 def test_get_agent_runner_returns_runner (self ):
119135 """Should return the underlying AgentRunner."""
@@ -130,6 +146,168 @@ def test_get_config_returns_config(self):
130146 assert agent .get_config () is mock_config
131147
132148
class TestManagedAgentEvaluations:
    """Tests for ManagedAgent evaluations chain (PR 12).

    Each test wraps a ManagedAgent around mocked config/tracker objects and an
    evaluator whose ``evaluate`` returns an ``asyncio`` task, then inspects the
    ``evaluations`` attribute of the value returned by ``run()``.
    """

    @staticmethod
    def _task_evaluator(evaluate_coro) -> MagicMock:
        """Build a mock Evaluator whose ``evaluate`` schedules *evaluate_coro* as a task.

        ``evaluate_coro`` is a coroutine function taking ``(input_text, output_text)``.
        """
        evaluator = MagicMock(spec=Evaluator)
        evaluator.evaluate = MagicMock(
            side_effect=lambda i, o: asyncio.create_task(evaluate_coro(i, o))
        )
        return evaluator

    @staticmethod
    def _build_agent(evaluator):
        """Return ``(agent, tracker)`` for a ManagedAgent using *evaluator*.

        The tracker is a mock whose ``track_metrics_of_async`` resolves to a
        successful ``AgentResult`` with output ``"resp"``; the config's
        ``create_tracker()`` always returns that tracker.
        """
        tracker = MagicMock(spec=LDAIConfigTracker)
        tracker.track_metrics_of_async = AsyncMock(
            return_value=AgentResult(output="resp", raw=None, metrics=LDAIMetrics(success=True))
        )
        tracker.get_summary = MagicMock(return_value=_make_summary(True))
        tracker.track_judge_result = MagicMock()

        config = MagicMock(spec=AIAgentConfig)
        config.create_tracker = MagicMock(return_value=tracker)
        config.evaluator = evaluator

        return ManagedAgent(config, MagicMock()), tracker

    @pytest.mark.asyncio
    async def test_run_returns_before_evaluations_resolve(self):
        """run() should return before evaluations complete."""
        barrier = asyncio.Event()

        async def _slow_evaluate(input_text: str, output_text: str) -> List[JudgeResult]:
            await barrier.wait()
            return []

        agent, _ = self._build_agent(self._task_evaluator(_slow_evaluate))
        result = await agent.run("Hello")

        try:
            assert result is not None
            assert result.evaluations is not None
            assert not result.evaluations.done(), "evaluations task should still be pending"
        finally:
            # Release the gate even when an assertion fails so the evaluation
            # task is never left blocked on the event.
            barrier.set()

        await result.evaluations

    @pytest.mark.asyncio
    async def test_await_evaluations_collects_results(self):
        """await result.evaluations should return the list of JudgeResult instances."""
        judge_result = JudgeResult(
            judge_config_key='judge-key',
            success=True,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            score=0.9,
            reasoning='Good agent response',
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [judge_result]

        agent, _ = self._build_agent(self._task_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        assert result.evaluations is not None
        results = await result.evaluations
        assert results == [judge_result]

    @pytest.mark.asyncio
    async def test_tracking_fires_inside_awaited_chain(self):
        """tracker.track_judge_result() must be called when evaluations are awaited."""
        judge_result = JudgeResult(
            judge_config_key='agent-judge',
            success=True,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            score=0.85,
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [judge_result]

        agent, mock_tracker = self._build_agent(self._task_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        # Tracking should NOT have fired yet (before we await evaluations)
        mock_tracker.track_judge_result.assert_not_called()

        # Now await the evaluations task — tracking fires inside the chain
        assert result.evaluations is not None
        await result.evaluations

        mock_tracker.track_judge_result.assert_called_once_with(judge_result)

    @pytest.mark.asyncio
    async def test_noop_evaluator_returns_empty_list(self):
        """With a noop evaluator, awaiting evaluations should return an empty list."""
        agent = ManagedAgent(_make_noop_evaluator_config(), MagicMock())
        result = await agent.run("Hello")

        assert result.evaluations is not None
        results = await result.evaluations
        assert results == []

    @pytest.mark.asyncio
    async def test_tracking_not_called_for_failed_judge_result(self):
        """tracker.track_judge_result() should NOT be called for unsuccessful judge results."""
        failed_result = JudgeResult(
            success=False,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            error_message='Judge evaluation failed',
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [failed_result]

        agent, mock_tracker = self._build_agent(self._task_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        assert result.evaluations is not None
        await result.evaluations

        mock_tracker.track_judge_result.assert_not_called()
309+
310+
133311class TestLDAIClientCreateAgent :
134312 """Tests for LDAIClient.create_agent."""
135313
0 commit comments