11"""Tests for ManagedAgent."""
22
3+ import asyncio
4+ from typing import List
35from unittest .mock import AsyncMock , MagicMock
46
57import pytest
68from ldclient import Config , Context , LDClient
79from ldclient .integrations .test_data import TestData
810
911from ldai import LDAIClient , ManagedAgent
12+ from ldai .evaluator import Evaluator
1013from ldai .managed_agent import ManagedAgent
1114from ldai .models import AIAgentConfig , AIAgentConfigDefault , ModelConfig , ProviderConfig
12- from ldai .providers .types import LDAIMetrics , ManagedResult , RunnerResult
13- from ldai .tracker import LDAIMetricSummary
15+ from ldai .providers .types import JudgeResult , LDAIMetrics , ManagedResult , RunnerResult
16+ from ldai .tracker import LDAIConfigTracker , LDAIMetricSummary
1417
1518
1619def _make_summary (success : bool = True ) -> LDAIMetricSummary :
@@ -19,6 +22,23 @@ def _make_summary(success: bool = True) -> LDAIMetricSummary:
1922 return summary
2023
2124
def _make_noop_evaluator_config() -> MagicMock:
    """Return a mocked AIAgentConfig wired to a mock tracker and a noop evaluator.

    The tracker's ``track_metrics_of_async`` is an ``AsyncMock`` resolving to a
    successful ``RunnerResult`` with content ``"Test response"``, and its
    ``get_summary`` returns a successful summary. ``create_tracker`` on the
    config hands back that tracker.
    """
    # Canned result the mocked tracker resolves to.
    canned_result = RunnerResult(
        content="Test response",
        raw=None,
        metrics=LDAIMetrics(success=True, usage=None),
    )

    tracker = MagicMock(spec=LDAIConfigTracker)
    tracker.track_metrics_of_async = AsyncMock(return_value=canned_result)
    tracker.get_summary = MagicMock(return_value=_make_summary(True))

    config = MagicMock(spec=AIAgentConfig)
    config.create_tracker = MagicMock(return_value=tracker)
    config.evaluator = Evaluator.noop()
    return config
40+
41+
2242@pytest .fixture
2343def td () -> TestData :
2444 td = TestData .data_source ()
@@ -60,17 +80,7 @@ class TestManagedAgentRun:
6080 @pytest .mark .asyncio
6181 async def test_run_delegates_to_agent_runner (self ):
6282 """Should delegate run() to the underlying AgentRunner and return ManagedResult."""
63- mock_config = MagicMock (spec = AIAgentConfig )
64- mock_tracker = MagicMock ()
65- mock_tracker .track_metrics_of_async = AsyncMock (
66- return_value = RunnerResult (
67- content = "Test response" ,
68- metrics = LDAIMetrics (success = True , usage = None ),
69- raw = None ,
70- )
71- )
72- mock_tracker .get_summary = MagicMock (return_value = _make_summary (True ))
73- mock_config .create_tracker = MagicMock (return_value = mock_tracker )
83+ mock_config = _make_noop_evaluator_config ()
7484 mock_runner = MagicMock ()
7585 mock_runner .run = AsyncMock (
7686 return_value = RunnerResult (
@@ -87,13 +97,16 @@ async def test_run_delegates_to_agent_runner(self):
8797 assert result .content == "Test response"
8898 assert result .metrics .success is True
8999 mock_config .create_tracker .assert_called_once ()
90- mock_tracker .track_metrics_of_async .assert_called_once ()
100+ mock_config .create_tracker .return_value .track_metrics_of_async .assert_called_once ()
101+ # evaluations should be present (from noop evaluator)
102+ if result .evaluations is not None :
103+ await result .evaluations
91104
92105 @pytest .mark .asyncio
93106 async def test_run_uses_create_tracker_for_fresh_tracker (self ):
94107 """Should use create_tracker() factory for a fresh tracker per invocation."""
95108 mock_config = MagicMock (spec = AIAgentConfig )
96- fresh_tracker = MagicMock ()
109+ fresh_tracker = MagicMock (spec = LDAIConfigTracker )
97110 fresh_tracker .track_metrics_of_async = AsyncMock (
98111 return_value = RunnerResult (
99112 content = "Fresh tracker response" ,
@@ -103,6 +116,7 @@ async def test_run_uses_create_tracker_for_fresh_tracker(self):
103116 )
104117 fresh_tracker .get_summary = MagicMock (return_value = _make_summary (True ))
105118 mock_config .create_tracker = MagicMock (return_value = fresh_tracker )
119+ mock_config .evaluator = Evaluator .noop ()
106120
107121 mock_runner = MagicMock ()
108122
@@ -113,6 +127,8 @@ async def test_run_uses_create_tracker_for_fresh_tracker(self):
113127 assert result .content == "Fresh tracker response"
114128 mock_config .create_tracker .assert_called_once ()
115129 fresh_tracker .track_metrics_of_async .assert_called_once ()
130+ if result .evaluations is not None :
131+ await result .evaluations
116132
117133 def test_get_agent_runner_returns_runner (self ):
118134 """Should return the underlying AgentRunner."""
@@ -129,6 +145,168 @@ def test_get_config_returns_config(self):
129145 assert agent .get_config () is mock_config
130146
131147
class TestManagedAgentEvaluations:
    """Tests for the ``evaluations`` awaitable attached to ManagedAgent.run() results."""

    @staticmethod
    def _make_evaluator(evaluate_coro) -> MagicMock:
        """Build a mock Evaluator whose ``evaluate`` schedules ``evaluate_coro`` as a task.

        ``evaluate_coro`` is an async callable taking ``(input_text, output_text)``
        and returning a list of JudgeResult. Each call to the mocked
        ``evaluate(i, o)`` wraps it in ``asyncio.create_task`` and returns the task.
        """
        evaluator = MagicMock(spec=Evaluator)
        evaluator.evaluate = MagicMock(
            side_effect=lambda i, o: asyncio.create_task(evaluate_coro(i, o))
        )
        return evaluator

    @staticmethod
    def _make_agent(evaluator):
        """Build a ManagedAgent over mocked config/tracker/runner using ``evaluator``.

        Returns a ``(agent, tracker)`` tuple so tests can assert on tracker calls.
        The tracker resolves ``track_metrics_of_async`` to a successful
        ``RunnerResult`` with content ``"resp"``.
        """
        tracker = MagicMock(spec=LDAIConfigTracker)
        tracker.track_metrics_of_async = AsyncMock(
            return_value=RunnerResult(
                content="resp",
                raw=None,
                metrics=LDAIMetrics(success=True),
            )
        )
        tracker.get_summary = MagicMock(return_value=_make_summary(True))
        tracker.track_judge_result = MagicMock()

        config = MagicMock(spec=AIAgentConfig)
        config.create_tracker = MagicMock(return_value=tracker)
        config.evaluator = evaluator

        runner = MagicMock()
        return ManagedAgent(config, runner), tracker

    @pytest.mark.asyncio
    async def test_run_returns_before_evaluations_resolve(self):
        """run() should return before evaluations complete."""
        barrier = asyncio.Event()

        async def _slow_evaluate(input_text: str, output_text: str) -> List[JudgeResult]:
            # Block until the test explicitly releases the gate.
            await barrier.wait()
            return []

        agent, _ = self._make_agent(self._make_evaluator(_slow_evaluate))
        result = await agent.run("Hello")

        try:
            assert result is not None
            assert result.evaluations is not None
            assert not result.evaluations.done(), "evaluations task should still be pending"
        finally:
            # Release the gate even when an assertion fails, so the evaluator
            # coroutine is never left blocked on the event.
            barrier.set()

        await result.evaluations

    @pytest.mark.asyncio
    async def test_await_evaluations_collects_results(self):
        """await result.evaluations should return the list of JudgeResult instances."""
        judge_result = JudgeResult(
            judge_config_key='judge-key',
            success=True,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            score=0.9,
            reasoning='Good agent response',
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [judge_result]

        agent, _ = self._make_agent(self._make_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        assert result.evaluations is not None
        results = await result.evaluations
        assert results == [judge_result]

    @pytest.mark.asyncio
    async def test_tracking_fires_inside_awaited_chain(self):
        """tracker.track_judge_result() must be called when evaluations are awaited."""
        judge_result = JudgeResult(
            judge_config_key='agent-judge',
            success=True,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            score=0.85,
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [judge_result]

        agent, tracker = self._make_agent(self._make_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        # Tracking should NOT have fired yet (before we await evaluations).
        tracker.track_judge_result.assert_not_called()

        # Now await the evaluations task; tracking fires inside the chain.
        assert result.evaluations is not None
        await result.evaluations

        tracker.track_judge_result.assert_called_once_with(judge_result)

    @pytest.mark.asyncio
    async def test_noop_evaluator_returns_empty_list(self):
        """With a noop evaluator, awaiting evaluations should return an empty list."""
        mock_config = _make_noop_evaluator_config()
        mock_runner = MagicMock()
        agent = ManagedAgent(mock_config, mock_runner)
        result = await agent.run("Hello")

        assert result.evaluations is not None
        results = await result.evaluations
        assert results == []

    @pytest.mark.asyncio
    async def test_tracking_not_called_for_failed_judge_result(self):
        """tracker.track_judge_result() should NOT be called for unsuccessful judge results."""
        failed_result = JudgeResult(
            success=False,
            sampled=True,
            metric_key='$ld:ai:judge:relevance',
            error_message='Judge evaluation failed',
        )

        async def _evaluate_coro(input_text: str, output_text: str) -> List[JudgeResult]:
            return [failed_result]

        agent, tracker = self._make_agent(self._make_evaluator(_evaluate_coro))
        result = await agent.run("Hello")

        assert result.evaluations is not None
        await result.evaluations

        tracker.track_judge_result.assert_not_called()
308+
309+
132310class TestLDAIClientCreateAgent :
133311 """Tests for LDAIClient.create_agent."""
134312
0 commit comments