11"""Unit tests for LLM Manager."""
22
3+ import logging
4+
35import pytest
46from pytest_mock import MockerFixture
57
6- from lightspeed_evaluation .core .models import LLMConfig , SystemConfig
8+ from lightspeed_evaluation .core .models import (
9+ LLMConfig ,
10+ SystemConfig ,
11+ LLMPoolConfig ,
12+ JudgePanelConfig ,
13+ )
14+ from lightspeed_evaluation .core .models .system import (
15+ LLMDefaultsConfig ,
16+ LLMParametersConfig ,
17+ LLMProviderConfig ,
18+ )
719from lightspeed_evaluation .core .llm .manager import LLMManager
820
921
@@ -128,7 +140,7 @@ def test_initialization_hosted_vllm(self, mocker: MockerFixture) -> None:
128140 assert manager .model_name == "hosted_vllm/mistral-7b"
129141
130142 def test_initialization_generic_provider (
131- self , mocker : MockerFixture , capsys : pytest .CaptureFixture
143+ self , mocker : MockerFixture , caplog : pytest .LogCaptureFixture
132144 ) -> None :
133145 """Test initialization with unknown/generic provider."""
134146 config = LLMConfig (
@@ -138,14 +150,14 @@ def test_initialization_generic_provider(
138150 )
139151 mocker .patch ("lightspeed_evaluation.core.llm.manager.validate_provider_env" )
140152
141- manager = LLMManager (config )
153+ with caplog .at_level (logging .WARNING ):
154+ manager = LLMManager (config )
142155
143156 # Should construct generic model name
144157 assert manager .model_name == "custom_provider/custom-model"
145158
146- # Should print warning
147- captured = capsys .readouterr ()
148- assert "generic" in captured .out .lower () or "warning" in captured .out .lower ()
159+ # Should log warning about generic provider
160+ assert any ("generic" in record .message .lower () for record in caplog .records )
149161
150162 def test_get_model_name (
151163 self , basic_llm_config : LLMConfig , mocker : MockerFixture
@@ -231,18 +243,147 @@ def test_llm_params_with_custom_values(self, mocker: MockerFixture) -> None:
231243 assert params ["timeout" ] == 120
232244 assert params ["num_retries" ] == 5
233245
234- def test_initialization_prints_message (
246+ def test_initialization_logs_message (
235247 self ,
236248 basic_llm_config : LLMConfig ,
237249 mocker : MockerFixture ,
238- capsys : pytest .CaptureFixture ,
250+ caplog : pytest .LogCaptureFixture ,
239251 ) -> None :
240- """Test that initialization prints configuration message."""
252+ """Test that initialization logs configuration message."""
241253 mocker .patch ("lightspeed_evaluation.core.llm.manager.validate_provider_env" )
242254
243- LLMManager (basic_llm_config )
255+ with caplog .at_level (logging .INFO ):
256+ LLMManager (basic_llm_config )
257+
258+ # Should log LLM manager info
259+ assert any ("LLM Manager" in record .message for record in caplog .records )
260+ assert any ("openai" in record .message for record in caplog .records )
261+ assert any ("gpt-4" in record .message for record in caplog .records )
262+
263+
def _create_llm_pool_with_judges(
    judges: list[tuple[str, str]],
    enabled_metrics: list[str] | None = None,
) -> tuple[LLMPoolConfig, JudgePanelConfig]:
    """Build an LLM pool and a judge panel that refer to the same models.

    Args:
        judges: List of (provider, model) tuples. The model name is used both
            as the key in the pool's ``models`` mapping and as the judge ID.
        enabled_metrics: Optional list of metrics to enable for the panel;
            forwarded unchanged to ``JudgePanelConfig``.

    Returns:
        A ``(pool, panel)`` tuple.
    """
    # Keyed by model name, so a repeated model name keeps only its last provider.
    pool_models: dict[str, LLMProviderConfig] = {
        model_name: LLMProviderConfig(provider=provider_name)
        for provider_name, model_name in judges
    }

    shared_parameters = LLMParametersConfig(
        temperature=0.0, max_completion_tokens=512
    )
    llm_pool = LLMPoolConfig(
        defaults=LLMDefaultsConfig(parameters=shared_parameters),
        models=pool_models,
    )

    judge_panel = JudgePanelConfig(
        judges=[model_name for _, model_name in judges],
        enabled_metrics=enabled_metrics,
    )
    return llm_pool, judge_panel
287+
288+
class TestLLMManagerJudgePanel:
    """Tests covering how LLMManager behaves with and without a judge panel.

    Every test patches ``validate_provider_env`` in the manager module so that
    no provider credentials are required.
    """

    def test_without_judge_panel(self, mocker: MockerFixture) -> None:
        """A manager built from a plain LLMConfig reports no judge panel."""
        mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")
        single_config = LLMConfig(provider="openai", model="gpt-4o-mini")
        manager = LLMManager(single_config)

        assert not manager.has_judge_panel()
        assert len(manager.judge_managers) == 0
        # With no panel, the manager itself is the only (and primary) judge.
        assert len(manager.get_judge_managers()) == 1
        assert manager.get_primary_judge() is manager
        assert not manager.should_use_panel_for_metric("ragas:faithfulness")

    def test_with_judge_panel(self, mocker: MockerFixture) -> None:
        """A manager built from a system config exposes every panel judge in order."""
        mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

        judge_specs = [
            ("openai", "gpt-4o-mini"),
            ("openai", "gpt-4o"),
            ("gemini", "gemini-2.0-flash-exp"),
        ]
        pool, panel = _create_llm_pool_with_judges(judge_specs)
        manager = LLMManager.from_system_config(
            SystemConfig(llm_pool=pool, judge_panel=panel)
        )

        # Panel detected
        assert manager.has_judge_panel()
        assert len(manager.judge_managers) == 3

        # Judge managers come back in the order they were configured
        panel_judges = manager.get_judge_managers()
        assert len(panel_judges) == 3
        for position, (_, expected_model) in enumerate(judge_specs):
            assert panel_judges[position].config.model == expected_model

        # Primary judge is first
        assert manager.get_primary_judge().config.model == "gpt-4o-mini"

    def test_should_use_panel_with_enabled_metrics(self, mocker: MockerFixture) -> None:
        """Only metrics named in enabled_metrics are routed to the panel."""
        mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

        pool, panel = _create_llm_pool_with_judges(
            [("openai", "gpt-4o-mini")],
            enabled_metrics=["ragas:faithfulness", "custom:answer_correctness"],
        )
        manager = LLMManager.from_system_config(
            SystemConfig(llm_pool=pool, judge_panel=panel)
        )

        # Metric in list - use panel
        for listed_metric in ("ragas:faithfulness", "custom:answer_correctness"):
            assert manager.should_use_panel_for_metric(listed_metric)

        # Metric not in list - don't use panel
        assert not manager.should_use_panel_for_metric("ragas:response_relevancy")

    def test_should_use_panel_with_enabled_metrics_none(
        self, mocker: MockerFixture
    ) -> None:
        """With enabled_metrics=None, every checked metric is routed to the panel."""
        mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

        # enabled_metrics=None is the default, meaning all metrics use panel
        pool, panel = _create_llm_pool_with_judges(
            [("openai", "gpt-4o-mini")],
            enabled_metrics=None,
        )
        manager = LLMManager.from_system_config(
            SystemConfig(llm_pool=pool, judge_panel=panel)
        )

        # All metrics use panel
        for metric_name in (
            "ragas:faithfulness",
            "custom:answer_correctness",
            "deepeval:conversation_completeness",
        ):
            assert manager.should_use_panel_for_metric(metric_name)

    def test_judge_panel_logs_message(
        self, mocker: MockerFixture, caplog: pytest.LogCaptureFixture
    ) -> None:
        """Building a manager with a two-judge panel logs a panel summary record."""
        mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

        two_judges = [("openai", "gpt-4o-mini"), ("openai", "gpt-4o")]
        pool, panel = _create_llm_pool_with_judges(two_judges)
        system_config = SystemConfig(llm_pool=pool, judge_panel=panel)

        with caplog.at_level(logging.INFO):
            LLMManager.from_system_config(system_config)

        # Both fragments must appear together in a single record.
        panel_announced = any(
            "Judge panel" in record.message and "2 judges" in record.message
            for record in caplog.records
        )
        assert panel_announced
0 commit comments