Skip to content

Commit 3fe356c

Browse files
authored
Merge pull request lightspeed-core#164 from asamal4/judge-panel-manager
[LEADS-191] feat: handle judge panel config in manager
2 parents b6ab879 + 928059a commit 3fe356c

3 files changed

Lines changed: 300 additions & 41 deletions

File tree

src/lightspeed_evaluation/core/llm/manager.py

Lines changed: 125 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""LLM Manager - Generic LLM configuration, validation, and parameter provider."""
22

3+
import logging
34
import os
4-
from typing import Any
5+
from typing import Any, Optional
56

67
from lightspeed_evaluation.core.models import LLMConfig, SystemConfig
78
from lightspeed_evaluation.core.system.env_validator import validate_provider_env
89

10+
logger = logging.getLogger(__name__)
11+
912

1013
class LLMManager:
1114
"""Generic LLM Manager for all use cases (Ragas, DeepEval, Custom metrics).
@@ -14,19 +17,58 @@ class LLMManager:
1417
- Environment validation for multiple providers
1518
- Model name construction
1619
- Provides LLM parameters for consumption by framework-specific managers
20+
- Manages judge panel configurations when available
1721
"""
1822

19-
def __init__(self, config: LLMConfig):
20-
"""Initialize with validated environment and constructed model name."""
23+
def __init__(
24+
self,
25+
config: LLMConfig,
26+
system_config: Optional[SystemConfig] = None,
27+
):
28+
"""Initialize with validated environment and constructed model name.
29+
30+
Args:
31+
config: Primary LLM configuration (also used as fallback)
32+
system_config: Optional full system config for judge panel support
33+
"""
2134
self.config = config
22-
self.model_name = self._construct_model_name_and_validate()
23-
print(
24-
f"✅ LLM Manager: {self.config.provider}/{self.config.model} -> {self.model_name}"
25-
)
26-
27-
def _construct_model_name_and_validate(self) -> str:
28-
"""Construct model name and validate required environment variables."""
29-
provider = self.config.provider.lower()
35+
self.system_config = system_config
36+
self.model_name = self._construct_model_name_and_validate(config)
37+
38+
# Initialize judge panel if available
39+
self.judge_managers: list["LLMManager"] = []
40+
if system_config and system_config.judge_panel and system_config.llm_pool:
41+
panel = system_config.judge_panel
42+
logger.info("Judge panel configured with %d judges", len(panel.judges))
43+
# Create LLM managers for each judge using resolved configs from llms pool
44+
try:
45+
judge_configs = system_config.get_judge_configs()
46+
for resolved_config in judge_configs:
47+
# Create child manager without system_config to avoid recursion
48+
judge_manager = LLMManager(resolved_config)
49+
self.judge_managers.append(judge_manager)
50+
except ValueError as e:
51+
logger.error("Failed to resolve judge panel: %s", e)
52+
raise
53+
else:
54+
# No judge panel - log single LLM info
55+
logger.info(
56+
"LLM Manager: %s/%s -> %s",
57+
self.config.provider,
58+
self.config.model,
59+
self.model_name,
60+
)
61+
62+
def _construct_model_name_and_validate(self, config: LLMConfig) -> str:
63+
"""Construct model name and validate required environment variables.
64+
65+
Args:
66+
config: LLM configuration to construct model name for
67+
68+
Returns:
69+
Constructed model name string
70+
"""
71+
provider = config.provider.lower()
3072

3173
# Provider-specific validation and model name construction
3274
provider_handlers = {
@@ -44,8 +86,8 @@ def _construct_model_name_and_validate(self) -> str:
4486
return provider_handlers[provider]()
4587

4688
# Generic provider - try as-is with warning
47-
print(f"⚠️ Using generic provider format for {provider}")
48-
return f"{provider}/{self.config.model}"
89+
logger.warning("Using generic provider format for %s", provider)
90+
return f"{provider}/{config.model}"
4991

5092
def _handle_hosted_vllm_provider(self) -> str:
5193
"""Handle hosted vLLM provider setup."""
@@ -88,6 +130,59 @@ def _handle_ollama_provider(self) -> str:
88130
validate_provider_env("ollama")
89131
return f"ollama/{self.config.model}"
90132

133+
def has_judge_panel(self) -> bool:
    """Report whether this manager holds any judge managers.

    Returns:
        True when ``judge_managers`` contains at least one judge, else False.
    """
    return bool(self.judge_managers)
140+
141+
def get_judge_managers(self) -> list["LLMManager"]:
    """Return the managers that should act as judges.

    Returns:
        The stored judge managers when there are any; otherwise a
        one-element list containing this manager, so the result is
        never empty.
    """
    # With no judges stored, this manager stands in as the single judge.
    return self.judge_managers or [self]
152+
153+
def get_primary_judge(self) -> "LLMManager":
    """Return the manager to use when only a single judge is wanted.

    Returns:
        The first entry of ``judge_managers`` when that list is non-empty,
        otherwise this manager itself.
    """
    judges = self.judge_managers
    return judges[0] if judges else self
165+
166+
def should_use_panel_for_metric(self, metric_identifier: str) -> bool:
    """Decide whether a metric should be evaluated by the judge panel.

    Args:
        metric_identifier: Metric identifier (e.g., "ragas:faithfulness")

    Returns:
        True only when judge managers exist on this manager and the panel's
        ``enabled_metrics`` is either None (every metric uses the panel) or
        contains ``metric_identifier``. False in every other case.
    """
    # A judge_panel section in the config is not sufficient on its own:
    # if no judge managers were built there is no panel to run, so answer
    # consistently with has_judge_panel().
    if not self.judge_managers:
        return False

    if not (self.system_config and self.system_config.judge_panel):
        # Default: don't use panel (use primary judge only)
        return False

    enabled_metrics = self.system_config.judge_panel.enabled_metrics
    # None means the panel is not restricted to a subset of metrics.
    if enabled_metrics is None:
        return True
    return metric_identifier in enabled_metrics
185+
91186
def get_model_name(self) -> str:
    """Return the model name string stored on this manager."""
    return self.model_name
@@ -110,10 +205,24 @@ def get_config(self) -> LLMConfig:
110205

111206
@classmethod
112207
def from_system_config(cls, system_config: SystemConfig) -> "LLMManager":
113-
"""Create LLM Manager from system configuration."""
114-
return cls(system_config.llm)
208+
"""Create LLM Manager from system configuration.
209+
210+
Args:
211+
system_config: System configuration with LLM and optional judge panel
212+
213+
Returns:
214+
LLMManager with judge panel support if configured
215+
"""
216+
return cls(system_config.llm, system_config=system_config)
115217

116218
@classmethod
117219
def from_llm_config(cls, llm_config: LLMConfig) -> "LLMManager":
118-
"""Create LLM Manager from LLMConfig directly."""
220+
"""Create LLM Manager from LLMConfig directly (no judge panel support).
221+
222+
Args:
223+
llm_config: LLM configuration
224+
225+
Returns:
226+
LLMManager without judge panel support
227+
"""
119228
return cls(llm_config)

tests/unit/core/llm/test_llm_manager.py

Lines changed: 155 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
11
"""Unit tests for LLM Manager."""
22

3+
import logging
4+
35
import pytest
46
from pytest_mock import MockerFixture
57

6-
from lightspeed_evaluation.core.models import LLMConfig, SystemConfig
8+
from lightspeed_evaluation.core.models import (
9+
LLMConfig,
10+
SystemConfig,
11+
LLMPoolConfig,
12+
JudgePanelConfig,
13+
)
14+
from lightspeed_evaluation.core.models.system import (
15+
LLMDefaultsConfig,
16+
LLMParametersConfig,
17+
LLMProviderConfig,
18+
)
719
from lightspeed_evaluation.core.llm.manager import LLMManager
820

921

@@ -128,7 +140,7 @@ def test_initialization_hosted_vllm(self, mocker: MockerFixture) -> None:
128140
assert manager.model_name == "hosted_vllm/mistral-7b"
129141

130142
def test_initialization_generic_provider(
131-
self, mocker: MockerFixture, capsys: pytest.CaptureFixture
143+
self, mocker: MockerFixture, caplog: pytest.LogCaptureFixture
132144
) -> None:
133145
"""Test initialization with unknown/generic provider."""
134146
config = LLMConfig(
@@ -138,14 +150,14 @@ def test_initialization_generic_provider(
138150
)
139151
mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")
140152

141-
manager = LLMManager(config)
153+
with caplog.at_level(logging.WARNING):
154+
manager = LLMManager(config)
142155

143156
# Should construct generic model name
144157
assert manager.model_name == "custom_provider/custom-model"
145158

146-
# Should print warning
147-
captured = capsys.readouterr()
148-
assert "generic" in captured.out.lower() or "warning" in captured.out.lower()
159+
# Should log warning about generic provider
160+
assert any("generic" in record.message.lower() for record in caplog.records)
149161

150162
def test_get_model_name(
151163
self, basic_llm_config: LLMConfig, mocker: MockerFixture
@@ -231,18 +243,147 @@ def test_llm_params_with_custom_values(self, mocker: MockerFixture) -> None:
231243
assert params["timeout"] == 120
232244
assert params["num_retries"] == 5
233245

234-
def test_initialization_prints_message(
246+
def test_initialization_logs_message(
235247
self,
236248
basic_llm_config: LLMConfig,
237249
mocker: MockerFixture,
238-
capsys: pytest.CaptureFixture,
250+
caplog: pytest.LogCaptureFixture,
239251
) -> None:
240-
"""Test that initialization prints configuration message."""
252+
"""Test that initialization logs configuration message."""
241253
mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")
242254

243-
LLMManager(basic_llm_config)
255+
with caplog.at_level(logging.INFO):
256+
LLMManager(basic_llm_config)
257+
258+
# Should log LLM manager info
259+
assert any("LLM Manager" in record.message for record in caplog.records)
260+
assert any("openai" in record.message for record in caplog.records)
261+
assert any("gpt-4" in record.message for record in caplog.records)
262+
263+
264+
def _create_llm_pool_with_judges(
    judges: list[tuple[str, str]],
    enabled_metrics: list[str] | None = None,
) -> tuple[LLMPoolConfig, JudgePanelConfig]:
    """Build a matching LLM pool and judge panel from a list of judges.

    Args:
        judges: List of (provider, model) tuples. Each model name is used
            both as the key in the pool's ``models`` mapping and as the
            judge ID in the panel.
        enabled_metrics: Optional list of metrics to enable for panel.

    Returns:
        Tuple of (LLMPoolConfig, JudgePanelConfig).
    """
    provider_by_model: dict[str, LLMProviderConfig] = {
        model: LLMProviderConfig(provider=provider) for provider, model in judges
    }
    shared_parameters = LLMParametersConfig(
        temperature=0.0, max_completion_tokens=512
    )
    pool = LLMPoolConfig(
        defaults=LLMDefaultsConfig(parameters=shared_parameters),
        models=provider_by_model,
    )
    panel = JudgePanelConfig(
        judges=[model for _, model in judges],
        enabled_metrics=enabled_metrics,
    )
    return pool, panel
287+
288+
289+
class TestLLMManagerJudgePanel:
290+
"""Tests for LLMManager judge panel functionality."""
291+
292+
def test_without_judge_panel(self, mocker: MockerFixture) -> None:
    """A manager built from a bare LLMConfig has no panel and judges alone."""
    mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")
    solo_config = LLMConfig(provider="openai", model="gpt-4o-mini")
    manager = LLMManager(solo_config)

    # No panel is reported and no judge managers are stored
    assert not manager.has_judge_panel()
    assert len(manager.judge_managers) == 0

    # The manager itself is the only (and primary) judge
    assert len(manager.get_judge_managers()) == 1
    assert manager.get_primary_judge() is manager
    assert not manager.should_use_panel_for_metric("ragas:faithfulness")
302+
303+
def test_with_judge_panel(self, mocker: MockerFixture) -> None:
    """A three-judge panel yields three judge managers in configured order."""
    mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

    judge_specs = [
        ("openai", "gpt-4o-mini"),
        ("openai", "gpt-4o"),
        ("gemini", "gemini-2.0-flash-exp"),
    ]
    pool, panel = _create_llm_pool_with_judges(judge_specs)
    manager = LLMManager.from_system_config(
        SystemConfig(llm_pool=pool, judge_panel=panel)
    )

    # Panel detected
    assert manager.has_judge_panel()
    assert len(manager.judge_managers) == 3

    # Judge managers keep the order in which the judges were listed
    judges = manager.get_judge_managers()
    assert len(judges) == 3
    assert judges[0].config.model == "gpt-4o-mini"
    assert judges[1].config.model == "gpt-4o"
    assert judges[2].config.model == "gemini-2.0-flash-exp"

    # Primary judge is first
    primary = manager.get_primary_judge()
    assert primary.config.model == "gpt-4o-mini"
330+
331+
def test_should_use_panel_with_enabled_metrics(self, mocker: MockerFixture) -> None:
332+
"""Test should_use_panel with enabled_metrics."""
333+
mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")
334+
335+
pool, panel = _create_llm_pool_with_judges(
336+
[("openai", "gpt-4o-mini")],
337+
enabled_metrics=["ragas:faithfulness", "custom:answer_correctness"],
338+
)
339+
system_config = SystemConfig(llm_pool=pool, judge_panel=panel)
340+
manager = LLMManager.from_system_config(system_config)
244341

245-
captured = capsys.readouterr()
246-
assert "LLM Manager" in captured.out
247-
assert "openai" in captured.out
248-
assert "gpt-4" in captured.out
342+
# Metric in list - use panel
343+
assert manager.should_use_panel_for_metric("ragas:faithfulness")
344+
assert manager.should_use_panel_for_metric("custom:answer_correctness")
345+
346+
# Metric not in list - don't use panel
347+
assert not manager.should_use_panel_for_metric("ragas:response_relevancy")
348+
349+
def test_should_use_panel_with_enabled_metrics_none(
    self, mocker: MockerFixture
) -> None:
    """With enabled_metrics=None every metric is routed to the panel."""
    mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

    # Passing None explicitly: the panel is not restricted to specific metrics
    pool, panel = _create_llm_pool_with_judges(
        [("openai", "gpt-4o-mini")],
        enabled_metrics=None,
    )
    manager = LLMManager.from_system_config(
        SystemConfig(llm_pool=pool, judge_panel=panel)
    )

    # All metrics use panel
    for metric in (
        "ragas:faithfulness",
        "custom:answer_correctness",
        "deepeval:conversation_completeness",
    ):
        assert manager.should_use_panel_for_metric(metric)
367+
368+
def test_judge_panel_logs_message(
    self, mocker: MockerFixture, caplog: pytest.LogCaptureFixture
) -> None:
    """Building a manager with a two-judge panel logs the judge count."""
    mocker.patch("lightspeed_evaluation.core.llm.manager.validate_provider_env")

    pool, panel = _create_llm_pool_with_judges(
        [("openai", "gpt-4o-mini"), ("openai", "gpt-4o")]
    )
    system_config = SystemConfig(llm_pool=pool, judge_panel=panel)

    with caplog.at_level(logging.INFO):
        LLMManager.from_system_config(system_config)

    # At least one record must mention the panel together with its size
    panel_messages = [
        record.message
        for record in caplog.records
        if "Judge panel" in record.message and "2 judges" in record.message
    ]
    assert panel_messages

0 commit comments

Comments
 (0)