diff --git a/lmms_eval/llm_judge/factory.py b/lmms_eval/llm_judge/factory.py index e884bf3b9..092a6652b 100644 --- a/lmms_eval/llm_judge/factory.py +++ b/lmms_eval/llm_judge/factory.py @@ -5,9 +5,11 @@ from .protocol import ServerConfig from .providers import ( AsyncAzureOpenAIProvider, + AsyncMiniMaxProvider, AsyncOpenAIProvider, AzureOpenAIProvider, DummyProvider, + MiniMaxProvider, OpenAIProvider, ) @@ -15,7 +17,7 @@ class ProviderFactory: """Factory for creating judge instances based on configuration""" - _provider_classes = {"openai": OpenAIProvider, "azure": AzureOpenAIProvider, "async_openai": AsyncOpenAIProvider, "async_azure": AsyncAzureOpenAIProvider, "dummy": DummyProvider} + _provider_classes = {"openai": OpenAIProvider, "azure": AzureOpenAIProvider, "async_openai": AsyncOpenAIProvider, "async_azure": AsyncAzureOpenAIProvider, "minimax": MiniMaxProvider, "async_minimax": AsyncMiniMaxProvider, "dummy": DummyProvider} # TODO # This should actually be a decorator that registers the class diff --git a/lmms_eval/llm_judge/providers/__init__.py b/lmms_eval/llm_judge/providers/__init__.py index 9fbdb284d..bcb1cfdc2 100644 --- a/lmms_eval/llm_judge/providers/__init__.py +++ b/lmms_eval/llm_judge/providers/__init__.py @@ -1,7 +1,9 @@ from .async_azure_openai import AsyncAzureOpenAIProvider +from .async_minimax import AsyncMiniMaxProvider from .async_openai import AsyncOpenAIProvider from .azure_openai import AzureOpenAIProvider from .dummy import DummyProvider +from .minimax import MiniMaxProvider from .openai import OpenAIProvider __all__ = [ @@ -9,5 +11,7 @@ "AzureOpenAIProvider", "AsyncOpenAIProvider", "AsyncAzureOpenAIProvider", + "MiniMaxProvider", + "AsyncMiniMaxProvider", "DummyProvider", ] diff --git a/lmms_eval/llm_judge/providers/async_minimax.py b/lmms_eval/llm_judge/providers/async_minimax.py new file mode 100644 index 000000000..63d6dc890 --- /dev/null +++ b/lmms_eval/llm_judge/providers/async_minimax.py @@ -0,0 +1,170 @@ +import asyncio +import os +from typing import Dict, List, Optional, Union + +import aiohttp +from loguru import logger as eval_logger + +from lmms_eval.models.model_utils.usage_metrics import log_usage + +from ..base import AsyncServerInterface +from ..protocol import Request, Response, ServerConfig +from .minimax import MiniMaxProvider, _clamp_temperature, _strip_think_tags + + +class AsyncMiniMaxProvider(AsyncServerInterface): + """Async MiniMax API implementation of the Judge interface. + + Uses the same OpenAI-compatible endpoint as :class:`MiniMaxProvider` + but through an asynchronous client (``AsyncOpenAI`` or ``aiohttp``). 
+ """ + + MINIMAX_BASE_URL = MiniMaxProvider.MINIMAX_BASE_URL + + def __init__(self, config: Optional[ServerConfig] = None): + super().__init__(config) + self.api_key = os.getenv("MINIMAX_API_KEY", "") + self.api_url = f"{self.MINIMAX_BASE_URL}/chat/completions" + + self.use_async_client = False + try: + from openai import AsyncOpenAI + + self.async_client = AsyncOpenAI( + api_key=self.api_key, + base_url=self.MINIMAX_BASE_URL, + ) + self.use_async_client = True + except ImportError: + eval_logger.warning( + "AsyncOpenAI client not available, using aiohttp for MiniMax" + ) + + def is_available(self) -> bool: + return bool(self.api_key) + + async def evaluate_async(self, request: Request) -> Response: + """Evaluate using MiniMax API asynchronously.""" + if not self.is_available(): + raise ValueError("MiniMax API key not configured (set MINIMAX_API_KEY)") + + config = request.config or self.config + messages = self.prepare_messages(request) + + if request.images: + messages = self._add_images_to_messages(messages, request.images) + + payload = { + "model": config.model_name, + "messages": messages, + "temperature": _clamp_temperature(config.temperature), + "max_tokens": config.max_tokens, + } + + if config.top_p is not None: + payload["top_p"] = config.top_p + + if config.response_format == "json": + payload["response_format"] = {"type": "json_object"} + + async with self.semaphore: + for attempt in range(config.num_retries): + try: + if self.use_async_client: + response = await self.async_client.chat.completions.create( + **payload + ) + content = response.choices[0].message.content + model_used = response.model + usage = ( + response.usage.model_dump() + if hasattr(response.usage, "model_dump") + else None + ) + raw_response = response + else: + response = await self._make_async_request( + payload, config.timeout + ) + content = response["choices"][0]["message"]["content"] + model_used = response["model"] + usage = response.get("usage") + raw_response = response + + content = _strip_think_tags(content) + + # Log usage + if ( + self.use_async_client + and hasattr(response, "usage") + and response.usage + ): + log_usage( + model_name=model_used or config.model_name, + task_name=None, + input_tokens=getattr( + response.usage, "prompt_tokens", 0 + ) + or 0, + output_tokens=getattr( + response.usage, "completion_tokens", 0 + ) + or 0, + reasoning_tokens=0, + source="judge", + ) + elif not self.use_async_client and isinstance(usage, dict): + log_usage( + model_name=model_used or config.model_name, + task_name=None, + input_tokens=usage.get("prompt_tokens", 0) or 0, + output_tokens=usage.get("completion_tokens", 0) or 0, + reasoning_tokens=0, + source="judge", + ) + + return Response( + content=content.strip(), + model_used=model_used, + usage=usage, + raw_response=raw_response, + ) + + except Exception as e: + eval_logger.warning( + f"MiniMax async attempt {attempt + 1}/{config.num_retries} " + f"failed: {e}" + ) + if attempt < config.num_retries - 1: + await asyncio.sleep(config.retry_delay) + else: + eval_logger.error( + f"All {config.num_retries} MiniMax async attempts failed" + ) + raise + + async def _make_async_request(self, payload: Dict, timeout: int) -> Dict: + """Make async HTTP request to MiniMax API.""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + async with aiohttp.ClientSession() as session: + async with session.post( + self.api_url, + headers=headers, + json=payload, + timeout=aiohttp.ClientTimeout(total=timeout), + ) as 
response:
+                response.raise_for_status()
+                return await response.json()
+
+    def _add_images_to_messages(
+        self, messages: List[Dict], images: List[Union[str, bytes]]
+    ) -> List[Dict]:
+        """Add images to messages – reuse from MiniMaxProvider."""
+        return MiniMaxProvider._add_images_to_messages(self, messages, images)
+
+    def _encode_image(self, image_path: str) -> str:
+        """Encode image to base64 – reuse from MiniMaxProvider."""
+        return MiniMaxProvider._encode_image(self, image_path)
diff --git a/lmms_eval/llm_judge/providers/minimax.py b/lmms_eval/llm_judge/providers/minimax.py
new file mode 100644
index 000000000..40abfe9f0
--- /dev/null
+++ b/lmms_eval/llm_judge/providers/minimax.py
@@ -0,0 +1,212 @@
+import os
+import re
+import time
+from typing import Dict, List, Optional, Union
+
+import requests
+from loguru import logger as eval_logger
+
+from lmms_eval.models.model_utils.media_encoder import encode_image_to_base64
+from lmms_eval.models.model_utils.usage_metrics import log_usage
+
+from ..base import ServerInterface
+from ..protocol import Request, Response, ServerConfig
+
+# MiniMax temperature must be in [0.0, 1.0]
+_MINIMAX_TEMP_MIN = 0.0
+_MINIMAX_TEMP_MAX = 1.0
+
+
+def _clamp_temperature(temperature: float) -> float:
+    """Clamp temperature to MiniMax's accepted range [0.0, 1.0]."""
+    return max(_MINIMAX_TEMP_MIN, min(_MINIMAX_TEMP_MAX, temperature))
+
+
+def _strip_think_tags(text: str) -> str:
+    """Strip <think>...</think> tags that MiniMax reasoning models may emit."""
+    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
+
+
+class MiniMaxProvider(ServerInterface):
+    """MiniMax API implementation of the Judge interface.
+
+    MiniMax exposes an OpenAI-compatible chat completions endpoint at
+    ``https://api.minimax.io/v1``. This provider re-uses the ``openai``
+    Python SDK (if available) with a custom *base_url*, falling back to
+    raw ``requests`` calls otherwise.
+
+    Supported models include ``MiniMax-M2.7``, ``MiniMax-M2.5``, and
+    ``MiniMax-M2.5-highspeed`` (204K context).
+
+    Environment variables
+    ---------------------
+    MINIMAX_API_KEY : str
+        API key for the MiniMax platform.
+ """ + + MINIMAX_BASE_URL = "https://api.minimax.io/v1" + + def __init__(self, config: Optional[ServerConfig] = None): + super().__init__(config) + self.api_key = os.getenv("MINIMAX_API_KEY", "") + self.api_url = f"{self.MINIMAX_BASE_URL}/chat/completions" + + # Initialise OpenAI client pointed at MiniMax + try: + from openai import OpenAI + + self.client = OpenAI( + api_key=self.api_key, + base_url=self.MINIMAX_BASE_URL, + ) + self.use_client = True + except ImportError: + eval_logger.warning( + "OpenAI client not available, falling back to requests for MiniMax" + ) + self.use_client = False + + def is_available(self) -> bool: + return bool(self.api_key) + + def evaluate(self, request: Request) -> Response: + """Evaluate using the MiniMax API.""" + if not self.is_available(): + raise ValueError("MiniMax API key not configured (set MINIMAX_API_KEY)") + + config = request.config or self.config + messages = self.prepare_messages(request) + + if request.images: + messages = self._add_images_to_messages(messages, request.images) + + payload = { + "model": config.model_name, + "messages": messages, + "temperature": _clamp_temperature(config.temperature), + "max_tokens": config.max_tokens, + } + + if config.top_p is not None: + payload["top_p"] = config.top_p + + if config.response_format == "json": + payload["response_format"] = {"type": "json_object"} + + for attempt in range(config.num_retries): + try: + if self.use_client: + response = self.client.chat.completions.create(**payload) + content = response.choices[0].message.content + model_used = response.model + usage = ( + response.usage.model_dump() + if hasattr(response.usage, "model_dump") + else None + ) + raw_response = response + else: + response = self._make_request(payload, config.timeout) + content = response["choices"][0]["message"]["content"] + model_used = response["model"] + usage = response.get("usage") + raw_response = response + + # Strip tags from reasoning models + content = _strip_think_tags(content) + + # Log usage for token tracking + if self.use_client and hasattr(response, "usage") and response.usage: + log_usage( + model_name=model_used or config.model_name, + task_name=None, + input_tokens=getattr(response.usage, "prompt_tokens", 0) or 0, + output_tokens=getattr(response.usage, "completion_tokens", 0) + or 0, + reasoning_tokens=0, + source="judge", + ) + elif not self.use_client and isinstance(usage, dict): + log_usage( + model_name=model_used or config.model_name, + task_name=None, + input_tokens=usage.get("prompt_tokens", 0) or 0, + output_tokens=usage.get("completion_tokens", 0) or 0, + reasoning_tokens=0, + source="judge", + ) + + return Response( + content=content.strip(), + model_used=model_used, + usage=usage, + raw_response=raw_response, + ) + + except Exception as e: + eval_logger.warning( + f"MiniMax attempt {attempt + 1}/{config.num_retries} failed: {e}" + ) + if attempt < config.num_retries - 1: + time.sleep(config.retry_delay) + else: + eval_logger.error( + f"All {config.num_retries} MiniMax attempts failed" + ) + raise + + def _make_request(self, payload: Dict, timeout: int) -> Dict: + """Make HTTP request to MiniMax API.""" + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + response = requests.post( + self.api_url, headers=headers, json=payload, timeout=timeout + ) + response.raise_for_status() + return response.json() + + def _add_images_to_messages( + self, messages: List[Dict], images: List[Union[str, bytes]] + ) -> List[Dict]: + """Add images to 
the last user message.""" + for i in range(len(messages) - 1, -1, -1): + if messages[i]["role"] == "user": + if isinstance(messages[i]["content"], str): + messages[i]["content"] = [ + {"type": "text", "text": messages[i]["content"]} + ] + + for image in images: + if isinstance(image, str): + base64_image = self._encode_image(image) + messages[i]["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{base64_image}" + }, + } + ) + elif isinstance(image, bytes): + messages[i]["content"].append( + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image.decode()}" + }, + } + ) + break + return messages + + def _encode_image(self, image_path: str) -> str: + """Encode image to base64.""" + return encode_image_to_base64( + image_path, + image_format="JPEG", + convert_rgb=True, + quality=85, + use_path_cache=True, + ) diff --git a/test/eval/test_minimax_provider.py b/test/eval/test_minimax_provider.py new file mode 100644 index 000000000..bb7d774d3 --- /dev/null +++ b/test/eval/test_minimax_provider.py @@ -0,0 +1,431 @@ +"""Tests for MiniMax LLM judge providers. + +Covers: + - Temperature clamping + - Think-tag stripping + - MiniMaxProvider (sync) construction, availability, evaluate + - AsyncMiniMaxProvider construction, availability, evaluate_async + - ProviderFactory registration ('minimax', 'async_minimax') +""" + +import asyncio +import os +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from lmms_eval.llm_judge.factory import ProviderFactory +from lmms_eval.llm_judge.protocol import Request, Response, ServerConfig +from lmms_eval.llm_judge.providers.minimax import ( + MiniMaxProvider, + _clamp_temperature, + _strip_think_tags, +) + + +# ============================================================================ +# Temperature clamping +# ============================================================================ + + +class TestClampTemperature: + def test_within_range(self): + assert _clamp_temperature(0.5) == 0.5 + + def test_at_lower_bound(self): + assert _clamp_temperature(0.0) == 0.0 + + def test_at_upper_bound(self): + assert _clamp_temperature(1.0) == 1.0 + + def test_below_lower_bound(self): + assert _clamp_temperature(-0.5) == 0.0 + + def test_above_upper_bound(self): + assert _clamp_temperature(1.5) == 1.0 + + def test_high_temperature(self): + assert _clamp_temperature(2.0) == 1.0 + + +# ============================================================================ +# Think-tag stripping +# ============================================================================ + + +class TestStripThinkTags: + def test_no_tags(self): + assert _strip_think_tags("Hello world") == "Hello world" + + def test_single_tag(self): + assert _strip_think_tags("reasoningAnswer") == "Answer" + + def test_multiline_tag(self): + text = "\nline1\nline2\n\nResult" + assert _strip_think_tags(text) == "Result" + + def test_multiple_tags(self): + text = "aXbY" + assert _strip_think_tags(text) == "XY" + + def test_empty_tag(self): + assert _strip_think_tags("OK") == "OK" + + def test_only_tag(self): + assert _strip_think_tags("only") == "" + + +# ============================================================================ +# MiniMaxProvider – construction & availability +# ============================================================================ + + +class TestMiniMaxProviderInit: + @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}, clear=False) + 
@patch("lmms_eval.llm_judge.providers.minimax.OpenAI", create=True) + def test_is_available_with_key(self, mock_openai_cls): + # Patch the import inside __init__ + with patch.dict("sys.modules", {"openai": MagicMock()}): + provider = MiniMaxProvider.__new__(MiniMaxProvider) + provider.config = ServerConfig(model_name="MiniMax-M2.7") + provider.api_key = "test-key" + provider.api_url = f"{MiniMaxProvider.MINIMAX_BASE_URL}/chat/completions" + provider.use_client = True + assert provider.is_available() is True + + @patch.dict(os.environ, {}, clear=False) + def test_is_not_available_without_key(self): + provider = MiniMaxProvider.__new__(MiniMaxProvider) + provider.config = ServerConfig(model_name="MiniMax-M2.7") + provider.api_key = "" + assert provider.is_available() is False + + +# ============================================================================ +# MiniMaxProvider – evaluate (mocked) +# ============================================================================ + + +def _make_mock_response(content="test response", model="MiniMax-M2.7"): + """Build a mock OpenAI-style chat completion response.""" + usage = SimpleNamespace( + prompt_tokens=10, + completion_tokens=20, + model_dump=lambda: {"prompt_tokens": 10, "completion_tokens": 20}, + ) + choice = SimpleNamespace(message=SimpleNamespace(content=content)) + return SimpleNamespace(choices=[choice], model=model, usage=usage) + + +class TestMiniMaxProviderEvaluate: + def _build_provider(self): + provider = MiniMaxProvider.__new__(MiniMaxProvider) + provider.config = ServerConfig(model_name="MiniMax-M2.7") + provider.api_key = "test-key" + provider.api_url = f"{MiniMaxProvider.MINIMAX_BASE_URL}/chat/completions" + provider.use_client = True + provider.client = MagicMock() + return provider + + def test_evaluate_returns_response(self): + provider = self._build_provider() + mock_resp = _make_mock_response("The answer is 42.") + provider.client.chat.completions.create.return_value = mock_resp + + request = Request( + messages=[{"role": "user", "content": "What is 6*7?"}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + result = provider.evaluate(request) + + assert isinstance(result, Response) + assert result.content == "The answer is 42." + assert result.model_used == "MiniMax-M2.7" + + def test_evaluate_strips_think_tags(self): + provider = self._build_provider() + mock_resp = _make_mock_response("reasoningFinal answer.") + provider.client.chat.completions.create.return_value = mock_resp + + request = Request( + messages=[{"role": "user", "content": "Think hard."}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + result = provider.evaluate(request) + assert result.content == "Final answer." 
+ + def test_evaluate_clamps_temperature(self): + provider = self._build_provider() + mock_resp = _make_mock_response("ok") + provider.client.chat.completions.create.return_value = mock_resp + + request = Request( + messages=[{"role": "user", "content": "hi"}], + config=ServerConfig( + model_name="MiniMax-M2.7", temperature=2.0, num_retries=1 + ), + ) + provider.evaluate(request) + + call_kwargs = provider.client.chat.completions.create.call_args[1] + assert call_kwargs["temperature"] == 1.0 + + def test_evaluate_raises_without_key(self): + provider = self._build_provider() + provider.api_key = "" + + request = Request( + messages=[{"role": "user", "content": "hi"}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + with pytest.raises(ValueError, match="MiniMax API key not configured"): + provider.evaluate(request) + + def test_evaluate_retries_on_failure(self): + provider = self._build_provider() + provider.client.chat.completions.create.side_effect = [ + RuntimeError("timeout"), + _make_mock_response("recovered"), + ] + + request = Request( + messages=[{"role": "user", "content": "retry?"}], + config=ServerConfig( + model_name="MiniMax-M2.7", num_retries=2, retry_delay=0 + ), + ) + result = provider.evaluate(request) + assert result.content == "recovered" + + def test_evaluate_with_json_response_format(self): + provider = self._build_provider() + mock_resp = _make_mock_response('{"score": 5}') + provider.client.chat.completions.create.return_value = mock_resp + + request = Request( + messages=[{"role": "user", "content": "score this"}], + config=ServerConfig( + model_name="MiniMax-M2.7", + response_format="json", + num_retries=1, + ), + ) + provider.evaluate(request) + + call_kwargs = provider.client.chat.completions.create.call_args[1] + assert call_kwargs["response_format"] == {"type": "json_object"} + + def test_evaluate_with_top_p(self): + provider = self._build_provider() + mock_resp = _make_mock_response("ok") + provider.client.chat.completions.create.return_value = mock_resp + + request = Request( + messages=[{"role": "user", "content": "hi"}], + config=ServerConfig( + model_name="MiniMax-M2.7", top_p=0.9, num_retries=1 + ), + ) + provider.evaluate(request) + + call_kwargs = provider.client.chat.completions.create.call_args[1] + assert call_kwargs["top_p"] == 0.9 + + def test_evaluate_fallback_requests(self): + provider = self._build_provider() + provider.use_client = False + + mock_json = { + "choices": [{"message": {"content": "fallback answer"}}], + "model": "MiniMax-M2.7", + "usage": {"prompt_tokens": 5, "completion_tokens": 10}, + } + + with patch("lmms_eval.llm_judge.providers.minimax.requests.post") as mock_post: + mock_post.return_value = MagicMock( + status_code=200, + json=MagicMock(return_value=mock_json), + raise_for_status=MagicMock(), + ) + + request = Request( + messages=[{"role": "user", "content": "hi"}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + result = provider.evaluate(request) + assert result.content == "fallback answer" + + +# ============================================================================ +# AsyncMiniMaxProvider +# ============================================================================ + + +class TestAsyncMiniMaxProvider: + def _build_async_provider(self): + from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider + + provider = AsyncMiniMaxProvider.__new__(AsyncMiniMaxProvider) + provider.config = ServerConfig(model_name="MiniMax-M2.7") + provider.api_key = "test-key" + 
provider.api_url = f"{AsyncMiniMaxProvider.MINIMAX_BASE_URL}/chat/completions" + provider.use_async_client = True + provider.async_client = MagicMock() + provider.semaphore = asyncio.Semaphore(10) + return provider + + def test_is_available(self): + provider = self._build_async_provider() + assert provider.is_available() is True + + def test_is_not_available(self): + provider = self._build_async_provider() + provider.api_key = "" + assert provider.is_available() is False + + def test_evaluate_async(self): + provider = self._build_async_provider() + mock_resp = _make_mock_response("async answer") + provider.async_client.chat.completions.create = AsyncMock( + return_value=mock_resp + ) + + request = Request( + messages=[{"role": "user", "content": "async test"}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + result = asyncio.get_event_loop().run_until_complete( + provider.evaluate_async(request) + ) + assert isinstance(result, Response) + assert result.content == "async answer" + + def test_evaluate_async_strips_think_tags(self): + provider = self._build_async_provider() + mock_resp = _make_mock_response("stepsDone") + provider.async_client.chat.completions.create = AsyncMock( + return_value=mock_resp + ) + + request = Request( + messages=[{"role": "user", "content": "think"}], + config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1), + ) + result = asyncio.get_event_loop().run_until_complete( + provider.evaluate_async(request) + ) + assert result.content == "Done" + + +# ============================================================================ +# ProviderFactory registration +# ============================================================================ + + +class TestProviderFactoryMiniMax: + def test_minimax_registered(self): + assert "minimax" in ProviderFactory._provider_classes + + def test_async_minimax_registered(self): + assert "async_minimax" in ProviderFactory._provider_classes + + def test_create_minimax_provider(self): + with patch.dict(os.environ, {"MINIMAX_API_KEY": "k"}, clear=False): + provider = ProviderFactory.create_provider( + api_type="minimax", + config=ServerConfig(model_name="MiniMax-M2.7"), + ) + assert isinstance(provider, MiniMaxProvider) + + def test_create_async_minimax_provider(self): + from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider + + with patch.dict(os.environ, {"MINIMAX_API_KEY": "k"}, clear=False): + provider = ProviderFactory.create_provider( + api_type="async_minimax", + config=ServerConfig(model_name="MiniMax-M2.7"), + ) + assert isinstance(provider, AsyncMiniMaxProvider) + + def test_env_api_type_minimax(self): + with patch.dict( + os.environ, + {"API_TYPE": "minimax", "MINIMAX_API_KEY": "k"}, + clear=False, + ): + provider = ProviderFactory.create_provider( + config=ServerConfig(model_name="MiniMax-M2.7") + ) + assert isinstance(provider, MiniMaxProvider) + + +# ============================================================================ +# Integration tests (skipped without MINIMAX_API_KEY) +# ============================================================================ + + +@pytest.mark.skipif( + not os.environ.get("MINIMAX_API_KEY"), + reason="MINIMAX_API_KEY not set", +) +class TestMiniMaxIntegration: + """Live integration tests against the real MiniMax API.""" + + def test_live_evaluate(self): + config = ServerConfig( + model_name="MiniMax-M2.7", + temperature=0.0, + max_tokens=256, + num_retries=2, + ) + provider = MiniMaxProvider(config=config) + request = Request( + 
messages=[{"role": "user", "content": "Reply with exactly: hello"}], + config=config, + ) + result = provider.evaluate(request) + assert isinstance(result, Response) + assert result.content # non-empty + assert result.model_used + + def test_live_json_response(self): + config = ServerConfig( + model_name="MiniMax-M2.7", + temperature=0.0, + max_tokens=256, + response_format="json", + num_retries=2, + ) + provider = MiniMaxProvider(config=config) + request = Request( + messages=[ + { + "role": "user", + "content": 'Return a JSON object: {"score": 5}', + } + ], + config=config, + ) + result = provider.evaluate(request) + assert "score" in result.content + + def test_live_async_evaluate(self): + from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider + + config = ServerConfig( + model_name="MiniMax-M2.7", + temperature=0.0, + max_tokens=256, + num_retries=2, + ) + provider = AsyncMiniMaxProvider(config=config) + request = Request( + messages=[{"role": "user", "content": "Reply with exactly: world"}], + config=config, + ) + result = asyncio.get_event_loop().run_until_complete( + provider.evaluate_async(request) + ) + assert isinstance(result, Response) + assert result.content