diff --git a/lmms_eval/llm_judge/factory.py b/lmms_eval/llm_judge/factory.py
index e884bf3b9..092a6652b 100644
--- a/lmms_eval/llm_judge/factory.py
+++ b/lmms_eval/llm_judge/factory.py
@@ -5,9 +5,11 @@
from .protocol import ServerConfig
from .providers import (
AsyncAzureOpenAIProvider,
+ AsyncMiniMaxProvider,
AsyncOpenAIProvider,
AzureOpenAIProvider,
DummyProvider,
+ MiniMaxProvider,
OpenAIProvider,
)
@@ -15,7 +17,12 @@
class ProviderFactory:
"""Factory for creating judge instances based on configuration"""
- _provider_classes = {"openai": OpenAIProvider, "azure": AzureOpenAIProvider, "async_openai": AsyncOpenAIProvider, "async_azure": AsyncAzureOpenAIProvider, "dummy": DummyProvider}
+ _provider_classes = {"openai": OpenAIProvider, "azure": AzureOpenAIProvider, "async_openai": AsyncOpenAIProvider, "async_azure": AsyncAzureOpenAIProvider, "minimax": MiniMaxProvider, "async_minimax": AsyncMiniMaxProvider, "dummy": DummyProvider}
# TODO
# This should actually be a decorator that registers the class
diff --git a/lmms_eval/llm_judge/providers/__init__.py b/lmms_eval/llm_judge/providers/__init__.py
index 9fbdb284d..bcb1cfdc2 100644
--- a/lmms_eval/llm_judge/providers/__init__.py
+++ b/lmms_eval/llm_judge/providers/__init__.py
@@ -1,7 +1,9 @@
from .async_azure_openai import AsyncAzureOpenAIProvider
+from .async_minimax import AsyncMiniMaxProvider
from .async_openai import AsyncOpenAIProvider
from .azure_openai import AzureOpenAIProvider
from .dummy import DummyProvider
+from .minimax import MiniMaxProvider
from .openai import OpenAIProvider
__all__ = [
@@ -9,5 +11,7 @@
"AzureOpenAIProvider",
"AsyncOpenAIProvider",
"AsyncAzureOpenAIProvider",
+ "MiniMaxProvider",
+ "AsyncMiniMaxProvider",
"DummyProvider",
]
diff --git a/lmms_eval/llm_judge/providers/async_minimax.py b/lmms_eval/llm_judge/providers/async_minimax.py
new file mode 100644
index 000000000..63d6dc890
--- /dev/null
+++ b/lmms_eval/llm_judge/providers/async_minimax.py
@@ -0,0 +1,188 @@
+import asyncio
+import os
+from typing import Dict, List, Optional, Union
+
+import aiohttp
+from loguru import logger as eval_logger
+
+from lmms_eval.models.model_utils.usage_metrics import log_usage
+
+from ..base import AsyncServerInterface
+from ..protocol import Request, Response, ServerConfig
+from .minimax import MiniMaxProvider, _clamp_temperature, _strip_think_tags
+
+
+class AsyncMiniMaxProvider(AsyncServerInterface):
+ """Async MiniMax API implementation of the Judge interface.
+
+    Uses the same OpenAI-compatible endpoint as :class:`MiniMaxProvider`,
+    preferring the ``AsyncOpenAI`` client and falling back to raw
+    ``aiohttp`` requests when the ``openai`` SDK is unavailable.
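+
+    Example
+    -------
+    A minimal sketch (assumes ``MINIMAX_API_KEY`` is set)::
+
+        import asyncio
+
+        from lmms_eval.llm_judge.protocol import Request, ServerConfig
+
+        provider = AsyncMiniMaxProvider(
+            config=ServerConfig(model_name="MiniMax-M2.7")
+        )
+        response = asyncio.run(
+            provider.evaluate_async(
+                Request(messages=[{"role": "user", "content": "Say hi"}])
+            )
+        )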
+ """
+
+ MINIMAX_BASE_URL = MiniMaxProvider.MINIMAX_BASE_URL
+
+ def __init__(self, config: Optional[ServerConfig] = None):
+ super().__init__(config)
+ self.api_key = os.getenv("MINIMAX_API_KEY", "")
+ self.api_url = f"{self.MINIMAX_BASE_URL}/chat/completions"
+
+ self.use_async_client = False
+ try:
+ from openai import AsyncOpenAI
+
+ self.async_client = AsyncOpenAI(
+ api_key=self.api_key,
+ base_url=self.MINIMAX_BASE_URL,
+ )
+ self.use_async_client = True
+ except ImportError:
+ eval_logger.warning(
+ "AsyncOpenAI client not available, using aiohttp for MiniMax"
+ )
+
+ def is_available(self) -> bool:
+ return bool(self.api_key)
+
+ async def evaluate_async(self, request: Request) -> Response:
+ """Evaluate using MiniMax API asynchronously."""
+ if not self.is_available():
+ raise ValueError("MiniMax API key not configured (set MINIMAX_API_KEY)")
+
+ config = request.config or self.config
+ messages = self.prepare_messages(request)
+
+ if request.images:
+ messages = self._add_images_to_messages(messages, request.images)
+
+ payload = {
+ "model": config.model_name,
+ "messages": messages,
+ "temperature": _clamp_temperature(config.temperature),
+ "max_tokens": config.max_tokens,
+ }
+
+ if config.top_p is not None:
+ payload["top_p"] = config.top_p
+
+ if config.response_format == "json":
+ payload["response_format"] = {"type": "json_object"}
+
+ async with self.semaphore:
+ for attempt in range(config.num_retries):
+ try:
+ if self.use_async_client:
+ response = await self.async_client.chat.completions.create(
+ **payload
+ )
+ content = response.choices[0].message.content
+ model_used = response.model
+ usage = (
+ response.usage.model_dump()
+ if hasattr(response.usage, "model_dump")
+ else None
+ )
+ raw_response = response
+ else:
+ response = await self._make_async_request(
+ payload, config.timeout
+ )
+ content = response["choices"][0]["message"]["content"]
+ model_used = response["model"]
+ usage = response.get("usage")
+ raw_response = response
+
+ content = _strip_think_tags(content)
+
+ # Log usage
+ if (
+ self.use_async_client
+ and hasattr(response, "usage")
+ and response.usage
+ ):
+ log_usage(
+ model_name=model_used or config.model_name,
+ task_name=None,
+ input_tokens=getattr(
+ response.usage, "prompt_tokens", 0
+ )
+ or 0,
+ output_tokens=getattr(
+ response.usage, "completion_tokens", 0
+ )
+ or 0,
+ reasoning_tokens=0,
+ source="judge",
+ )
+ elif not self.use_async_client and isinstance(usage, dict):
+ log_usage(
+ model_name=model_used or config.model_name,
+ task_name=None,
+ input_tokens=usage.get("prompt_tokens", 0) or 0,
+ output_tokens=usage.get("completion_tokens", 0) or 0,
+ reasoning_tokens=0,
+ source="judge",
+ )
+
+ return Response(
+ content=content.strip(),
+ model_used=model_used,
+ usage=usage,
+ raw_response=raw_response,
+ )
+
+ except Exception as e:
+ eval_logger.warning(
+ f"MiniMax async attempt {attempt + 1}/{config.num_retries} "
+ f"failed: {e}"
+ )
+ if attempt < config.num_retries - 1:
+ await asyncio.sleep(config.retry_delay)
+ else:
+ eval_logger.error(
+ f"All {config.num_retries} MiniMax async attempts failed"
+ )
+ raise
+
+ async def _make_async_request(self, payload: Dict, timeout: int) -> Dict:
+ """Make async HTTP request to MiniMax API."""
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ }
+ async with aiohttp.ClientSession() as session:
+ async with session.post(
+ self.api_url,
+ headers=headers,
+ json=payload,
+ timeout=aiohttp.ClientTimeout(total=timeout),
+ ) as response:
+ response.raise_for_status()
+ return await response.json()
+
+ def _add_images_to_messages(
+ self, messages: List[Dict], images: List[Union[str, bytes]]
+ ) -> List[Dict]:
+ """Add images to messages – reuse from MiniMaxProvider."""
+ return MiniMaxProvider._add_images_to_messages(self, messages, images)
+
+ def _encode_image(self, image_path: str) -> str:
+ """Encode image to base64 – reuse from MiniMaxProvider."""
+ return MiniMaxProvider._encode_image(self, image_path)
diff --git a/lmms_eval/llm_judge/providers/minimax.py b/lmms_eval/llm_judge/providers/minimax.py
new file mode 100644
index 000000000..40abfe9f0
--- /dev/null
+++ b/lmms_eval/llm_judge/providers/minimax.py
@@ -0,0 +1,227 @@
+import os
+import re
+import time
+from typing import Dict, List, Optional, Union
+
+import requests
+from loguru import logger as eval_logger
+
+from lmms_eval.models.model_utils.media_encoder import encode_image_to_base64
+from lmms_eval.models.model_utils.usage_metrics import log_usage
+
+from ..base import ServerInterface
+from ..protocol import Request, Response, ServerConfig
+
+# MiniMax temperature must be in [0.0, 1.0]
+_MINIMAX_TEMP_MIN = 0.0
+_MINIMAX_TEMP_MAX = 1.0
+
+
+def _clamp_temperature(temperature: float) -> float:
+ """Clamp temperature to MiniMax's accepted range [0.0, 1.0]."""
+ return max(_MINIMAX_TEMP_MIN, min(_MINIMAX_TEMP_MAX, temperature))
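+
+# Example: _clamp_temperature(2.0) -> 1.0; _clamp_temperature(-0.5) -> 0.0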
+
+
+def _strip_think_tags(text: str) -> str:
+    """Strip ``<think>...</think>`` tags that MiniMax reasoning models may emit."""
+    return re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
+
+
+class MiniMaxProvider(ServerInterface):
+ """MiniMax API implementation of the Judge interface.
+
+ MiniMax exposes an OpenAI-compatible chat completions endpoint at
+ ``https://api.minimax.io/v1``. This provider re-uses the ``openai``
+ Python SDK (if available) with a custom *base_url*, falling back to
+ raw ``requests`` calls otherwise.
+
+ Supported models include ``MiniMax-M2.7``, ``MiniMax-M2.5``, and
+ ``MiniMax-M2.5-highspeed`` (204K context).
+
+ Environment variables
+ ---------------------
+ MINIMAX_API_KEY : str
+ API key for the MiniMax platform.
+ """
+
+ MINIMAX_BASE_URL = "https://api.minimax.io/v1"
+
+ def __init__(self, config: Optional[ServerConfig] = None):
+ super().__init__(config)
+ self.api_key = os.getenv("MINIMAX_API_KEY", "")
+ self.api_url = f"{self.MINIMAX_BASE_URL}/chat/completions"
+
+ # Initialise OpenAI client pointed at MiniMax
+ try:
+ from openai import OpenAI
+
+ self.client = OpenAI(
+ api_key=self.api_key,
+ base_url=self.MINIMAX_BASE_URL,
+ )
+ self.use_client = True
+ except ImportError:
+ eval_logger.warning(
+ "OpenAI client not available, falling back to requests for MiniMax"
+ )
+ self.use_client = False
+
+ def is_available(self) -> bool:
+ return bool(self.api_key)
+
+ def evaluate(self, request: Request) -> Response:
+ """Evaluate using the MiniMax API."""
+ if not self.is_available():
+ raise ValueError("MiniMax API key not configured (set MINIMAX_API_KEY)")
+
+ config = request.config or self.config
+ messages = self.prepare_messages(request)
+
+ if request.images:
+ messages = self._add_images_to_messages(messages, request.images)
+
+ payload = {
+ "model": config.model_name,
+ "messages": messages,
+ "temperature": _clamp_temperature(config.temperature),
+ "max_tokens": config.max_tokens,
+ }
+
+ if config.top_p is not None:
+ payload["top_p"] = config.top_p
+
+ if config.response_format == "json":
+ payload["response_format"] = {"type": "json_object"}
+
+ for attempt in range(config.num_retries):
+ try:
+ if self.use_client:
+ response = self.client.chat.completions.create(**payload)
+ content = response.choices[0].message.content
+ model_used = response.model
+ usage = (
+ response.usage.model_dump()
+ if hasattr(response.usage, "model_dump")
+ else None
+ )
+ raw_response = response
+ else:
+ response = self._make_request(payload, config.timeout)
+ content = response["choices"][0]["message"]["content"]
+ model_used = response["model"]
+ usage = response.get("usage")
+ raw_response = response
+
+                # Strip <think> tags from reasoning models
+ content = _strip_think_tags(content)
+
+ # Log usage for token tracking
+ if self.use_client and hasattr(response, "usage") and response.usage:
+ log_usage(
+ model_name=model_used or config.model_name,
+ task_name=None,
+ input_tokens=getattr(response.usage, "prompt_tokens", 0) or 0,
+ output_tokens=getattr(response.usage, "completion_tokens", 0)
+ or 0,
+ reasoning_tokens=0,
+ source="judge",
+ )
+ elif not self.use_client and isinstance(usage, dict):
+ log_usage(
+ model_name=model_used or config.model_name,
+ task_name=None,
+ input_tokens=usage.get("prompt_tokens", 0) or 0,
+ output_tokens=usage.get("completion_tokens", 0) or 0,
+ reasoning_tokens=0,
+ source="judge",
+ )
+
+ return Response(
+ content=content.strip(),
+ model_used=model_used,
+ usage=usage,
+ raw_response=raw_response,
+ )
+
+ except Exception as e:
+ eval_logger.warning(
+ f"MiniMax attempt {attempt + 1}/{config.num_retries} failed: {e}"
+ )
+ if attempt < config.num_retries - 1:
+ time.sleep(config.retry_delay)
+ else:
+ eval_logger.error(
+ f"All {config.num_retries} MiniMax attempts failed"
+ )
+ raise
+
+ def _make_request(self, payload: Dict, timeout: int) -> Dict:
+ """Make HTTP request to MiniMax API."""
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ }
+ response = requests.post(
+ self.api_url, headers=headers, json=payload, timeout=timeout
+ )
+ response.raise_for_status()
+ return response.json()
+
+ def _add_images_to_messages(
+ self, messages: List[Dict], images: List[Union[str, bytes]]
+ ) -> List[Dict]:
+ """Add images to the last user message."""
+ for i in range(len(messages) - 1, -1, -1):
+ if messages[i]["role"] == "user":
+ if isinstance(messages[i]["content"], str):
+ messages[i]["content"] = [
+ {"type": "text", "text": messages[i]["content"]}
+ ]
+
+ for image in images:
+ if isinstance(image, str):
+ base64_image = self._encode_image(image)
+ messages[i]["content"].append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{base64_image}"
+ },
+ }
+ )
+ elif isinstance(image, bytes):
+ messages[i]["content"].append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/jpeg;base64,{image.decode()}"
+ },
+ }
+ )
+ break
+ return messages
+
+ def _encode_image(self, image_path: str) -> str:
+ """Encode image to base64."""
+ return encode_image_to_base64(
+ image_path,
+ image_format="JPEG",
+ convert_rgb=True,
+ quality=85,
+ use_path_cache=True,
+ )
diff --git a/test/eval/test_minimax_provider.py b/test/eval/test_minimax_provider.py
new file mode 100644
index 000000000..bb7d774d3
--- /dev/null
+++ b/test/eval/test_minimax_provider.py
@@ -0,0 +1,427 @@
+"""Tests for MiniMax LLM judge providers.
+
+Covers:
+ - Temperature clamping
+ - Think-tag stripping
+ - MiniMaxProvider (sync) construction, availability, evaluate
+ - AsyncMiniMaxProvider construction, availability, evaluate_async
+ - ProviderFactory registration ('minimax', 'async_minimax')
+"""
+
+import asyncio
+import os
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from lmms_eval.llm_judge.factory import ProviderFactory
+from lmms_eval.llm_judge.protocol import Request, Response, ServerConfig
+from lmms_eval.llm_judge.providers.minimax import (
+ MiniMaxProvider,
+ _clamp_temperature,
+ _strip_think_tags,
+)
+
+
+# ============================================================================
+# Temperature clamping
+# ============================================================================
+
+
+class TestClampTemperature:
+ def test_within_range(self):
+ assert _clamp_temperature(0.5) == 0.5
+
+ def test_at_lower_bound(self):
+ assert _clamp_temperature(0.0) == 0.0
+
+ def test_at_upper_bound(self):
+ assert _clamp_temperature(1.0) == 1.0
+
+ def test_below_lower_bound(self):
+ assert _clamp_temperature(-0.5) == 0.0
+
+ def test_above_upper_bound(self):
+ assert _clamp_temperature(1.5) == 1.0
+
+ def test_high_temperature(self):
+ assert _clamp_temperature(2.0) == 1.0
+
+
+# ============================================================================
+# Think-tag stripping
+# ============================================================================
+
+
+class TestStripThinkTags:
+ def test_no_tags(self):
+ assert _strip_think_tags("Hello world") == "Hello world"
+
+ def test_single_tag(self):
+ assert _strip_think_tags("reasoningAnswer") == "Answer"
+
+ def test_multiline_tag(self):
+ text = "\nline1\nline2\n\nResult"
+ assert _strip_think_tags(text) == "Result"
+
+ def test_multiple_tags(self):
+ text = "aXbY"
+ assert _strip_think_tags(text) == "XY"
+
+ def test_empty_tag(self):
+ assert _strip_think_tags("OK") == "OK"
+
+ def test_only_tag(self):
+ assert _strip_think_tags("only") == ""
+
+
+# ============================================================================
+# MiniMaxProvider – construction & availability
+# ============================================================================
+
+
+class TestMiniMaxProviderInit:
+ @patch.dict(os.environ, {"MINIMAX_API_KEY": "test-key"}, clear=False)
+ @patch("lmms_eval.llm_judge.providers.minimax.OpenAI", create=True)
+ def test_is_available_with_key(self, mock_openai_cls):
+ # Patch the import inside __init__
+ with patch.dict("sys.modules", {"openai": MagicMock()}):
+ provider = MiniMaxProvider.__new__(MiniMaxProvider)
+ provider.config = ServerConfig(model_name="MiniMax-M2.7")
+ provider.api_key = "test-key"
+ provider.api_url = f"{MiniMaxProvider.MINIMAX_BASE_URL}/chat/completions"
+ provider.use_client = True
+ assert provider.is_available() is True
+
+ def test_is_not_available_without_key(self):
+ provider = MiniMaxProvider.__new__(MiniMaxProvider)
+ provider.config = ServerConfig(model_name="MiniMax-M2.7")
+ provider.api_key = ""
+ assert provider.is_available() is False
+
+
+# ============================================================================
+# MiniMaxProvider – evaluate (mocked)
+# ============================================================================
+
+
+def _make_mock_response(content="test response", model="MiniMax-M2.7"):
+ """Build a mock OpenAI-style chat completion response."""
+ usage = SimpleNamespace(
+ prompt_tokens=10,
+ completion_tokens=20,
+ model_dump=lambda: {"prompt_tokens": 10, "completion_tokens": 20},
+ )
+ choice = SimpleNamespace(message=SimpleNamespace(content=content))
+ return SimpleNamespace(choices=[choice], model=model, usage=usage)
+
+
+class TestMiniMaxProviderEvaluate:
+ def _build_provider(self):
+ provider = MiniMaxProvider.__new__(MiniMaxProvider)
+ provider.config = ServerConfig(model_name="MiniMax-M2.7")
+ provider.api_key = "test-key"
+ provider.api_url = f"{MiniMaxProvider.MINIMAX_BASE_URL}/chat/completions"
+ provider.use_client = True
+ provider.client = MagicMock()
+ return provider
+
+ def test_evaluate_returns_response(self):
+ provider = self._build_provider()
+ mock_resp = _make_mock_response("The answer is 42.")
+ provider.client.chat.completions.create.return_value = mock_resp
+
+ request = Request(
+ messages=[{"role": "user", "content": "What is 6*7?"}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+ result = provider.evaluate(request)
+
+ assert isinstance(result, Response)
+ assert result.content == "The answer is 42."
+ assert result.model_used == "MiniMax-M2.7"
+
+ def test_evaluate_strips_think_tags(self):
+ provider = self._build_provider()
+ mock_resp = _make_mock_response("reasoningFinal answer.")
+ provider.client.chat.completions.create.return_value = mock_resp
+
+ request = Request(
+ messages=[{"role": "user", "content": "Think hard."}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+ result = provider.evaluate(request)
+ assert result.content == "Final answer."
+
+ def test_evaluate_clamps_temperature(self):
+ provider = self._build_provider()
+ mock_resp = _make_mock_response("ok")
+ provider.client.chat.completions.create.return_value = mock_resp
+
+ request = Request(
+ messages=[{"role": "user", "content": "hi"}],
+ config=ServerConfig(
+ model_name="MiniMax-M2.7", temperature=2.0, num_retries=1
+ ),
+ )
+ provider.evaluate(request)
+
+ call_kwargs = provider.client.chat.completions.create.call_args[1]
+ assert call_kwargs["temperature"] == 1.0
+
+ def test_evaluate_raises_without_key(self):
+ provider = self._build_provider()
+ provider.api_key = ""
+
+ request = Request(
+ messages=[{"role": "user", "content": "hi"}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+ with pytest.raises(ValueError, match="MiniMax API key not configured"):
+ provider.evaluate(request)
+
+ def test_evaluate_retries_on_failure(self):
+ provider = self._build_provider()
+ provider.client.chat.completions.create.side_effect = [
+ RuntimeError("timeout"),
+ _make_mock_response("recovered"),
+ ]
+
+ request = Request(
+ messages=[{"role": "user", "content": "retry?"}],
+ config=ServerConfig(
+ model_name="MiniMax-M2.7", num_retries=2, retry_delay=0
+ ),
+ )
+ result = provider.evaluate(request)
+ assert result.content == "recovered"
+
+ def test_evaluate_with_json_response_format(self):
+ provider = self._build_provider()
+ mock_resp = _make_mock_response('{"score": 5}')
+ provider.client.chat.completions.create.return_value = mock_resp
+
+ request = Request(
+ messages=[{"role": "user", "content": "score this"}],
+ config=ServerConfig(
+ model_name="MiniMax-M2.7",
+ response_format="json",
+ num_retries=1,
+ ),
+ )
+ provider.evaluate(request)
+
+ call_kwargs = provider.client.chat.completions.create.call_args[1]
+ assert call_kwargs["response_format"] == {"type": "json_object"}
+
+ def test_evaluate_with_top_p(self):
+ provider = self._build_provider()
+ mock_resp = _make_mock_response("ok")
+ provider.client.chat.completions.create.return_value = mock_resp
+
+ request = Request(
+ messages=[{"role": "user", "content": "hi"}],
+ config=ServerConfig(
+ model_name="MiniMax-M2.7", top_p=0.9, num_retries=1
+ ),
+ )
+ provider.evaluate(request)
+
+ call_kwargs = provider.client.chat.completions.create.call_args[1]
+ assert call_kwargs["top_p"] == 0.9
+
+ def test_evaluate_fallback_requests(self):
+ provider = self._build_provider()
+ provider.use_client = False
+
+ mock_json = {
+ "choices": [{"message": {"content": "fallback answer"}}],
+ "model": "MiniMax-M2.7",
+ "usage": {"prompt_tokens": 5, "completion_tokens": 10},
+ }
+
+ with patch("lmms_eval.llm_judge.providers.minimax.requests.post") as mock_post:
+ mock_post.return_value = MagicMock(
+ status_code=200,
+ json=MagicMock(return_value=mock_json),
+ raise_for_status=MagicMock(),
+ )
+
+ request = Request(
+ messages=[{"role": "user", "content": "hi"}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+ result = provider.evaluate(request)
+ assert result.content == "fallback answer"
+
+
+# ============================================================================
+# AsyncMiniMaxProvider
+# ============================================================================
+
+
+class TestAsyncMiniMaxProvider:
+ def _build_async_provider(self):
+ from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider
+
+ provider = AsyncMiniMaxProvider.__new__(AsyncMiniMaxProvider)
+ provider.config = ServerConfig(model_name="MiniMax-M2.7")
+ provider.api_key = "test-key"
+ provider.api_url = f"{AsyncMiniMaxProvider.MINIMAX_BASE_URL}/chat/completions"
+ provider.use_async_client = True
+ provider.async_client = MagicMock()
+ provider.semaphore = asyncio.Semaphore(10)
+ return provider
+
+ def test_is_available(self):
+ provider = self._build_async_provider()
+ assert provider.is_available() is True
+
+ def test_is_not_available(self):
+ provider = self._build_async_provider()
+ provider.api_key = ""
+ assert provider.is_available() is False
+
+ def test_evaluate_async(self):
+ provider = self._build_async_provider()
+ mock_resp = _make_mock_response("async answer")
+ provider.async_client.chat.completions.create = AsyncMock(
+ return_value=mock_resp
+ )
+
+ request = Request(
+ messages=[{"role": "user", "content": "async test"}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+        result = asyncio.run(provider.evaluate_async(request))
+ assert isinstance(result, Response)
+ assert result.content == "async answer"
+
+ def test_evaluate_async_strips_think_tags(self):
+ provider = self._build_async_provider()
+ mock_resp = _make_mock_response("stepsDone")
+ provider.async_client.chat.completions.create = AsyncMock(
+ return_value=mock_resp
+ )
+
+ request = Request(
+ messages=[{"role": "user", "content": "think"}],
+ config=ServerConfig(model_name="MiniMax-M2.7", num_retries=1),
+ )
+        result = asyncio.run(provider.evaluate_async(request))
+ assert result.content == "Done"
+
+
+# ============================================================================
+# ProviderFactory registration
+# ============================================================================
+
+
+class TestProviderFactoryMiniMax:
+ def test_minimax_registered(self):
+ assert "minimax" in ProviderFactory._provider_classes
+
+ def test_async_minimax_registered(self):
+ assert "async_minimax" in ProviderFactory._provider_classes
+
+ def test_create_minimax_provider(self):
+ with patch.dict(os.environ, {"MINIMAX_API_KEY": "k"}, clear=False):
+ provider = ProviderFactory.create_provider(
+ api_type="minimax",
+ config=ServerConfig(model_name="MiniMax-M2.7"),
+ )
+ assert isinstance(provider, MiniMaxProvider)
+
+ def test_create_async_minimax_provider(self):
+ from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider
+
+ with patch.dict(os.environ, {"MINIMAX_API_KEY": "k"}, clear=False):
+ provider = ProviderFactory.create_provider(
+ api_type="async_minimax",
+ config=ServerConfig(model_name="MiniMax-M2.7"),
+ )
+ assert isinstance(provider, AsyncMiniMaxProvider)
+
+ def test_env_api_type_minimax(self):
+ with patch.dict(
+ os.environ,
+ {"API_TYPE": "minimax", "MINIMAX_API_KEY": "k"},
+ clear=False,
+ ):
+ provider = ProviderFactory.create_provider(
+ config=ServerConfig(model_name="MiniMax-M2.7")
+ )
+ assert isinstance(provider, MiniMaxProvider)
+
+
+# ============================================================================
+# Integration tests (skipped without MINIMAX_API_KEY)
+# ============================================================================
+
+
+@pytest.mark.skipif(
+ not os.environ.get("MINIMAX_API_KEY"),
+ reason="MINIMAX_API_KEY not set",
+)
+class TestMiniMaxIntegration:
+ """Live integration tests against the real MiniMax API."""
+
+ def test_live_evaluate(self):
+ config = ServerConfig(
+ model_name="MiniMax-M2.7",
+ temperature=0.0,
+ max_tokens=256,
+ num_retries=2,
+ )
+ provider = MiniMaxProvider(config=config)
+ request = Request(
+ messages=[{"role": "user", "content": "Reply with exactly: hello"}],
+ config=config,
+ )
+ result = provider.evaluate(request)
+ assert isinstance(result, Response)
+ assert result.content # non-empty
+ assert result.model_used
+
+ def test_live_json_response(self):
+ config = ServerConfig(
+ model_name="MiniMax-M2.7",
+ temperature=0.0,
+ max_tokens=256,
+ response_format="json",
+ num_retries=2,
+ )
+ provider = MiniMaxProvider(config=config)
+ request = Request(
+ messages=[
+ {
+ "role": "user",
+ "content": 'Return a JSON object: {"score": 5}',
+ }
+ ],
+ config=config,
+ )
+ result = provider.evaluate(request)
+ assert "score" in result.content
+
+ def test_live_async_evaluate(self):
+ from lmms_eval.llm_judge.providers.async_minimax import AsyncMiniMaxProvider
+
+ config = ServerConfig(
+ model_name="MiniMax-M2.7",
+ temperature=0.0,
+ max_tokens=256,
+ num_retries=2,
+ )
+ provider = AsyncMiniMaxProvider(config=config)
+ request = Request(
+ messages=[{"role": "user", "content": "Reply with exactly: world"}],
+ config=config,
+ )
+        result = asyncio.run(provider.evaluate_async(request))
+        assert isinstance(result, Response)
+        assert result.content
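+
+
+# Run locally with, e.g.:
+#   pytest test/eval/test_minimax_provider.py -v
+# The TestMiniMaxIntegration cases only execute when MINIMAX_API_KEY is set.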