Skip to content

Commit 83ea72e

Browse files
octo-patchCaralHsi
andauthored
feat: upgrade MiniMax default model to M2.7 (#1291)
* feat: add MiniMax as a first-class LLM provider Add MiniMax LLM support via the OpenAI-compatible API, following the same pattern as the existing Qwen and DeepSeek providers. Changes: - Add MinimaxLLMConfig with api_key, api_base, extra_body fields - Add MinimaxLLM class inheriting from OpenAILLM - Register minimax backend in LLMFactory and LLMConfigFactory - Add minimax_config() to APIConfig with env var support (MINIMAX_API_KEY, MINIMAX_API_BASE) - Add minimax to backend_model dicts in product/user config - Add MiniMax example scenario in examples/basic_modules/llm.py - Add unit tests for config and LLM (generate, stream, think prefix) - Update .env.example and README with MiniMax provider info MiniMax API: https://api.minimax.io/v1 (OpenAI-compatible) Models: MiniMax-M2.5, MiniMax-M2.5-highspeed (204K context) * feat: upgrade MiniMax default model to M2.7 - Update default model from MiniMax-M2.5 to MiniMax-M2.7 in API config - Update example code to use MiniMax-M2.7 as default with M2.7-highspeed listed - Update unit tests to reference M2.7 and M2.7-highspeed models - Keep all previous models (M2.5, M2.5-highspeed) as available alternatives * fix: Update MemReader configuration with backup support Enhanced MemReader configuration to support backup client and general model. * fix: derive MinimaxLLMConfig from OpenAILLMConfig * Add backup configuration options to test_llm.py * Add backup configuration options to test_llm.py * backup options in test_minimax_config Restored backup configuration options in test_llm.py. --------- Co-authored-by: CaralHsi <caralhsi@gmail.com>
1 parent 759af30 commit 83ea72e

File tree

9 files changed

+231
-3
lines changed

9 files changed

+231
-3
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ Full tutorial → [MemOS-Cloud-OpenClaw-Plugin](https://github.com/MemTensor/Mem
224224
2. Configure `docker/.env.example` and copy to `MemOS/.env`
225225
- The `OPENAI_API_KEY`,`MOS_EMBEDDER_API_KEY`,`MEMRADER_API_KEY` and other keys can be obtained from [`BaiLian`](https://bailian.console.aliyun.com/?spm=a2c4g.11186623.0.0.2f2165b08fRk4l&tab=api#/api).
226226
- Fill in the corresponding configuration in the `MemOS/.env` file.
227+
- Supported LLM providers: **OpenAI**, **Azure OpenAI**, **Qwen (DashScope)**, **DeepSeek**, **MiniMax**, **Ollama**, **HuggingFace**, **vLLM**. Set `MOS_CHAT_MODEL_PROVIDER` to select the backend (e.g., `openai`, `qwen`, `deepseek`, `minimax`).
227228
3. Start the service.
228229

229230
- Launch via Docker

docker/.env.example

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,12 @@ MOS_MAX_TOKENS=2048
2525
# Top-P for LLM in the Product API
2626
MOS_TOP_P=0.9
2727
# LLM for the Product API backend
28-
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm
28+
MOS_CHAT_MODEL_PROVIDER=openai # openai | huggingface | vllm | minimax
2929
OPENAI_API_KEY=sk-xxx # [required] when provider=openai
3030
OPENAI_API_BASE=https://api.openai.com/v1 # [required] base for the key
31+
# MiniMax LLM (when provider=minimax)
32+
# MINIMAX_API_KEY=your-minimax-api-key # [required] when provider=minimax
33+
# MINIMAX_API_BASE=https://api.minimax.io/v1 # base for MiniMax API
3134

3235
## MemReader / retrieval LLM
3336
MEMRADER_MODEL=gpt-4o-mini

examples/basic_modules/llm.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,37 @@
164164
print("Scenario 6:", resp)
165165

166166

167-
# Scenario 7: Using LLMFactory with Deepseek-chat + reasoning + CoT + streaming
167+
# Scenario 7: Using LLMFactory with MiniMax (OpenAI-compatible API)
# Prerequisites:
# 1. Get your API key from the MiniMax platform.
# 2. Available models: MiniMax-M2.7 (flagship), MiniMax-M2.7-highspeed (low-latency),
#    MiniMax-M2.5, MiniMax-M2.5-highspeed.

# Build the factory payload first so the backend/config split is easy to see.
minimax_payload = {
    "backend": "minimax",
    "config": {
        "model_name_or_path": "MiniMax-M2.7",
        "api_key": "your-minimax-api-key",
        "api_base": "https://api.minimax.io/v1",
        "temperature": 0.7,
        "max_tokens": 1024,
    },
}
cfg_mm = LLMConfigFactory.model_validate(minimax_payload)
llm = LLMFactory.from_config(cfg_mm)

messages = [{"role": "user", "content": "Hello, who are you"}]

# One-shot generation.
resp = llm.generate(messages)
print("Scenario 7:", resp)
print("==" * 20)

# Streaming generation: print each delta as it arrives.
print("Scenario 7 (streaming):\n")
for chunk in llm.generate_stream(messages):
    print(chunk, end="")
print("\n" + "==" * 20)
195+
196+
197+
# Scenario 8: Using LLMFactory with DeepSeek-chat + reasoning + CoT + streaming
168198

169199
cfg2 = LLMConfigFactory.model_validate(
170200
{
@@ -186,7 +216,7 @@
186216
"content": "Explain how to solve this problem step-by-step. Be explicit in your thinking process. Question: If a train travels from city A to city B at 60 mph and returns at 40 mph, what is its average speed for the entire trip? Let's think step by step.",
187217
},
188218
]
189-
print("Scenario 7:\n")
219+
print("Scenario 8:\n")
190220
for chunk in llm.generate_stream(messages):
191221
print(chunk, end="")
192222
print("==" * 20)

src/memos/api/config.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,20 @@ def qwen_config() -> dict[str, Any]:
284284
"remove_think_prefix": True,
285285
}
286286

287+
@staticmethod
288+
def minimax_config() -> dict[str, Any]:
289+
"""Get MiniMax configuration."""
290+
return {
291+
"model_name_or_path": os.getenv("MOS_CHAT_MODEL", "MiniMax-M2.7"),
292+
"temperature": float(os.getenv("MOS_CHAT_TEMPERATURE", "0.8")),
293+
"max_tokens": int(os.getenv("MOS_MAX_TOKENS", "8000")),
294+
"top_p": float(os.getenv("MOS_TOP_P", "0.9")),
295+
"top_k": int(os.getenv("MOS_TOP_K", "50")),
296+
"remove_think_prefix": True,
297+
"api_key": os.getenv("MINIMAX_API_KEY", "your-api-key-here"),
298+
"api_base": os.getenv("MINIMAX_API_BASE", "https://api.minimax.io/v1"),
299+
}
300+
287301
@staticmethod
288302
def vllm_config() -> dict[str, Any]:
289303
"""Get Qwen configuration."""
@@ -901,12 +915,14 @@ def get_product_default_config() -> dict[str, Any]:
901915
openai_config = APIConfig.get_openai_config()
902916
qwen_config = APIConfig.qwen_config()
903917
vllm_config = APIConfig.vllm_config()
918+
minimax_config = APIConfig.minimax_config()
904919
reader_config = APIConfig.get_reader_config()
905920

906921
backend_model = {
907922
"openai": openai_config,
908923
"huggingface": qwen_config,
909924
"vllm": vllm_config,
925+
"minimax": minimax_config,
910926
}
911927
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
912928
mysql_config = APIConfig.get_mysql_config()
@@ -1024,13 +1040,15 @@ def create_user_config(user_name: str, user_id: str) -> tuple["MOSConfig", "Gene
10241040
openai_config = APIConfig.get_openai_config()
10251041
qwen_config = APIConfig.qwen_config()
10261042
vllm_config = APIConfig.vllm_config()
1043+
minimax_config = APIConfig.minimax_config()
10271044
mysql_config = APIConfig.get_mysql_config()
10281045
reader_config = APIConfig.get_reader_config()
10291046
backend = os.getenv("MOS_CHAT_MODEL_PROVIDER", "openai")
10301047
backend_model = {
10311048
"openai": openai_config,
10321049
"huggingface": qwen_config,
10331050
"vllm": vllm_config,
1051+
"minimax": minimax_config,
10341052
}
10351053
# Create MOSConfig
10361054
config_dict = {

src/memos/configs/llm.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,15 @@ class DeepSeekLLMConfig(OpenAILLMConfig):
7272
)
7373

7474

75+
class MinimaxLLMConfig(OpenAILLMConfig):
    """Configuration for the MiniMax LLM backend (OpenAI-compatible API).

    Inherits the shared OpenAI-style fields from OpenAILLMConfig and
    overrides the endpoint-specific defaults below.
    """

    # Required credential for the MiniMax platform; no default on purpose.
    api_key: str = Field(..., description="API key for MiniMax")
    # MiniMax serves an OpenAI-compatible endpoint at this base URL.
    api_base: str = Field(
        default="https://api.minimax.io/v1",
        description="Base URL for MiniMax OpenAI-compatible API",
    )
    # NOTE(review): presumably forwarded as the client's `extra_body`
    # request options — confirm against the OpenAILLM implementation.
    extra_body: Any = Field(default=None, description="Extra options for API")
82+
83+
7584
class AzureLLMConfig(BaseLLMConfig):
7685
base_url: str = Field(
7786
default="https://api.openai.azure.com/",
@@ -146,6 +155,7 @@ class LLMConfigFactory(BaseConfig):
146155
"huggingface_singleton": HFLLMConfig, # Add singleton support
147156
"qwen": QwenLLMConfig,
148157
"deepseek": DeepSeekLLMConfig,
158+
"minimax": MinimaxLLMConfig,
149159
"openai_new": OpenAIResponsesLLMConfig,
150160
}
151161

src/memos/llms/factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from memos.llms.base import BaseLLM
55
from memos.llms.deepseek import DeepSeekLLM
66
from memos.llms.hf import HFLLM
7+
from memos.llms.minimax import MinimaxLLM
78
from memos.llms.hf_singleton import HFSingletonLLM
89
from memos.llms.ollama import OllamaLLM
910
from memos.llms.openai import AzureLLM, OpenAILLM
@@ -25,6 +26,7 @@ class LLMFactory(BaseLLM):
2526
"vllm": VLLMLLM,
2627
"qwen": QwenLLM,
2728
"deepseek": DeepSeekLLM,
29+
"minimax": MinimaxLLM,
2830
"openai_new": OpenAIResponsesLLM,
2931
}
3032

src/memos/llms/minimax.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from memos.configs.llm import MinimaxLLMConfig
2+
from memos.llms.openai import OpenAILLM
3+
from memos.log import get_logger
4+
5+
6+
logger = get_logger(__name__)


class MinimaxLLM(OpenAILLM):
    """LLM adapter for MiniMax, served over its OpenAI-compatible endpoint.

    All request/response handling is inherited from OpenAILLM; only the
    configuration type differs.
    """

    def __init__(self, config: MinimaxLLMConfig):
        """Create a client from a validated MiniMax configuration."""
        super().__init__(config)

tests/configs/test_llm.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
BaseLLMConfig,
33
HFLLMConfig,
44
LLMConfigFactory,
5+
MinimaxLLMConfig,
56
OllamaLLMConfig,
67
OpenAILLMConfig,
78
)
@@ -145,6 +146,42 @@ def test_hf_llm_config():
145146
check_config_instantiation_invalid(HFLLMConfig)
146147

147148

149+
def test_minimax_llm_config():
    """MinimaxLLMConfig: field contract, one valid instantiation, rejection of empty input."""
    expected_optional = [
        "temperature",
        "max_tokens",
        "top_p",
        "top_k",
        "api_base",
        "remove_think_prefix",
        "extra_body",
        "default_headers",
        "backup_client",
        "backup_api_key",
        "backup_api_base",
        "backup_model_name_or_path",
        "backup_headers",
    ]
    check_config_base_class(
        MinimaxLLMConfig,
        required_fields=["model_name_or_path", "api_key"],
        optional_fields=expected_optional,
    )

    valid_payload = {
        "model_name_or_path": "MiniMax-M2.7",
        "api_key": "test-key",
        "api_base": "https://api.minimax.io/v1",
        "temperature": 0.7,
        "max_tokens": 1024,
        "top_p": 0.9,
    }
    check_config_instantiation_valid(MinimaxLLMConfig, valid_payload)

    check_config_instantiation_invalid(MinimaxLLMConfig)
183+
184+
148185
def test_llm_config_factory():
149186
check_config_factory_class(
150187
LLMConfigFactory,

tests/llms/test_minimax.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import unittest
2+
3+
from types import SimpleNamespace
4+
from unittest.mock import MagicMock
5+
6+
from memos.configs.llm import MinimaxLLMConfig
7+
from memos.llms.minimax import MinimaxLLM
8+
9+
10+
class TestMinimaxLLM(unittest.TestCase):
    """Unit tests for MinimaxLLM generation paths and MinimaxLLMConfig defaults."""

    @staticmethod
    def _base_config(**overrides):
        """Build a validated MinimaxLLMConfig with test defaults, applying overrides."""
        payload = {
            "model_name_or_path": "MiniMax-M2.7",
            "temperature": 0.7,
            "max_tokens": 512,
            "top_p": 0.9,
            "api_key": "sk-test",
            "api_base": "https://api.minimax.io/v1",
            "remove_think_prefix": False,
        }
        payload.update(overrides)
        return MinimaxLLMConfig.model_validate(payload)

    def test_minimax_llm_generate_with_and_without_think_prefix(self):
        """generate() wraps reasoning in <think> tags unless remove_think_prefix is set."""
        answer = "Hello from MiniMax!"
        reasoning = "Thinking in progress..."

        # Fake a chat-completions response carrying both answer and reasoning.
        fake_response = MagicMock()
        fake_response.model_dump_json.return_value = '{"mock": "true"}'
        fake_response.choices[0].message.content = answer
        fake_response.choices[0].message.reasoning_content = reasoning

        # remove_think_prefix=False: reasoning is surfaced in a <think> block.
        keep_think = MinimaxLLM(self._base_config())
        keep_think.client.chat.completions.create = MagicMock(return_value=fake_response)
        self.assertEqual(
            keep_think.generate([{"role": "user", "content": "Hello"}]),
            f"<think>{reasoning}</think>{answer}",
        )

        # remove_think_prefix=True: only the answer text comes back.
        strip_think = MinimaxLLM(
            self._base_config().model_copy(update={"remove_think_prefix": True})
        )
        strip_think.client.chat.completions.create = MagicMock(return_value=fake_response)
        self.assertEqual(
            strip_think.generate([{"role": "user", "content": "Hello"}]), answer
        )

    def test_minimax_llm_generate_stream(self):
        """generate_stream() yields the content deltas in arrival order."""

        def chunk_with(**delta_fields):
            # One streamed chunk whose single choice carries the given delta.
            return SimpleNamespace(
                choices=[SimpleNamespace(delta=SimpleNamespace(**delta_fields))]
            )

        # Content-only deltas (MiniMax standard response).
        pieces = [
            chunk_with(content="Hello"),
            chunk_with(content=", "),
            chunk_with(content="MiniMax!"),
        ]

        llm = MinimaxLLM(self._base_config())
        llm.client.chat.completions.create = MagicMock(return_value=iter(pieces))

        collected = "".join(
            llm.generate_stream([{"role": "user", "content": "Say hello"}])
        )
        self.assertEqual(collected, "Hello, MiniMax!")

    def test_minimax_llm_config_defaults(self):
        """Omitted fields fall back to the inherited defaults."""
        config = MinimaxLLMConfig.model_validate(
            {"model_name_or_path": "MiniMax-M2.7", "api_key": "sk-test"}
        )
        self.assertEqual(config.api_base, "https://api.minimax.io/v1")
        self.assertEqual(config.temperature, 0.7)
        self.assertEqual(config.max_tokens, 8192)

    def test_minimax_llm_config_custom_values(self):
        """Explicitly supplied values override the defaults."""
        config = MinimaxLLMConfig.model_validate(
            {
                "model_name_or_path": "MiniMax-M2.7-highspeed",
                "api_key": "sk-test",
                "api_base": "https://custom.api.minimax.io/v1",
                "temperature": 0.5,
                "max_tokens": 2048,
            }
        )
        self.assertEqual(config.model_name_or_path, "MiniMax-M2.7-highspeed")
        self.assertEqual(config.api_base, "https://custom.api.minimax.io/v1")
        self.assertEqual(config.temperature, 0.5)
        self.assertEqual(config.max_tokens, 2048)

0 commit comments

Comments
 (0)