Skip to content

Commit 310a40b

Browse files
committed
add DeepSeek V4 support
1 parent a4216be commit 310a40b

6 files changed

Lines changed: 20 additions & 7 deletions

File tree

bootstrap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ def _pip_install(packages: list[str]) -> tuple[bool, str]:
870870
openai: { url: 'https://api.openai.com/v1', model: 'gpt-5.4' },
871871
anthropic: { url: 'https://api.anthropic.com/v1', model: 'claude-opus-4-7' },
872872
bedrock: { url: 'https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1', model: 'us.anthropic.claude-opus-4-7-v1:0' },
873-
deepseek: { url: 'https://api.deepseek.com/v1', model: 'deepseek-chat' },
873+
deepseek: { url: 'https://api.deepseek.com/v1', model: 'deepseek-v4-flash' },
874874
openrouter: { url: 'https://openrouter.ai/api/v1', model: 'anthropic/claude-sonnet-4.6' },
875875
custom: { url: '', model: '' },
876876
};

lib/llm_dispatch/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ def is_model_cheap(model_id: str, fallback_cost_per_1k: float = None,
140140
'gpt-4.1-nano': {'caps': {'text', 'cheap'}, 'rpm': 200, 'latency': 1000, 'cost': 0.001},
141141

142142
# ── DeepSeek ──
143+
# V4 family (Apr 2026) — 1M ctx, dual Thinking / Non-Thinking; pro=1.6T/49B, flash=284B/13B.
144+
'deepseek-v4-pro': {'caps': {'text', 'thinking', 'cheap'}, 'rpm': 30, 'latency': 3000, 'cost': 0.003},
145+
'deepseek-v4-flash': {'caps': {'text', 'thinking', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.0002},
143146
'deepseek-chat': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},
144147
'deepseek-v3.2': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},
145148
'deepseek-v3.2-tencent': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},

lib/llm_dispatch/discovery.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,9 @@ def _probe_balance_url(base_url: str, api_key: str) -> str:
555555
(re.compile(r'longcat', re.I), 'enable_thinking'),
556556
# GLM (Zhipu AI): thinking.type format
557557
(re.compile(r'glm', re.I), 'thinking_type'),
558-
# DeepSeek: no thinking param needed (separate model)
558+
# DeepSeek V4 (Apr 2026) uses thinking.type = "enabled" (dual-mode API).
559+
(re.compile(r'deepseek-v4', re.I), 'thinking_type'),
560+
# DeepSeek V3 reasoner was a separate model — no thinking param needed.
559561
(re.compile(r'deepseek-reasoner', re.I), 'none'),
560562
]
561563

lib/pricing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@
6060
'gpt-4o-mini': {'input': 0.15, 'output': 0.6, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.50, 'name': 'GPT-4o Mini'},
6161
'gpt-4-turbo': {'input': 10.0, 'output': 30.0, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.50, 'name': 'GPT-4 Turbo'},
6262
'deepseek-chat': {'input': 0.27, 'output': 1.10, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3'},
63+
# DeepSeek V4 (2026-04-24) — both models have 1M ctx, dual Thinking / Non-Thinking modes.
64+
# cacheReadMul derived from disclosed cache-hit pricing: Pro $0.145 / $1.74 ≈ 0.083, Flash $0.028 / $0.14 = 0.20.
65+
'deepseek-v4-pro': {'input': 1.74, 'output': 3.48, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.083, 'name': 'DeepSeek V4 Pro'},
66+
'deepseek-v4-flash': {'input': 0.14, 'output': 0.28, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.20, 'name': 'DeepSeek V4 Flash'},
6367
'deepseek-v3.2': {'input': 0.28, 'output': 0.41, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3.2'}, # ¥2/¥3 per 1M
6468
# DeepSeek V3.2 mirrors on YourProvider gateway — tiered ¥2/¥4 input, ¥4/¥6 output at 32K (cheapest tier in USD)
6569
'deepseek-v3.2-tencent': {'input': 0.28, 'output': 0.55, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3.2 (Tencent)'}, # ¥2/¥4 per 1M ≤32K

lib/swarm/registry.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@
5757
'heavy': 'qwen3-235b-a22b',
5858
},
5959
'deepseek': {
60-
'light': 'deepseek-chat',
61-
'standard': 'deepseek-chat',
62-
'heavy': 'deepseek-reasoner',
60+
'light': 'deepseek-v4-flash',
61+
'standard': 'deepseek-v4-flash',
62+
'heavy': 'deepseek-v4-pro',
6363
},
6464
'gemini': {
6565
'light': 'gemini-2.0-flash',

static/js/settings.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,13 @@ const _PROVIDER_TEMPLATES = [
175175
name: 'DeepSeek',
176176
base_url: 'https://api.deepseek.com',
177177
balance_url: 'https://api.deepseek.com/user/balance',
178+
// V4 (Apr 2026): 1M ctx, dual Thinking / Non-Thinking modes. Legacy deepseek-chat /
179+
// deepseek-reasoner retire 2026-07-24 and currently route to deepseek-v4-flash.
178180
models: [
179-
{ model_id: 'deepseek-chat', capabilities: ['text'], rpm: 60, cost: 0.001 },
180-
{ model_id: 'deepseek-reasoner', capabilities: ['text', 'thinking'], rpm: 30, cost: 0.002 },
181+
{ model_id: 'deepseek-v4-pro', capabilities: ['text', 'thinking', 'cheap'], rpm: 30, cost: 0.003 },
182+
{ model_id: 'deepseek-v4-flash', capabilities: ['text', 'thinking', 'cheap'], rpm: 60, cost: 0.0002 },
183+
{ model_id: 'deepseek-chat', capabilities: ['text'], rpm: 60, cost: 0.001 },
184+
{ model_id: 'deepseek-reasoner', capabilities: ['text', 'thinking'], rpm: 30, cost: 0.002 },
181185
],
182186
},
183187
{

0 commit comments

Comments (0)