Skip to content

Commit 310a40b

Browse files
committed
add DeepSeek V4 support
1 parent a4216be commit 310a40b

6 files changed

Lines changed: 20 additions & 7 deletions

File tree

bootstrap.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ def _pip_install(packages: list[str]) -> tuple[bool, str]:
870870
openai: { url: 'https://api.openai.com/v1', model: 'gpt-5.4' },
871871
anthropic: { url: 'https://api.anthropic.com/v1', model: 'claude-opus-4-7' },
872872
bedrock: { url: 'https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1', model: 'us.anthropic.claude-opus-4-7-v1:0' },
873-
deepseek: { url: 'https://api.deepseek.com/v1', model: 'deepseek-chat' },
873+
deepseek: { url: 'https://api.deepseek.com/v1', model: 'deepseek-v4-flash' },
874874
openrouter: { url: 'https://openrouter.ai/api/v1', model: 'anthropic/claude-sonnet-4.6' },
875875
custom: { url: '', model: '' },
876876
};

lib/llm_dispatch/config.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,9 @@ def is_model_cheap(model_id: str, fallback_cost_per_1k: float = None,
140140
'gpt-4.1-nano': {'caps': {'text', 'cheap'}, 'rpm': 200, 'latency': 1000, 'cost': 0.001},
141141

142142
# ── DeepSeek ──
143+
# V4 family (Apr 2026) — 1M ctx, dual Thinking / Non-Thinking; pro=1.6T/49B, flash=284B/13B.
144+
'deepseek-v4-pro': {'caps': {'text', 'thinking', 'cheap'}, 'rpm': 30, 'latency': 3000, 'cost': 0.003},
145+
'deepseek-v4-flash': {'caps': {'text', 'thinking', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.0002},
143146
'deepseek-chat': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},
144147
'deepseek-v3.2': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},
145148
'deepseek-v3.2-tencent': {'caps': {'text', 'cheap'}, 'rpm': 60, 'latency': 2000, 'cost': 0.001},

lib/llm_dispatch/discovery.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,9 @@ def _probe_balance_url(base_url: str, api_key: str) -> str:
555555
(re.compile(r'longcat', re.I), 'enable_thinking'),
556556
# GLM (Zhipu AI): thinking.type format
557557
(re.compile(r'glm', re.I), 'thinking_type'),
558-
# DeepSeek: no thinking param needed (separate model)
558+
# DeepSeek V4 (Apr 2026) uses thinking.type = "enabled" (dual-mode API).
559+
(re.compile(r'deepseek-v4', re.I), 'thinking_type'),
560+
# DeepSeek V3 reasoner was a separate model — no thinking param needed.
559561
(re.compile(r'deepseek-reasoner', re.I), 'none'),
560562
]
561563

lib/pricing.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@
6060
'gpt-4o-mini': {'input': 0.15, 'output': 0.6, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.50, 'name': 'GPT-4o Mini'},
6161
'gpt-4-turbo': {'input': 10.0, 'output': 30.0, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.50, 'name': 'GPT-4 Turbo'},
6262
'deepseek-chat': {'input': 0.27, 'output': 1.10, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3'},
63+
# DeepSeek V4 (2026-04-24) — both models have 1M ctx, dual Thinking / Non-Thinking modes.
64+
# cacheReadMul derived from disclosed cache-hit pricing: Pro $0.145 / $1.74 ≈ 0.083, Flash $0.028 / $0.14 = 0.20.
65+
'deepseek-v4-pro': {'input': 1.74, 'output': 3.48, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.083, 'name': 'DeepSeek V4 Pro'},
66+
'deepseek-v4-flash': {'input': 0.14, 'output': 0.28, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.20, 'name': 'DeepSeek V4 Flash'},
6367
'deepseek-v3.2': {'input': 0.28, 'output': 0.41, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3.2'}, # ¥2/¥3 per 1M
6468
# DeepSeek V3.2 mirrors on YourProvider gateway — tiered ¥2/¥4 input, ¥4/¥6 output at 32K (cheapest tier in USD)
6569
'deepseek-v3.2-tencent': {'input': 0.28, 'output': 0.55, 'cacheWriteMul': 1.00, 'cacheReadMul': 0.10, 'name': 'DeepSeek V3.2 (Tencent)'}, # ¥2/¥4 per 1M ≤32K

lib/swarm/registry.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@
5757
'heavy': 'qwen3-235b-a22b',
5858
},
5959
'deepseek': {
60-
'light': 'deepseek-chat',
61-
'standard': 'deepseek-chat',
62-
'heavy': 'deepseek-reasoner',
60+
'light': 'deepseek-v4-flash',
61+
'standard': 'deepseek-v4-flash',
62+
'heavy': 'deepseek-v4-pro',
6363
},
6464
'gemini': {
6565
'light': 'gemini-2.0-flash',

static/js/settings.js

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,9 +175,13 @@ const _PROVIDER_TEMPLATES = [
175175
name: 'DeepSeek',
176176
base_url: 'https://api.deepseek.com',
177177
balance_url: 'https://api.deepseek.com/user/balance',
178+
// V4 (Apr 2026): 1M ctx, dual Thinking / Non-Thinking modes. Legacy deepseek-chat /
179+
// deepseek-reasoner retire 2026-07-24 and currently route to deepseek-v4-flash.
178180
models: [
179-
{ model_id: 'deepseek-chat', capabilities: ['text'], rpm: 60, cost: 0.001 },
180-
{ model_id: 'deepseek-reasoner', capabilities: ['text', 'thinking'], rpm: 30, cost: 0.002 },
181+
{ model_id: 'deepseek-v4-pro', capabilities: ['text', 'thinking', 'cheap'], rpm: 30, cost: 0.003 },
182+
{ model_id: 'deepseek-v4-flash', capabilities: ['text', 'thinking', 'cheap'], rpm: 60, cost: 0.0002 },
183+
{ model_id: 'deepseek-chat', capabilities: ['text'], rpm: 60, cost: 0.001 },
184+
{ model_id: 'deepseek-reasoner', capabilities: ['text', 'thinking'], rpm: 30, cost: 0.002 },
181185
],
182186
},
183187
{

0 commit comments

Comments (0)