Skip to content

Commit 72fb77c

Browse files
feat(pricing): add prod pricing and rate limits for documented models
Sync prod_model_price.json and prod_rate_limit.json with the model catalog documented in api-gateway-docs and the active blockchain registry.

Adds entries (including :web and capitalized registry-ID variants) for glm-5.1, glm-5.1-non-thinking, kimi-k2.6, MiniMax-M2.7, deepseek-v4-pro, and deepseek-v4-flash, plus :web and casing aliases for existing models, so that every registered blockchain ID resolves without falling back to the default price.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e684c69 commit 72fb77c

2 files changed

Lines changed: 64 additions & 1 deletion

File tree

models/prod_model_price.json

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,38 +2,78 @@
22
"default_input_price_per_million": "0.50",
33
"default_output_price_per_million": "2.00",
44
"models": {
5+
"glm-5.1": { "input": "1.50", "output": "5.00" },
6+
"glm-5.1:web": { "input": "1.50", "output": "5.00" },
7+
"GLM-5.1": { "input": "1.50", "output": "5.00" },
8+
"glm-5.1-non-thinking": { "input": "1.50", "output": "5.00" },
9+
"glm-5.1-non-thinking:web": { "input": "1.50", "output": "5.00" },
10+
511
"glm-5": { "input": "1.00", "output": "3.20" },
12+
"glm-5:web": { "input": "1.00", "output": "3.20" },
13+
"GLM-5": { "input": "1.00", "output": "3.20" },
614
"glm-4.7": { "input": "0.50", "output": "2.25" },
15+
"glm-4.7:web": { "input": "0.50", "output": "2.25" },
716
"glm-4.7-thinking": { "input": "0.45", "output": "2.00" },
17+
"glm-4.7-thinking:web": { "input": "0.45", "output": "2.00" },
818
"glm-4.7-flash": { "input": "0.10", "output": "0.50" },
19+
"glm-4.7-flash:web": { "input": "0.10", "output": "0.50" },
920

21+
"kimi-k2.6": { "input": "0.50", "output": "3.25" },
22+
"kimi-k2.6:web": { "input": "0.50", "output": "3.25" },
1023
"kimi-k2.5": { "input": "0.60", "output": "3.00" },
24+
"kimi-k2.5:web": { "input": "0.60", "output": "3.00" },
25+
"Kimi-K2.5": { "input": "0.60", "output": "3.00" },
1126
"kimi-k2-thinking": { "input": "0.60", "output": "3.00" },
1227

1328
"gemma-4-31b": { "input": "0.15", "output": "0.40" },
29+
"Gemma-4-31b": { "input": "0.15", "output": "0.40" },
30+
"Gemma-4-31b:web": { "input": "0.15", "output": "0.40" },
1431
"gemma-4-26b-a4b": { "input": "0.15", "output": "0.40" },
32+
"Gemma-4-26b-a4b": { "input": "0.15", "output": "0.40" },
33+
"Gemma-4-26b-a4b:web": { "input": "0.15", "output": "0.40" },
1534

1635
"qwen3-235b": { "input": "0.40", "output": "3.00" },
36+
"qwen3-235b:web": { "input": "0.40", "output": "3.00" },
1737
"qwen-3-235b": { "input": "0.40", "output": "3.00" },
1838
"qwen3-coder-480b-a35b-instruct": { "input": "0.70", "output": "2.80" },
39+
"qwen3-coder-480b-a35b-instruct:web": { "input": "0.70", "output": "2.80" },
1940
"qwen3-coder-480b-a35b": { "input": "0.70", "output": "2.80" },
2041
"qwen3-next-80b": { "input": "0.15", "output": "1.50" },
42+
"qwen3-next-80b:web": { "input": "0.15", "output": "1.50" },
2143
"qwen35-35b-a3b": { "input": "0.30", "output": "1.25" },
44+
"qwen35-35b-a3b:web": { "input": "0.30", "output": "1.25" },
2245
"qwen35-9b": { "input": "0.05", "output": "0.15" },
46+
"qwen35-9b:web": { "input": "0.05", "output": "0.15" },
2347

2448
"arcee-trinity-large-thinking": { "input": "0.30", "output": "1.00" },
49+
"Arcee-Trinity-Large-Thinking": { "input": "0.30", "output": "1.00" },
50+
"Arcee-Trinity-Large-Thinking:web": { "input": "0.30", "output": "1.00" },
2551

2652
"minimax-m2.5": { "input": "0.30", "output": "1.20" },
53+
"MiniMax-M2.5": { "input": "0.30", "output": "1.20" },
54+
"MiniMax-M2.5:web": { "input": "0.30", "output": "1.20" },
55+
"MiniMax-M2.7": { "input": "0.35", "output": "1.50" },
56+
"MiniMax-M2.7:web": { "input": "0.35", "output": "1.50" },
57+
58+
"deepseek-v4-pro": { "input": "1.60", "output": "3.50" },
59+
"deepseek-v4-pro:web": { "input": "1.60", "output": "3.50" },
60+
"deepseek-v4-flash": { "input": "0.15", "output": "0.30" },
61+
"deepseek-v4-flash:web": { "input": "0.15", "output": "0.30" },
2762

2863
"gpt-oss-120b": { "input": "0.07", "output": "0.28" },
64+
"gpt-oss-120b:web": { "input": "0.07", "output": "0.28" },
2965

3066
"hermes-3-llama-3.1-405b": { "input": "1.00", "output": "3.00" },
67+
"hermes-3-llama-3.1-405b:web": { "input": "1.00", "output": "3.00" },
3168
"llama-3.3-70b": { "input": "0.70", "output": "2.50" },
69+
"llama-3.3-70b:web": { "input": "0.70", "output": "2.50" },
3270
"llama-3-3-70b": { "input": "0.70", "output": "2.50" },
3371
"llama-3.2-3b": { "input": "0.10", "output": "0.50" },
72+
"llama-3.2-3b:web": { "input": "0.10", "output": "0.50" },
3473
"llama-3-2-3b": { "input": "0.10", "output": "0.50" },
3574

3675
"mistral-31-24b": { "input": "0.50", "output": "2.00" },
76+
"mistral-31-24b:web": { "input": "0.50", "output": "2.00" },
3777
"mistral-small-24b": { "input": "0.50", "output": "2.00" },
3878

3979
"venice-uncensored": { "input": "0.20", "output": "0.90" },

models/prod_rate_limit.json

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,9 @@
3838
"qwen35-9b",
3939
"qwen35-9b:web",
4040
"venice-uncensored",
41-
"venice-uncensored:web"
41+
"venice-uncensored:web",
42+
"deepseek-v4-flash",
43+
"deepseek-v4-flash:web"
4244
],
4345
"priority": 50,
4446
"description": "Medium models with moderate limits"
@@ -50,6 +52,12 @@
5052
"models": [
5153
"glm-5",
5254
"glm-5:web",
55+
"GLM-5",
56+
"glm-5.1",
57+
"glm-5.1:web",
58+
"GLM-5.1",
59+
"glm-5.1-non-thinking",
60+
"glm-5.1-non-thinking:web",
5361
"glm-4.7",
5462
"glm-4.7:web",
5563
"glm-4.7-thinking",
@@ -58,20 +66,35 @@
5866
"glm-4.7-flash:web",
5967
"kimi-k2.5",
6068
"kimi-k2.5:web",
69+
"Kimi-K2.5",
70+
"kimi-k2.6",
71+
"kimi-k2.6:web",
6172
"kimi-k2-thinking",
6273
"kimi-k2-thinking:web",
6374
"gemma-4-31b",
6475
"gemma-4-31b:web",
76+
"Gemma-4-31b",
77+
"Gemma-4-31b:web",
6578
"gemma-4-26b-a4b",
6679
"gemma-4-26b-a4b:web",
80+
"Gemma-4-26b-a4b",
81+
"Gemma-4-26b-a4b:web",
6782
"arcee-trinity-large-thinking",
6883
"arcee-trinity-large-thinking:web",
84+
"Arcee-Trinity-Large-Thinking",
85+
"Arcee-Trinity-Large-Thinking:web",
6986
"qwen3-235b",
7087
"qwen3-235b:web",
7188
"qwen3-coder-480b-a35b-instruct",
7289
"qwen3-coder-480b-a35b-instruct:web",
7390
"minimax-m2.5",
7491
"minimax-m2.5:web",
92+
"MiniMax-M2.5",
93+
"MiniMax-M2.5:web",
94+
"MiniMax-M2.7",
95+
"MiniMax-M2.7:web",
96+
"deepseek-v4-pro",
97+
"deepseek-v4-pro:web",
7598
"gpt-oss-120b",
7699
"gpt-oss-120b:web",
77100
"hermes-3-llama-3.1-405b",

0 commit comments

Comments
 (0)