Skip to content

Commit be643b2

Browse files
feat(ai-gateway): add Qwen3.6 Flash, Max Preview, and 27B with tiered pricing (#2984)
* feat(ai-gateway): add remaining qwen3.6 models

  Add Qwen3.6 Flash, 35B A3B, Max Preview, and 27B models from OpenRouter's catalog alongside the existing Qwen3.6 Plus model.

* refactor(qwen): tiered pricing helper and wire up 3.6 Flash/Max/27B

  - Drop qwen3.6-35b-a3b (never shipped).
  - Extract makeTieredPricing / makeFlatPricing helpers that apply the 35% Kilo discount from pre-discount Alibaba Model Studio prices.
  - Refactor qwen3.6-plus onto the tiered helper (behaviour unchanged).
  - Add qwen3.6-flash and qwen3.6-max-preview with tiered brackets matching the Model Studio pricing page.
  - Add qwen3.6-27b with a flat price (no tiers advertised).
  - Register the new models in kiloExclusiveModels / preferredModels and teach model-settings about the whole qwen3.6 family (binary reasoning variants, alibaba ai_sdk_provider).
  - Cover the new tier boundaries in processUsage.calculatKiloExclusiveCost tests.

* refactor(qwen): export qwen36_models array and drop new models from preferredModels

* refactor(qwen): colocate qwen36ModelIds and derive PricePerMillion via Omit

* refactor(qwen): drop export from flash/max-preview/27b model definitions

* Revert "refactor(qwen): drop export from flash/max-preview/27b model definitions"

  This reverts commit 5bd22d0.

* chore: apply oxfmt formatting

* refactor(qwen): rename to alibabaDirectModels and isAlibabaDirectModel

---------

Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
1 parent da0a1ca commit be643b2

4 files changed

Lines changed: 421 additions & 42 deletions

File tree

apps/web/src/lib/ai-gateway/models.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import {
2323
import { KIMI_CURRENT_MODEL_ID } from '@/lib/ai-gateway/providers/moonshotai';
2424
import { morph_warp_grep_free_model } from '@/lib/ai-gateway/providers/morph';
2525
import { gemma_4_26b_a4b_it_free_model } from '@/lib/ai-gateway/providers/google';
26-
import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
26+
import { alibabaDirectModels, qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
2727
import { stepfun_35_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
2828
import {
2929
grok_code_fast_1_optimized_free_model,
@@ -84,7 +84,7 @@ export const kiloExclusiveModels = [
8484
morph_warp_grep_free_model,
8585
grok_code_fast_1_optimized_free_model,
8686
seed_20_code_free_model,
87-
qwen36_plus_model,
87+
...alibabaDirectModels,
8888
trinity_large_thinking_free_model,
8989
claude_sonnet_clawsetup_model,
9090
stepfun_35_flash_free_model,

apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts

Lines changed: 226 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,22 @@ import { test, describe, expect } from '@jest/globals';
22
import { calculateKiloExclusiveCost_mUsd } from './processUsage';
33
import type { JustTheCostsUsageStats } from './processUsage.types';
44
import type { KiloExclusiveModel } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
5-
import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
5+
import {
6+
qwen36_27b_model,
7+
qwen36_flash_model,
8+
qwen36_max_preview_model,
9+
qwen36_plus_model,
10+
} from '@/lib/ai-gateway/providers/qwen';
11+
12+
const makeUsage = (overrides: Partial<JustTheCostsUsageStats> = {}): JustTheCostsUsageStats => ({
13+
cost_mUsd: 0,
14+
inputTokens: 0,
15+
outputTokens: 0,
16+
cacheWriteTokens: 0,
17+
cacheHitTokens: 0,
18+
is_byok: false,
19+
...overrides,
20+
});
621

722
describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
823
// Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
@@ -15,16 +30,6 @@ describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
1530
// Input: $2/1M → $1.3/1M CacheWrite: $2.5/1M → $1.625/1M
1631
// CacheRead: $0.2/1M → $0.13/1M Output: $6/1M → $3.9/1M
1732

18-
const makeUsage = (overrides: Partial<JustTheCostsUsageStats> = {}): JustTheCostsUsageStats => ({
19-
cost_mUsd: 0,
20-
inputTokens: 0,
21-
outputTokens: 0,
22-
cacheWriteTokens: 0,
23-
cacheHitTokens: 0,
24-
is_byok: false,
25-
...overrides,
26-
});
27-
2833
test('returns 0 when model has no pricing', () => {
2934
const model: KiloExclusiveModel = {
3035
...qwen36_plus_model,
@@ -223,3 +228,213 @@ describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
223228
expect(result).toBe(4_290_000);
224229
});
225230
});
231+
232+
describe('calculatKiloExclusiveCost_mUsd with qwen3.6-flash', () => {
233+
// Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
234+
//
235+
// Input<=256k tier:
236+
// Input: $0.25/1M → $0.1625/1M CacheWrite: $0.3125/1M → $0.203125/1M
237+
// CacheRead: $0.025/1M → $0.01625/1M Output: $1.5/1M → $0.975/1M
238+
//
239+
// 256k<Input<=1M tier:
240+
// Input: $1/1M → $0.65/1M CacheWrite: $1.25/1M → $0.8125/1M
241+
// CacheRead: $0.1/1M → $0.065/1M Output: $4/1M → $2.6/1M
242+
243+
test('input-only cost in <=256k tier', () => {
244+
// 100k tokens * 0.1625 = 16_250 mUsd
245+
const result = calculateKiloExclusiveCost_mUsd(
246+
qwen36_flash_model,
247+
makeUsage({ inputTokens: 100_000 })
248+
);
249+
expect(result).toBe(Math.round(100_000 * 0.1625));
250+
});
251+
252+
test('output cost in <=256k tier', () => {
253+
const result = calculateKiloExclusiveCost_mUsd(
254+
qwen36_flash_model,
255+
makeUsage({ outputTokens: 50_000 })
256+
);
257+
expect(result).toBe(Math.round(50_000 * 0.975));
258+
});
259+
260+
test('mixed usage in <=256k tier', () => {
261+
// 100k input, 20k cache hit, 30k cache write → 50k uncached
262+
// total input = 50k + 30k + 20k = 100k (<=256k)
263+
const result = calculateKiloExclusiveCost_mUsd(
264+
qwen36_flash_model,
265+
makeUsage({
266+
inputTokens: 100_000,
267+
outputTokens: 10_000,
268+
cacheHitTokens: 20_000,
269+
cacheWriteTokens: 30_000,
270+
})
271+
);
272+
expect(result).toBe(
273+
Math.round(50_000 * 0.1625 + 10_000 * 0.975 + 20_000 * 0.01625 + 30_000 * 0.203125)
274+
);
275+
});
276+
277+
test('input-only cost in >256k tier', () => {
278+
// 300k uncached input tokens > 256k → tier 2: 300_000 * 0.65
279+
const result = calculateKiloExclusiveCost_mUsd(
280+
qwen36_flash_model,
281+
makeUsage({ inputTokens: 300_000 })
282+
);
283+
expect(result).toBe(Math.round(300_000 * 0.65));
284+
});
285+
286+
test('mixed usage in >256k tier', () => {
287+
// 500k input, 50k cache hit, 100k cache write → 350k uncached
288+
// total input = 500k (>256k)
289+
const result = calculateKiloExclusiveCost_mUsd(
290+
qwen36_flash_model,
291+
makeUsage({
292+
inputTokens: 500_000,
293+
outputTokens: 20_000,
294+
cacheHitTokens: 50_000,
295+
cacheWriteTokens: 100_000,
296+
})
297+
);
298+
expect(result).toBe(
299+
Math.round(350_000 * 0.65 + 20_000 * 2.6 + 50_000 * 0.065 + 100_000 * 0.8125)
300+
);
301+
});
302+
303+
test('tier boundary: exactly 256k total input uses <=256k pricing', () => {
304+
const result = calculateKiloExclusiveCost_mUsd(
305+
qwen36_flash_model,
306+
makeUsage({ inputTokens: 256 * 1024 })
307+
);
308+
expect(result).toBe(Math.round(256 * 1024 * 0.1625));
309+
});
310+
311+
test('tier boundary: 256k+1 total input uses >256k pricing', () => {
312+
const result = calculateKiloExclusiveCost_mUsd(
313+
qwen36_flash_model,
314+
makeUsage({ inputTokens: 256 * 1024 + 1 })
315+
);
316+
expect(result).toBe(Math.round((256 * 1024 + 1) * 0.65));
317+
});
318+
319+
test('1M tokens input cost matches post-discount tier 2 price', () => {
320+
const result = calculateKiloExclusiveCost_mUsd(
321+
qwen36_flash_model,
322+
makeUsage({ inputTokens: 1_000_000 })
323+
);
324+
expect(result).toBe(1_000_000 * 0.65);
325+
});
326+
});
327+
328+
describe('calculatKiloExclusiveCost_mUsd with qwen3.6-max-preview', () => {
329+
// Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
330+
//
331+
// Input<=128k tier:
332+
// Input: $1.3/1M → $0.845/1M CacheWrite: $1.625/1M → $1.05625/1M
333+
// CacheRead: $0.13/1M → $0.0845/1M Output: $7.8/1M → $5.07/1M
334+
//
335+
// 128k<Input<=256k tier:
336+
// Input: $2/1M → $1.3/1M CacheWrite: $2.5/1M → $1.625/1M
337+
// CacheRead: $0.2/1M → $0.13/1M Output: $12/1M → $7.8/1M
338+
339+
test('input-only cost in <=128k tier', () => {
340+
const result = calculateKiloExclusiveCost_mUsd(
341+
qwen36_max_preview_model,
342+
makeUsage({ inputTokens: 50_000 })
343+
);
344+
expect(result).toBe(Math.round(50_000 * 0.845));
345+
});
346+
347+
test('output cost in <=128k tier', () => {
348+
const result = calculateKiloExclusiveCost_mUsd(
349+
qwen36_max_preview_model,
350+
makeUsage({ outputTokens: 10_000 })
351+
);
352+
expect(result).toBe(Math.round(10_000 * 5.07));
353+
});
354+
355+
test('mixed usage in <=128k tier', () => {
356+
const result = calculateKiloExclusiveCost_mUsd(
357+
qwen36_max_preview_model,
358+
makeUsage({
359+
inputTokens: 100_000,
360+
outputTokens: 5_000,
361+
cacheHitTokens: 20_000,
362+
cacheWriteTokens: 30_000,
363+
})
364+
);
365+
expect(result).toBe(
366+
Math.round(50_000 * 0.845 + 5_000 * 5.07 + 20_000 * 0.0845 + 30_000 * 1.05625)
367+
);
368+
});
369+
370+
test('input-only cost in >128k tier', () => {
371+
// 200k > 128k → tier 2
372+
const result = calculateKiloExclusiveCost_mUsd(
373+
qwen36_max_preview_model,
374+
makeUsage({ inputTokens: 200_000 })
375+
);
376+
expect(result).toBe(Math.round(200_000 * 1.3));
377+
});
378+
379+
test('tier boundary: exactly 128k total input uses <=128k pricing', () => {
380+
const result = calculateKiloExclusiveCost_mUsd(
381+
qwen36_max_preview_model,
382+
makeUsage({ inputTokens: 128 * 1024 })
383+
);
384+
expect(result).toBe(Math.round(128 * 1024 * 0.845));
385+
});
386+
387+
test('tier boundary: 128k+1 total input uses >128k pricing', () => {
388+
const result = calculateKiloExclusiveCost_mUsd(
389+
qwen36_max_preview_model,
390+
makeUsage({ inputTokens: 128 * 1024 + 1 })
391+
);
392+
expect(result).toBe(Math.round((128 * 1024 + 1) * 1.3));
393+
});
394+
395+
test('256k tokens input cost uses >128k tier', () => {
396+
const result = calculateKiloExclusiveCost_mUsd(
397+
qwen36_max_preview_model,
398+
makeUsage({ inputTokens: 256 * 1024 })
399+
);
400+
expect(result).toBe(Math.round(256 * 1024 * 1.3));
401+
});
402+
});
403+
404+
describe('calculatKiloExclusiveCost_mUsd with qwen3.6-27b', () => {
405+
// Pre-discount prices (35% Kilo discount applied in code):
406+
// Input: $0.5/1M → $0.325/1M Output: $5/1M → $3.25/1M
407+
408+
test('input-only cost', () => {
409+
const result = calculateKiloExclusiveCost_mUsd(
410+
qwen36_27b_model,
411+
makeUsage({ inputTokens: 100_000 })
412+
);
413+
expect(result).toBe(Math.round(100_000 * 0.325));
414+
});
415+
416+
test('output-only cost', () => {
417+
const result = calculateKiloExclusiveCost_mUsd(
418+
qwen36_27b_model,
419+
makeUsage({ outputTokens: 50_000 })
420+
);
421+
expect(result).toBe(Math.round(50_000 * 3.25));
422+
});
423+
424+
test('cache hit falls back to prompt price when cache_read is null', () => {
425+
// no explicit cache_read price → uses prompt_per_million
426+
const result = calculateKiloExclusiveCost_mUsd(
427+
qwen36_27b_model,
428+
makeUsage({ inputTokens: 100_000, cacheHitTokens: 100_000 })
429+
);
430+
expect(result).toBe(Math.round(100_000 * 0.325));
431+
});
432+
433+
test('pricing is flat regardless of input size', () => {
434+
const result = calculateKiloExclusiveCost_mUsd(
435+
qwen36_27b_model,
436+
makeUsage({ inputTokens: 250_000 })
437+
);
438+
expect(result).toBe(Math.round(250_000 * 0.325));
439+
});
440+
});

apps/web/src/lib/ai-gateway/providers/model-settings.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import { isClaudeModel, isOpusModel } from '@/lib/ai-gateway/providers/anthropic
22
import { isGemini3Model, isGemmaModel } from '@/lib/ai-gateway/providers/google';
33
import { isKimiModel } from '@/lib/ai-gateway/providers/moonshotai';
44
import { isOpenAiModel } from '@/lib/ai-gateway/providers/openai';
5-
import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
5+
import { isAlibabaDirectModel } from '@/lib/ai-gateway/providers/qwen';
66
import { seed_20_code_free_model } from '@/lib/ai-gateway/providers/seed';
77
import { isGrok4Model, isGrokModel } from '@/lib/ai-gateway/providers/xai';
88
import { isGlmModel } from '@/lib/ai-gateway/providers/zai';
@@ -66,7 +66,7 @@ export function getModelVariants(model: string): OpenCodeSettings['variants'] {
6666
if (
6767
isKimiModel(model) ||
6868
isGlmModel(model) ||
69-
model === qwen36_plus_model.public_id ||
69+
isAlibabaDirectModel(model) ||
7070
isGemmaModel(model)
7171
) {
7272
return REASONING_VARIANTS_BINARY;
@@ -97,7 +97,7 @@ export function getModelVariants(model: string): OpenCodeSettings['variants'] {
9797
}
9898

9999
function getAiSdkProvider(model: string): CustomLlmProvider | undefined {
100-
if (qwen36_plus_model.public_id === model) {
100+
if (isAlibabaDirectModel(model)) {
101101
// with 'openai' (Responses) prompt caching doesn't work
102102
// with 'openai-compatible' (Chat Completions) cost is wrong (cache writes are not counted)
103103
return 'alibaba';

0 commit comments

Comments
 (0)