feat(ai-gateway): add Qwen3.6 Flash, Max Preview, and 27B with tiered pricing (#2984)

kilo-code-bot[bot] · web-flow · commit be643b2f0238 · 2026-05-01T14:35:31.000+02:00
* feat(ai-gateway): add remaining qwen3.6 models Add Qwen3.6 Flash, 35B A3B, Max Preview, and 27B models from OpenRouter's catalog alongside the existing Qwen3.6 Plus model. * refactor(qwen): tiered pricing helper and wire up 3.6 Flash/Max/27B - Drop qwen3.6-35b-a3b (never shipped). - Extract makeTieredPricing / makeFlatPricing helpers that apply the 35% Kilo discount from pre-discount Alibaba Model Studio prices. - Refactor qwen3.6-plus onto the tiered helper (behaviour unchanged). - Add qwen3.6-flash and qwen3.6-max-preview with tiered brackets matching the Model Studio pricing page. - Add qwen3.6-27b with a flat price (no tiers advertised). - Register the new models in kiloExclusiveModels / preferredModels and teach model-settings about the whole qwen3.6 family (binary reasoning variants, alibaba ai_sdk_provider). - Cover the new tier boundaries in processUsage.calculatKiloExclusiveCost tests. * refactor(qwen): export qwen36_models array and drop new models from preferredModels * refactor(qwen): colocate qwen36ModelIds and derive PricePerMillion via Omit * refactor(qwen): drop export from flash/max-preview/27b model definitions * Revert "refactor(qwen): drop export from flash/max-preview/27b model definitions" This reverts commit 5bd22d0. * chore: apply oxfmt formatting * refactor(qwen): rename to alibabaDirectModels and isAlibabaDirectModel --------- Co-authored-by: kiloconnect[bot] <240665456+kiloconnect[bot]@users.noreply.github.com>
diff --git a/apps/web/src/lib/ai-gateway/models.ts b/apps/web/src/lib/ai-gateway/models.ts
@@ -23,7 +23,7 @@ import {
 import { KIMI_CURRENT_MODEL_ID } from '@/lib/ai-gateway/providers/moonshotai';
 import { morph_warp_grep_free_model } from '@/lib/ai-gateway/providers/morph';
 import { gemma_4_26b_a4b_it_free_model } from '@/lib/ai-gateway/providers/google';
-import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
+import { alibabaDirectModels, qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
 import { stepfun_35_flash_free_model } from '@/lib/ai-gateway/providers/stepfun';
 import {
   grok_code_fast_1_optimized_free_model,
@@ -84,7 +84,7 @@ export const kiloExclusiveModels = [
   morph_warp_grep_free_model,
   grok_code_fast_1_optimized_free_model,
   seed_20_code_free_model,
-  qwen36_plus_model,
+  ...alibabaDirectModels,
   trinity_large_thinking_free_model,
   claude_sonnet_clawsetup_model,
   stepfun_35_flash_free_model,
diff --git a/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts b/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts
@@ -2,7 +2,22 @@ import { test, describe, expect } from '@jest/globals';
 import { calculateKiloExclusiveCost_mUsd } from './processUsage';
 import type { JustTheCostsUsageStats } from './processUsage.types';
 import type { KiloExclusiveModel } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
-import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
+import {
+  qwen36_27b_model,
+  qwen36_flash_model,
+  qwen36_max_preview_model,
+  qwen36_plus_model,
+} from '@/lib/ai-gateway/providers/qwen';
+
+const makeUsage = (overrides: Partial<JustTheCostsUsageStats> = {}): JustTheCostsUsageStats => ({
+  cost_mUsd: 0,
+  inputTokens: 0,
+  outputTokens: 0,
+  cacheWriteTokens: 0,
+  cacheHitTokens: 0,
+  is_byok: false,
+  ...overrides,
+});
 
 describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
   // Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
@@ -15,16 +30,6 @@ describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
   //   Input: $2/1M → $1.3/1M   CacheWrite: $2.5/1M → $1.625/1M
   //   CacheRead: $0.2/1M → $0.13/1M   Output: $6/1M → $3.9/1M
 
-  const makeUsage = (overrides: Partial<JustTheCostsUsageStats> = {}): JustTheCostsUsageStats => ({
-    cost_mUsd: 0,
-    inputTokens: 0,
-    outputTokens: 0,
-    cacheWriteTokens: 0,
-    cacheHitTokens: 0,
-    is_byok: false,
-    ...overrides,
-  });
-
   test('returns 0 when model has no pricing', () => {
     const model: KiloExclusiveModel = {
       ...qwen36_plus_model,
@@ -223,3 +228,213 @@ describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => {
     expect(result).toBe(4_290_000);
   });
 });
+
+describe('calculatKiloExclusiveCost_mUsd with qwen3.6-flash', () => {
+  // Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
+  //
+  // Input<=256k tier:
+  //   Input: $0.25/1M → $0.1625/1M   CacheWrite: $0.3125/1M → $0.203125/1M
+  //   CacheRead: $0.025/1M → $0.01625/1M   Output: $1.5/1M → $0.975/1M
+  //
+  // 256k<Input<=1M tier:
+  //   Input: $1/1M → $0.65/1M   CacheWrite: $1.25/1M → $0.8125/1M
+  //   CacheRead: $0.1/1M → $0.065/1M   Output: $4/1M → $2.6/1M
+
+  test('input-only cost in <=256k tier', () => {
+    // 100k tokens * 0.1625 = 16_250 mUsd
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ inputTokens: 100_000 })
+    );
+    expect(result).toBe(Math.round(100_000 * 0.1625));
+  });
+
+  test('output cost in <=256k tier', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ outputTokens: 50_000 })
+    );
+    expect(result).toBe(Math.round(50_000 * 0.975));
+  });
+
+  test('mixed usage in <=256k tier', () => {
+    // 100k input, 20k cache hit, 30k cache write → 50k uncached
+    // total input = 50k + 30k + 20k = 100k (<=256k)
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({
+        inputTokens: 100_000,
+        outputTokens: 10_000,
+        cacheHitTokens: 20_000,
+        cacheWriteTokens: 30_000,
+      })
+    );
+    expect(result).toBe(
+      Math.round(50_000 * 0.1625 + 10_000 * 0.975 + 20_000 * 0.01625 + 30_000 * 0.203125)
+    );
+  });
+
+  test('input-only cost in >256k tier', () => {
+    // 300k uncached input tokens > 256k → tier 2: 300_000 * 0.65
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ inputTokens: 300_000 })
+    );
+    expect(result).toBe(Math.round(300_000 * 0.65));
+  });
+
+  test('mixed usage in >256k tier', () => {
+    // 500k input, 50k cache hit, 100k cache write → 350k uncached
+    // total input = 500k (>256k)
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({
+        inputTokens: 500_000,
+        outputTokens: 20_000,
+        cacheHitTokens: 50_000,
+        cacheWriteTokens: 100_000,
+      })
+    );
+    expect(result).toBe(
+      Math.round(350_000 * 0.65 + 20_000 * 2.6 + 50_000 * 0.065 + 100_000 * 0.8125)
+    );
+  });
+
+  test('tier boundary: exactly 256k total input uses <=256k pricing', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ inputTokens: 256 * 1024 })
+    );
+    expect(result).toBe(Math.round(256 * 1024 * 0.1625));
+  });
+
+  test('tier boundary: 256k+1 total input uses >256k pricing', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ inputTokens: 256 * 1024 + 1 })
+    );
+    expect(result).toBe(Math.round((256 * 1024 + 1) * 0.65));
+  });
+
+  test('1M tokens input cost matches post-discount tier 2 price', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_flash_model,
+      makeUsage({ inputTokens: 1_000_000 })
+    );
+    expect(result).toBe(1_000_000 * 0.65);
+  });
+});
+
+describe('calculatKiloExclusiveCost_mUsd with qwen3.6-max-preview', () => {
+  // Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code):
+  //
+  // Input<=128k tier:
+  //   Input: $1.3/1M → $0.845/1M   CacheWrite: $1.625/1M → $1.05625/1M
+  //   CacheRead: $0.13/1M → $0.0845/1M   Output: $7.8/1M → $5.07/1M
+  //
+  // 128k<Input<=256k tier:
+  //   Input: $2/1M → $1.3/1M   CacheWrite: $2.5/1M → $1.625/1M
+  //   CacheRead: $0.2/1M → $0.13/1M   Output: $12/1M → $7.8/1M
+
+  test('input-only cost in <=128k tier', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ inputTokens: 50_000 })
+    );
+    expect(result).toBe(Math.round(50_000 * 0.845));
+  });
+
+  test('output cost in <=128k tier', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ outputTokens: 10_000 })
+    );
+    expect(result).toBe(Math.round(10_000 * 5.07));
+  });
+
+  test('mixed usage in <=128k tier', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({
+        inputTokens: 100_000,
+        outputTokens: 5_000,
+        cacheHitTokens: 20_000,
+        cacheWriteTokens: 30_000,
+      })
+    );
+    expect(result).toBe(
+      Math.round(50_000 * 0.845 + 5_000 * 5.07 + 20_000 * 0.0845 + 30_000 * 1.05625)
+    );
+  });
+
+  test('input-only cost in >128k tier', () => {
+    // 200k > 128k → tier 2
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ inputTokens: 200_000 })
+    );
+    expect(result).toBe(Math.round(200_000 * 1.3));
+  });
+
+  test('tier boundary: exactly 128k total input uses <=128k pricing', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ inputTokens: 128 * 1024 })
+    );
+    expect(result).toBe(Math.round(128 * 1024 * 0.845));
+  });
+
+  test('tier boundary: 128k+1 total input uses >128k pricing', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ inputTokens: 128 * 1024 + 1 })
+    );
+    expect(result).toBe(Math.round((128 * 1024 + 1) * 1.3));
+  });
+
+  test('256k tokens input cost uses >128k tier', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_max_preview_model,
+      makeUsage({ inputTokens: 256 * 1024 })
+    );
+    expect(result).toBe(Math.round(256 * 1024 * 1.3));
+  });
+});
+
+describe('calculatKiloExclusiveCost_mUsd with qwen3.6-27b', () => {
+  // Pre-discount prices (35% Kilo discount applied in code):
+  //   Input: $0.5/1M → $0.325/1M   Output: $5/1M → $3.25/1M
+
+  test('input-only cost', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_27b_model,
+      makeUsage({ inputTokens: 100_000 })
+    );
+    expect(result).toBe(Math.round(100_000 * 0.325));
+  });
+
+  test('output-only cost', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_27b_model,
+      makeUsage({ outputTokens: 50_000 })
+    );
+    expect(result).toBe(Math.round(50_000 * 3.25));
+  });
+
+  test('cache hit falls back to prompt price when cache_read is null', () => {
+    // no explicit cache_read price → uses prompt_per_million
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_27b_model,
+      makeUsage({ inputTokens: 100_000, cacheHitTokens: 100_000 })
+    );
+    expect(result).toBe(Math.round(100_000 * 0.325));
+  });
+
+  test('pricing is flat regardless of input size', () => {
+    const result = calculateKiloExclusiveCost_mUsd(
+      qwen36_27b_model,
+      makeUsage({ inputTokens: 250_000 })
+    );
+    expect(result).toBe(Math.round(250_000 * 0.325));
+  });
+});
diff --git a/apps/web/src/lib/ai-gateway/providers/model-settings.ts b/apps/web/src/lib/ai-gateway/providers/model-settings.ts
@@ -2,7 +2,7 @@ import { isClaudeModel, isOpusModel } from '@/lib/ai-gateway/providers/anthropic
 import { isGemini3Model, isGemmaModel } from '@/lib/ai-gateway/providers/google';
 import { isKimiModel } from '@/lib/ai-gateway/providers/moonshotai';
 import { isOpenAiModel } from '@/lib/ai-gateway/providers/openai';
-import { qwen36_plus_model } from '@/lib/ai-gateway/providers/qwen';
+import { isAlibabaDirectModel } from '@/lib/ai-gateway/providers/qwen';
 import { seed_20_code_free_model } from '@/lib/ai-gateway/providers/seed';
 import { isGrok4Model, isGrokModel } from '@/lib/ai-gateway/providers/xai';
 import { isGlmModel } from '@/lib/ai-gateway/providers/zai';
@@ -66,7 +66,7 @@ export function getModelVariants(model: string): OpenCodeSettings['variants'] {
   if (
     isKimiModel(model) ||
     isGlmModel(model) ||
-    model === qwen36_plus_model.public_id ||
+    isAlibabaDirectModel(model) ||
     isGemmaModel(model)
   ) {
     return REASONING_VARIANTS_BINARY;
@@ -97,7 +97,7 @@ export function getModelVariants(model: string): OpenCodeSettings['variants'] {
 }
 
 function getAiSdkProvider(model: string): CustomLlmProvider | undefined {
-  if (qwen36_plus_model.public_id === model) {
+  if (isAlibabaDirectModel(model)) {
     // with 'openai' (Responses) prompt caching doesn't work
     // with 'openai-compatible' (Chat Completions) cost is wrong (cache writes are not counted)
     return 'alibaba';
diff --git a/apps/web/src/lib/ai-gateway/providers/qwen.ts b/apps/web/src/lib/ai-gateway/providers/qwen.ts