mcowger
diff --git a/packages/backend/src/services/inspectors/usage-logging.ts b/packages/backend/src/services/inspectors/usage-logging.ts
Lines changed: 9 additions & 0 deletions
diff --git a/packages/backend/src/services/response-handler.ts b/packages/backend/src/services/response-handler.ts
Lines changed: 9 additions & 0 deletions
diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts
Lines changed: 320 additions & 3 deletions
@@ -148,6 +148,15 @@ export class UsageInspector extends PassThrough {
       // Some providers emit `: cost {"request_cost_usd": ...}` as SSE comments
       if (reconstructed?.providerReportedCost) {
         applyProviderReportedCost(this.usageRecord, reconstructed.providerReportedCost);
+        if (reconstructed?.usage) {
+          const usageCostDetails = extractUsageCostDetails(reconstructed.usage);
+          if (usageCostDetails) {
+            logger.warn(
+              `[ProviderCost] Both SSE :cost and usage.cost_details present for ${this.usageRecord.requestId}; ` +
+                `SSE value ($${this.usageRecord.providerReportedCost}) takes priority over cost_details total ($${usageCostDetails.total_cost})`
+            );
+          }
+        }
       }
 
       // Override with provider-reported cost from usage.cost_details if available
 
@@ -502,6 +502,15 @@ async function finalizeUsage(
   const reconstructed = debugManager.getReconstructedRawResponse(usageRecord.requestId!);
   if (reconstructed?.providerReportedCost) {
     applyProviderReportedCost(usageRecord, reconstructed.providerReportedCost);
+    if (reconstructed?.usage) {
+      const usageCostDetails = extractUsageCostDetails(reconstructed.usage);
+      if (usageCostDetails) {
+        logger.warn(
+          `[ProviderCost] Both SSE :cost and usage.cost_details present for ${usageRecord.requestId}; ` +
+            `SSE value ($${usageRecord.providerReportedCost}) takes priority over cost_details total ($${usageCostDetails.total_cost})`
+        );
+      }
+    }
   }
 
   // Also check for cost_details in the usage block (some providers embed costs there)
 
@@ -432,12 +432,60 @@ describe('extractUsageCostDetails', () => {
 
     expect(extractUsageCostDetails(usage)).toBeNull();
   });
+
+  test('captures usage.cost when cost_details block is absent (Kimi/Avian shape)', () => {
+    // Real response: Kimi-k2.5 via OpenRouter — usage.cost present but no cost_details block.
+    const usage = {
+      prompt_tokens: 154,
+      completion_tokens: 131,
+      total_tokens: 285,
+      cost: 0.0003287,
+      prompt_tokens_details: { cached_tokens: 128, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      completion_tokens_details: { reasoning_tokens: 87, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.0003287);
+    expect(result!.input_cost).toBeNull();
+    expect(result!.upstream_inference_prompt_cost).toBeNull();
+  });
+
+  test('captures cost_in_usd_ticks when cost_details block is absent (xAI grok shape)', () => {
+    // Real response: xai-grok-4-fast — cost reported as integer ticks, no cost_details block.
+    // 1 USD = 10^10 ticks per xAI API docs.
+    const usage = {
+      prompt_tokens: 165,
+      completion_tokens: 2,
+      total_tokens: 296,
+      prompt_tokens_details: { text_tokens: 165, audio_tokens: 0, image_tokens: 0, cached_tokens: 164 },
+      completion_tokens_details: { reasoning_tokens: 129, audio_tokens: 0, accepted_prediction_tokens: 0, rejected_prediction_tokens: 0 },
+      num_sources_used: 0,
+      cost_in_usd_ticks: 739000,
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBeCloseTo(739000 / 10_000_000_000, 10);
+    expect(result!.input_cost).toBeNull();
+    expect(result!.upstream_inference_prompt_cost).toBeNull();
+  });
+
+  test('returns null when neither cost_details nor top-level cost fields are present', () => {
+    const usage = {
+      prompt_tokens: 100,
+      completion_tokens: 50,
+      total_tokens: 150,
+    };
+
+    expect(extractUsageCostDetails(usage)).toBeNull();
+  });
 });
 
 describe('applyUsageCostDetails', () => {
   test('applies gateway input/output/cached costs directly when full breakdown is present', () => {
     const record = createUsageRecord();
-    // Extracted from: glm-5.1 via LLM Gateway (real response)
+    // Extracted from: glm-5.1 via LLM Gateway
     const costDetails: ProviderCostDetails = {
       total_cost: 0.022101624,
       input_cost: 0.00073836,
@@ -533,7 +581,7 @@ describe('applyUsageCostDetails', () => {
     const record = createUsageRecord();
     // createUsageRecord defaults: costInput=0.001, costCached=0.0005
     // Prompt ratio: input=0.001/(0.001+0.0005)=2/3, cached=0.0005/(0.001+0.0005)=1/3
-    // Extracted from: z-ai/glm-5-turbo-20260315 (real response, cached_tokens=128/173 prompt tokens)
+    // Extracted from: z-ai/glm-5-turbo-20260315 (cached_tokens=128/173 prompt tokens)
     const costDetails: ProviderCostDetails = {
       total_cost: 0.00021672,
       input_cost: null,
@@ -603,7 +651,7 @@ describe('applyUsageCostDetails', () => {
 
   test('attributes full upstream prompt cost to input when no cached tokens', () => {
     const record = createUsageRecord({ costCached: 0, costCacheWrite: 0, costTotal: 0.003 });
-    // Extracted from: normal-tier real response (cached_tokens=0)
+    // Extracted from: normal-tier (cached_tokens=0)
     const costDetails: ProviderCostDetails = {
       total_cost: 0.00435825,
       input_cost: null,
@@ -822,6 +870,43 @@ describe('applyUsageCostDetails', () => {
     expect(record.costOutput).toBe(0);
   });
 
+  test('falls back to proportional distribution when upstream costs are all zero (Vercel shape)', () => {
+    // Real response: Vercel AI Gateway — cost is non-zero but upstream_inference_* fields are
+    // all 0 (gateway doesn't pass through upstream cost breakdown). Without the > 0 guard,
+    // the Normal tier would fire and produce zero sub-costs despite total_cost being correct.
+    const record = createUsageRecord();
+    // costInput=0.001, costOutput=0.002, costCached=0.0005, total=0.0035
+    const costDetails: ProviderCostDetails = {
+      total_cost: 0.003561,
+      input_cost: null,
+      output_cost: null,
+      cached_input_cost: null,
+      cache_write_input_cost: null,
+      upstream_inference_cost: null,
+      upstream_inference_prompt_cost: 0,
+      upstream_inference_completions_cost: 0,
+      request_cost: null,
+      web_search_cost: null,
+      image_input_cost: null,
+      image_output_cost: null,
+      audio_input_cost: null,
+      data_storage_cost: null,
+    };
+
+    applyUsageCostDetails(record, costDetails);
+
+    expect(record.costTotal).toBe(0.003561);
+    expect(record.costSource).toBe('provider_reported');
+    // Should use Minimal tier (proportional distribution), not Normal tier (which would zero everything)
+    expect(record.costInput).toBeGreaterThan(0);
+    expect(record.costOutput).toBeGreaterThan(0);
+    expect(record.costCached).toBeGreaterThan(0);
+    // Proportional: input=1/3.5, output=2/3.5, cached=0.5/3.5
+    expect(record.costInput).toBeCloseTo((0.001 / 0.0035) * 0.003561, 8);
+    expect(record.costOutput).toBeCloseTo((0.002 / 0.0035) * 0.003561, 8);
+    expect(record.costCached).toBeCloseTo((0.0005 / 0.0035) * 0.003561, 8);
+  });
+
   test('SSE : cost comments take precedence over cost_details', () => {
     const record = createUsageRecord();
     // SSE comment cost applied first
@@ -1030,3 +1115,235 @@ describe('extractProviderEnergyFromSSEComments (via DebugLoggingInspector)', ()
     expect(lastEnergy.energy_kwh).toBe(5.2904e-5);
   });
 });
+
+describe('extractUsageCostDetails - real-world cassette shapes', () => {
+  test('Vercel market_cost field does not interfere with cost extraction', () => {
+    // Vercel AI Gateway adds market_cost alongside cost and cost_details.
+    // The normalizer should extract cost as total and ignore market_cost.
+    const usage = {
+      prompt_tokens: 16,
+      completion_tokens: 33,
+      total_tokens: 49,
+      cost: 0.000543,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: null,
+        upstream_inference_prompt_cost: 0,
+        upstream_inference_completions_cost: 0,
+      },
+      completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0 },
+      cache_creation_input_tokens: 0,
+      market_cost: 0.000543,
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.000543);
+    // NOTE(review): market_cost equals cost in this fixture, so this assertion alone cannot prove market_cost is ignored.
+    // Both upstream_inference_* values are 0; the expected fallback to the minimal (proportional) tier is asserted in the applyUsageCostDetails 'Vercel shape' test, not here.
+  });
+
+  test('Vercel GPT-5 with non-zero cost and zero upstream breakdown', () => {
+    const usage = {
+      prompt_tokens: 113,
+      completion_tokens: 327,
+      total_tokens: 440,
+      cost: 0.00597125,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: null,
+        upstream_inference_prompt_cost: 0,
+        upstream_inference_completions_cost: 0,
+      },
+      completion_tokens_details: { reasoning_tokens: 256, image_tokens: 0 },
+      cache_creation_input_tokens: 0,
+      market_cost: 0.00597125,
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.00597125);
+    // upstream fields are 0, should NOT be used as breakdown (Vercel shape)
+    expect(result!.input_cost).toBeNull();
+    expect(result!.output_cost).toBeNull();
+    expect(result!.upstream_inference_prompt_cost).toBe(0);
+    expect(result!.upstream_inference_completions_cost).toBe(0);
+  });
+
+  test('OpenRouter Grok with cached tokens in prompt_tokens_details', () => {
+    // OpenRouter passes cached_tokens in prompt_tokens_details alongside cost_details.
+    const usage = {
+      prompt_tokens: 445,
+      completion_tokens: 278,
+      total_tokens: 723,
+      cost: 0.00020535,
+      is_byok: false,
+      prompt_tokens_details: {
+        cached_tokens: 151,
+        cache_write_tokens: 0,
+        audio_tokens: 0,
+        video_tokens: 0,
+      },
+      cost_details: {
+        upstream_inference_cost: 0.00020535,
+        upstream_inference_prompt_cost: 0.00006635,
+        upstream_inference_completions_cost: 0.000139,
+      },
+      completion_tokens_details: { reasoning_tokens: 210, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.00020535);
+    // upstream fields preserved separately (normal tier)
+    expect(result!.upstream_inference_prompt_cost).toBe(0.00006635);
+    expect(result!.upstream_inference_completions_cost).toBe(0.000139);
+    // No gateway-level input_cost/output_cost on OpenRouter
+    expect(result!.input_cost).toBeNull();
+    expect(result!.output_cost).toBeNull();
+  });
+
+  test('xAI grok-4-fast cost_in_usd_ticks with cached tokens', () => {
+    // xAI reports cost as cost_in_usd_ticks (no cost_details block).
+    const usage = {
+      prompt_tokens: 468,
+      completion_tokens: 82,
+      total_tokens: 870,
+      prompt_tokens_details: {
+        text_tokens: 468,
+        audio_tokens: 0,
+        image_tokens: 0,
+        cached_tokens: 305,
+      },
+      completion_tokens_details: {
+        reasoning_tokens: 320,
+        audio_tokens: 0,
+        accepted_prediction_tokens: 0,
+        rejected_prediction_tokens: 0,
+      },
+      num_sources_used: 0,
+      cost_in_usd_ticks: 2488500,
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    // 2488500 / 10_000_000_000 = 0.00024885
+    expect(result!.total_cost).toBeCloseTo(2488500 / 10_000_000_000, 10);
+    expect(result!.input_cost).toBeNull();
+  });
+
+  test('Avian Kimi (via OpenRouter) with top-level cost and no cost_details', () => {
+    // Avian/Kimi reports cost at the top level but has no cost_details block.
+    const usage = {
+      prompt_tokens: 154,
+      completion_tokens: 131,
+      total_tokens: 285,
+      cost: 0.0003287,
+      prompt_tokens_details: {
+        cached_tokens: 128,
+        cache_write_tokens: 0,
+        audio_tokens: 0,
+        video_tokens: 0,
+      },
+      completion_tokens_details: { reasoning_tokens: 87, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.0003287);
+    expect(result!.input_cost).toBeNull();
+    expect(result!.upstream_inference_prompt_cost).toBeNull();
+  });
+
+  test('OpenRouter Anthropic Thinking with reasoning tokens', () => {
+    const usage = {
+      prompt_tokens: 607,
+      completion_tokens: 143,
+      total_tokens: 750,
+      cost: 0.001322,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: 0.001322,
+        upstream_inference_prompt_cost: 0.000607,
+        upstream_inference_completions_cost: 0.000715,
+      },
+      completion_tokens_details: { reasoning_tokens: 99, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.001322);
+    expect(result!.upstream_inference_cost).toBe(0.001322);
+    expect(result!.upstream_inference_prompt_cost).toBe(0.000607);
+    expect(result!.upstream_inference_completions_cost).toBe(0.000715);
+  });
+
+  test('OpenRouter Gemini with upstream fields matching total', () => {
+    const usage = {
+      prompt_tokens: 161,
+      completion_tokens: 32,
+      total_tokens: 193,
+      cost: 0.00008825,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: 0.00008825,
+        upstream_inference_prompt_cost: 0.00004025,
+        upstream_inference_completions_cost: 0.000048,
+      },
+      completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.00008825);
+    expect(result!.upstream_inference_cost).toBe(0.00008825);
+    expect(result!.upstream_inference_prompt_cost).toBe(0.00004025);
+    expect(result!.upstream_inference_completions_cost).toBe(0.000048);
+  });
+
+  test('OpenRouter GLM with reasoning tokens', () => {
+    const usage = {
+      prompt_tokens: 279,
+      completion_tokens: 72,
+      total_tokens: 351,
+      cost: 0.0006228,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: 0.0006228,
+        upstream_inference_prompt_cost: 0.0003348,
+        upstream_inference_completions_cost: 0.000288,
+      },
+      completion_tokens_details: { reasoning_tokens: 25, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.0006228);
+  });
+
+  test('OpenRouter OpenAI model with zero cached and reasoning tokens', () => {
+    // OpenRouter shape for an OpenAI model: cached_tokens and reasoning_tokens are both 0,
+    // and cost equals cost_details.upstream_inference_cost. Only total_cost is asserted.
+    const usage = {
+      prompt_tokens: 113,
+      completion_tokens: 54,
+      total_tokens: 167,
+      cost: 0.0000901,
+      is_byok: false,
+      prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
+      cost_details: {
+        upstream_inference_cost: 0.0000901,
+        upstream_inference_prompt_cost: 0.0000226,
+        upstream_inference_completions_cost: 0.0000675,
+      },
+      completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0, audio_tokens: 0 },
+    };
+
+    const result = extractUsageCostDetails(usage);
+    expect(result).not.toBeNull();
+    expect(result!.total_cost).toBe(0.0000901);
+  });
+});