Skip to content

Commit 314befe

Browse files
committed
Merge branch 'feature/capture-provider-cost-details'
2 parents d84094d + e507057 commit 314befe

6 files changed

Lines changed: 731 additions & 13 deletions

File tree

packages/backend/src/services/inspectors/usage-logging.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,15 @@ export class UsageInspector extends PassThrough {
148148
// Some providers emit `: cost {"request_cost_usd": ...}` as SSE comments
149149
if (reconstructed?.providerReportedCost) {
150150
applyProviderReportedCost(this.usageRecord, reconstructed.providerReportedCost);
151+
if (reconstructed?.usage) {
152+
const usageCostDetails = extractUsageCostDetails(reconstructed.usage);
153+
if (usageCostDetails) {
154+
logger.warn(
155+
`[ProviderCost] Both SSE :cost and usage.cost_details present for ${this.usageRecord.requestId}; ` +
156+
`SSE value ($${this.usageRecord.providerReportedCost}) takes priority over cost_details total ($${usageCostDetails.total_cost})`
157+
);
158+
}
159+
}
151160
}
152161

153162
// Override with provider-reported cost from usage.cost_details if available

packages/backend/src/services/response-handler.ts

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,15 @@ async function finalizeUsage(
502502
const reconstructed = debugManager.getReconstructedRawResponse(usageRecord.requestId!);
503503
if (reconstructed?.providerReportedCost) {
504504
applyProviderReportedCost(usageRecord, reconstructed.providerReportedCost);
505+
if (reconstructed?.usage) {
506+
const usageCostDetails = extractUsageCostDetails(reconstructed.usage);
507+
if (usageCostDetails) {
508+
logger.warn(
509+
`[ProviderCost] Both SSE :cost and usage.cost_details present for ${usageRecord.requestId}; ` +
510+
`SSE value ($${usageRecord.providerReportedCost}) takes priority over cost_details total ($${usageCostDetails.total_cost})`
511+
);
512+
}
513+
}
505514
}
506515

507516
// Also check for cost_details in the usage block (some providers embed costs there)

packages/backend/src/utils/__tests__/provider-cost.test.ts

Lines changed: 320 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -432,12 +432,60 @@ describe('extractUsageCostDetails', () => {
432432

433433
expect(extractUsageCostDetails(usage)).toBeNull();
434434
});
435+
436+
test('captures usage.cost when cost_details block is absent (Kimi/Avian shape)', () => {
437+
// Real response: Kimi-k2.5 via OpenRouter — usage.cost present but no cost_details block.
438+
const usage = {
439+
prompt_tokens: 154,
440+
completion_tokens: 131,
441+
total_tokens: 285,
442+
cost: 0.0003287,
443+
prompt_tokens_details: { cached_tokens: 128, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
444+
completion_tokens_details: { reasoning_tokens: 87, image_tokens: 0, audio_tokens: 0 },
445+
};
446+
447+
const result = extractUsageCostDetails(usage);
448+
expect(result).not.toBeNull();
449+
expect(result!.total_cost).toBe(0.0003287);
450+
expect(result!.input_cost).toBeNull();
451+
expect(result!.upstream_inference_prompt_cost).toBeNull();
452+
});
453+
454+
test('captures cost_in_usd_ticks when cost_details block is absent (xAI grok shape)', () => {
455+
// Real response: xai-grok-4-fast — cost reported as integer ticks, no cost_details block.
456+
// 1 USD = 10^10 ticks per xAI API docs.
457+
const usage = {
458+
prompt_tokens: 165,
459+
completion_tokens: 2,
460+
total_tokens: 296,
461+
prompt_tokens_details: { text_tokens: 165, audio_tokens: 0, image_tokens: 0, cached_tokens: 164 },
462+
completion_tokens_details: { reasoning_tokens: 129, audio_tokens: 0, accepted_prediction_tokens: 0, rejected_prediction_tokens: 0 },
463+
num_sources_used: 0,
464+
cost_in_usd_ticks: 739000,
465+
};
466+
467+
const result = extractUsageCostDetails(usage);
468+
expect(result).not.toBeNull();
469+
expect(result!.total_cost).toBeCloseTo(739000 / 10_000_000_000, 10);
470+
expect(result!.input_cost).toBeNull();
471+
expect(result!.upstream_inference_prompt_cost).toBeNull();
472+
});
473+
474+
test('returns null when neither cost_details nor top-level cost fields are present', () => {
475+
const usage = {
476+
prompt_tokens: 100,
477+
completion_tokens: 50,
478+
total_tokens: 150,
479+
};
480+
481+
expect(extractUsageCostDetails(usage)).toBeNull();
482+
});
435483
});
436484

437485
describe('applyUsageCostDetails', () => {
438486
test('applies gateway input/output/cached costs directly when full breakdown is present', () => {
439487
const record = createUsageRecord();
440-
// Extracted from: glm-5.1 via LLM Gateway (real response)
488+
// Extracted from: glm-5.1 via LLM Gateway
441489
const costDetails: ProviderCostDetails = {
442490
total_cost: 0.022101624,
443491
input_cost: 0.00073836,
@@ -533,7 +581,7 @@ describe('applyUsageCostDetails', () => {
533581
const record = createUsageRecord();
534582
// createUsageRecord defaults: costInput=0.001, costCached=0.0005
535583
// Prompt ratio: input=0.001/(0.001+0.0005)=2/3, cached=0.0005/(0.001+0.0005)=1/3
536-
// Extracted from: z-ai/glm-5-turbo-20260315 (real response, cached_tokens=128/173 prompt tokens)
584+
// Extracted from: z-ai/glm-5-turbo-20260315 (cached_tokens=128/173 prompt tokens)
537585
const costDetails: ProviderCostDetails = {
538586
total_cost: 0.00021672,
539587
input_cost: null,
@@ -603,7 +651,7 @@ describe('applyUsageCostDetails', () => {
603651

604652
test('attributes full upstream prompt cost to input when no cached tokens', () => {
605653
const record = createUsageRecord({ costCached: 0, costCacheWrite: 0, costTotal: 0.003 });
606-
// Extracted from: normal-tier real response (cached_tokens=0)
654+
// Extracted from: normal-tier (cached_tokens=0)
607655
const costDetails: ProviderCostDetails = {
608656
total_cost: 0.00435825,
609657
input_cost: null,
@@ -822,6 +870,43 @@ describe('applyUsageCostDetails', () => {
822870
expect(record.costOutput).toBe(0);
823871
});
824872

873+
test('falls back to proportional distribution when upstream costs are all zero (Vercel shape)', () => {
874+
// Real response: Vercel AI Gateway — cost is non-zero but upstream_inference_* fields are
875+
// all 0 (gateway doesn't pass through upstream cost breakdown). Without the > 0 guard,
876+
// the Normal tier would fire and produce zero sub-costs despite total_cost being correct.
877+
const record = createUsageRecord();
878+
// costInput=0.001, costOutput=0.002, costCached=0.0005, total=0.0035
879+
const costDetails: ProviderCostDetails = {
880+
total_cost: 0.003561,
881+
input_cost: null,
882+
output_cost: null,
883+
cached_input_cost: null,
884+
cache_write_input_cost: null,
885+
upstream_inference_cost: null,
886+
upstream_inference_prompt_cost: 0,
887+
upstream_inference_completions_cost: 0,
888+
request_cost: null,
889+
web_search_cost: null,
890+
image_input_cost: null,
891+
image_output_cost: null,
892+
audio_input_cost: null,
893+
data_storage_cost: null,
894+
};
895+
896+
applyUsageCostDetails(record, costDetails);
897+
898+
expect(record.costTotal).toBe(0.003561);
899+
expect(record.costSource).toBe('provider_reported');
900+
// Should use Minimal tier (proportional distribution), not Normal tier (which would zero everything)
901+
expect(record.costInput).toBeGreaterThan(0);
902+
expect(record.costOutput).toBeGreaterThan(0);
903+
expect(record.costCached).toBeGreaterThan(0);
904+
// Proportional: input=1/3.5, output=2/3.5, cached=0.5/3.5
905+
expect(record.costInput).toBeCloseTo((0.001 / 0.0035) * 0.003561, 8);
906+
expect(record.costOutput).toBeCloseTo((0.002 / 0.0035) * 0.003561, 8);
907+
expect(record.costCached).toBeCloseTo((0.0005 / 0.0035) * 0.003561, 8);
908+
});
909+
825910
test('SSE : cost comments take precedence over cost_details', () => {
826911
const record = createUsageRecord();
827912
// SSE comment cost applied first
@@ -1030,3 +1115,235 @@ describe('extractProviderEnergyFromSSEComments (via DebugLoggingInspector)', ()
10301115
expect(lastEnergy.energy_kwh).toBe(5.2904e-5);
10311116
});
10321117
});
1118+
1119+
describe('extractUsageCostDetails - real-world cassette shapes', () => {
1120+
test('Vercel market_cost field does not interfere with cost extraction', () => {
1121+
// Vercel AI Gateway adds market_cost alongside cost and cost_details.
1122+
// The normalizer should extract cost as total and ignore market_cost.
1123+
const usage = {
1124+
prompt_tokens: 16,
1125+
completion_tokens: 33,
1126+
total_tokens: 49,
1127+
cost: 0.000543,
1128+
is_byok: false,
1129+
prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1130+
cost_details: {
1131+
upstream_inference_cost: null,
1132+
upstream_inference_prompt_cost: 0,
1133+
upstream_inference_completions_cost: 0,
1134+
},
1135+
completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0 },
1136+
cache_creation_input_tokens: 0,
1137+
market_cost: 0.000543,
1138+
};
1139+
1140+
const result = extractUsageCostDetails(usage);
1141+
expect(result).not.toBeNull();
1142+
expect(result!.total_cost).toBe(0.000543);
1143+
// upstream_inference fields are both 0, so normal tier guard prevents zeroing
1144+
// Falls back to minimal tier (proportional). But there are no prior calculated costs.
1145+
});
1146+
1147+
test('Vercel GPT-5 with non-zero cost and zero upstream breakdown', () => {
1148+
const usage = {
1149+
prompt_tokens: 113,
1150+
completion_tokens: 327,
1151+
total_tokens: 440,
1152+
cost: 0.00597125,
1153+
is_byok: false,
1154+
prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1155+
cost_details: {
1156+
upstream_inference_cost: null,
1157+
upstream_inference_prompt_cost: 0,
1158+
upstream_inference_completions_cost: 0,
1159+
},
1160+
completion_tokens_details: { reasoning_tokens: 256, image_tokens: 0 },
1161+
cache_creation_input_tokens: 0,
1162+
market_cost: 0.00597125,
1163+
};
1164+
1165+
const result = extractUsageCostDetails(usage);
1166+
expect(result).not.toBeNull();
1167+
expect(result!.total_cost).toBe(0.00597125);
1168+
// upstream fields are 0, should NOT be used as breakdown (Vercel shape)
1169+
expect(result!.input_cost).toBeNull();
1170+
expect(result!.output_cost).toBeNull();
1171+
expect(result!.upstream_inference_prompt_cost).toBe(0);
1172+
expect(result!.upstream_inference_completions_cost).toBe(0);
1173+
});
1174+
1175+
test('OpenRouter Grok with cached tokens in prompt_tokens_details', () => {
1176+
// OpenRouter passes cached_tokens in prompt_tokens_details alongside cost_details.
1177+
const usage = {
1178+
prompt_tokens: 445,
1179+
completion_tokens: 278,
1180+
total_tokens: 723,
1181+
cost: 0.00020535,
1182+
is_byok: false,
1183+
prompt_tokens_details: {
1184+
cached_tokens: 151,
1185+
cache_write_tokens: 0,
1186+
audio_tokens: 0,
1187+
video_tokens: 0,
1188+
},
1189+
cost_details: {
1190+
upstream_inference_cost: 0.00020535,
1191+
upstream_inference_prompt_cost: 0.00006635,
1192+
upstream_inference_completions_cost: 0.000139,
1193+
},
1194+
completion_tokens_details: { reasoning_tokens: 210, image_tokens: 0, audio_tokens: 0 },
1195+
};
1196+
1197+
const result = extractUsageCostDetails(usage);
1198+
expect(result).not.toBeNull();
1199+
expect(result!.total_cost).toBe(0.00020535);
1200+
// upstream fields preserved separately (normal tier)
1201+
expect(result!.upstream_inference_prompt_cost).toBe(0.00006635);
1202+
expect(result!.upstream_inference_completions_cost).toBe(0.000139);
1203+
// No gateway-level input_cost/output_cost on OpenRouter
1204+
expect(result!.input_cost).toBeNull();
1205+
expect(result!.output_cost).toBeNull();
1206+
});
1207+
1208+
test('xAI grok-4-fast cost_in_usd_ticks with cached tokens', () => {
1209+
// xAI reports cost as cost_in_usd_ticks (no cost_details block).
1210+
const usage = {
1211+
prompt_tokens: 468,
1212+
completion_tokens: 82,
1213+
total_tokens: 870,
1214+
prompt_tokens_details: {
1215+
text_tokens: 468,
1216+
audio_tokens: 0,
1217+
image_tokens: 0,
1218+
cached_tokens: 305,
1219+
},
1220+
completion_tokens_details: {
1221+
reasoning_tokens: 320,
1222+
audio_tokens: 0,
1223+
accepted_prediction_tokens: 0,
1224+
rejected_prediction_tokens: 0,
1225+
},
1226+
num_sources_used: 0,
1227+
cost_in_usd_ticks: 2488500,
1228+
};
1229+
1230+
const result = extractUsageCostDetails(usage);
1231+
expect(result).not.toBeNull();
1232+
// 2488500 / 10_000_000_000 = 0.00024885
1233+
expect(result!.total_cost).toBeCloseTo(2488500 / 10_000_000_000, 10);
1234+
expect(result!.input_cost).toBeNull();
1235+
});
1236+
1237+
test('Avian Kimi (via OpenRouter) with top-level cost and no cost_details', () => {
1238+
// Avian/Kimi reports cost at the top level but has no cost_details block.
1239+
const usage = {
1240+
prompt_tokens: 154,
1241+
completion_tokens: 131,
1242+
total_tokens: 285,
1243+
cost: 0.0003287,
1244+
prompt_tokens_details: {
1245+
cached_tokens: 128,
1246+
cache_write_tokens: 0,
1247+
audio_tokens: 0,
1248+
video_tokens: 0,
1249+
},
1250+
completion_tokens_details: { reasoning_tokens: 87, image_tokens: 0, audio_tokens: 0 },
1251+
};
1252+
1253+
const result = extractUsageCostDetails(usage);
1254+
expect(result).not.toBeNull();
1255+
expect(result!.total_cost).toBe(0.0003287);
1256+
expect(result!.input_cost).toBeNull();
1257+
expect(result!.upstream_inference_prompt_cost).toBeNull();
1258+
});
1259+
1260+
test('OpenRouter Anthropic Thinking with reasoning tokens', () => {
1261+
const usage = {
1262+
prompt_tokens: 607,
1263+
completion_tokens: 143,
1264+
total_tokens: 750,
1265+
cost: 0.001322,
1266+
is_byok: false,
1267+
prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1268+
cost_details: {
1269+
upstream_inference_cost: 0.001322,
1270+
upstream_inference_prompt_cost: 0.000607,
1271+
upstream_inference_completions_cost: 0.000715,
1272+
},
1273+
completion_tokens_details: { reasoning_tokens: 99, image_tokens: 0, audio_tokens: 0 },
1274+
};
1275+
1276+
const result = extractUsageCostDetails(usage);
1277+
expect(result).not.toBeNull();
1278+
expect(result!.total_cost).toBe(0.001322);
1279+
expect(result!.upstream_inference_cost).toBe(0.001322);
1280+
expect(result!.upstream_inference_prompt_cost).toBe(0.000607);
1281+
expect(result!.upstream_inference_completions_cost).toBe(0.000715);
1282+
});
1283+
1284+
test('OpenRouter Gemini with upstream fields matching total', () => {
1285+
const usage = {
1286+
prompt_tokens: 161,
1287+
completion_tokens: 32,
1288+
total_tokens: 193,
1289+
cost: 0.00008825,
1290+
is_byok: false,
1291+
prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1292+
cost_details: {
1293+
upstream_inference_cost: 0.00008825,
1294+
upstream_inference_prompt_cost: 0.00004025,
1295+
upstream_inference_completions_cost: 0.000048,
1296+
},
1297+
completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0, audio_tokens: 0 },
1298+
};
1299+
1300+
const result = extractUsageCostDetails(usage);
1301+
expect(result).not.toBeNull();
1302+
expect(result!.total_cost).toBe(0.00008825);
1303+
expect(result!.upstream_inference_cost).toBe(0.00008825);
1304+
expect(result!.upstream_inference_prompt_cost).toBe(0.00004025);
1305+
expect(result!.upstream_inference_completions_cost).toBe(0.000048);
1306+
});
1307+
1308+
test('OpenRouter GLM with reasoning tokens', () => {
1309+
const usage = {
1310+
prompt_tokens: 279,
1311+
completion_tokens: 72,
1312+
total_tokens: 351,
1313+
cost: 0.0006228,
1314+
is_byok: false,
1315+
prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1316+
cost_details: {
1317+
upstream_inference_cost: 0.0006228,
1318+
upstream_inference_prompt_cost: 0.0003348,
1319+
upstream_inference_completions_cost: 0.000288,
1320+
},
1321+
completion_tokens_details: { reasoning_tokens: 25, image_tokens: 0, audio_tokens: 0 },
1322+
};
1323+
1324+
const result = extractUsageCostDetails(usage);
1325+
expect(result).not.toBeNull();
1326+
expect(result!.total_cost).toBe(0.0006228);
1327+
});
1328+
1329+
test('OpenRouter OpenAI model with cached tokens and reasoning tokens', () => {
1330+
const usage = {
1331+
prompt_tokens: 113,
1332+
completion_tokens: 54,
1333+
total_tokens: 167,
1334+
cost: 0.0000901,
1335+
is_byok: false,
1336+
prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
1337+
cost_details: {
1338+
upstream_inference_cost: 0.0000901,
1339+
upstream_inference_prompt_cost: 0.0000226,
1340+
upstream_inference_completions_cost: 0.0000675,
1341+
},
1342+
completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0, audio_tokens: 0 },
1343+
};
1344+
1345+
const result = extractUsageCostDetails(usage);
1346+
expect(result).not.toBeNull();
1347+
expect(result!.total_cost).toBe(0.0000901);
1348+
});
1349+
});

0 commit comments

Comments
 (0)