@@ -432,12 +432,60 @@ describe('extractUsageCostDetails', () => {
432432
433433 expect ( extractUsageCostDetails ( usage ) ) . toBeNull ( ) ;
434434 } ) ;
435+
test('captures usage.cost when cost_details block is absent (Kimi/Avian shape)', () => {
  // Fixture mirrors a real Kimi-k2.5 response served through OpenRouter:
  // a top-level `cost` is present, but there is no `cost_details` object.
  const reportedCost = 0.0003287;
  const kimiUsage = {
    prompt_tokens: 154,
    completion_tokens: 131,
    total_tokens: 285,
    cost: reportedCost,
    prompt_tokens_details: {
      cached_tokens: 128,
      cache_write_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    completion_tokens_details: {
      reasoning_tokens: 87,
      image_tokens: 0,
      audio_tokens: 0,
    },
  };

  const details = extractUsageCostDetails(kimiUsage);

  expect(details).not.toBeNull();
  // The top-level cost is surfaced as the total; breakdown fields stay null.
  expect(details!.total_cost).toBe(reportedCost);
  expect(details!.input_cost).toBeNull();
  expect(details!.upstream_inference_prompt_cost).toBeNull();
});
453+
test('captures cost_in_usd_ticks when cost_details block is absent (xAI grok shape)', () => {
  // Fixture mirrors a real xai-grok-4-fast response: the cost is an integer tick
  // count and no `cost_details` object is sent.
  // Per the xAI API docs, 1 USD = 10^10 ticks.
  const ticksPerUsd = 10_000_000_000;
  const reportedTicks = 739000;
  const grokUsage = {
    prompt_tokens: 165,
    completion_tokens: 2,
    total_tokens: 296,
    prompt_tokens_details: {
      text_tokens: 165,
      audio_tokens: 0,
      image_tokens: 0,
      cached_tokens: 164,
    },
    completion_tokens_details: {
      reasoning_tokens: 129,
      audio_tokens: 0,
      accepted_prediction_tokens: 0,
      rejected_prediction_tokens: 0,
    },
    num_sources_used: 0,
    cost_in_usd_ticks: reportedTicks,
  };

  const details = extractUsageCostDetails(grokUsage);

  expect(details).not.toBeNull();
  // Ticks are expected to be converted to USD for total_cost.
  expect(details!.total_cost).toBeCloseTo(reportedTicks / ticksPerUsd, 10);
  expect(details!.input_cost).toBeNull();
  expect(details!.upstream_inference_prompt_cost).toBeNull();
});
473+
test('returns null when neither cost_details nor top-level cost fields are present', () => {
  // Token counts only: no `cost`, no `cost_in_usd_ticks`, no `cost_details`.
  const tokensOnlyUsage = { prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 };

  const details = extractUsageCostDetails(tokensOnlyUsage);

  expect(details).toBeNull();
});
435483} ) ;
436484
437485describe ( 'applyUsageCostDetails' , ( ) => {
438486 test ( 'applies gateway input/output/cached costs directly when full breakdown is present' , ( ) => {
439487 const record = createUsageRecord ( ) ;
440- // Extracted from: glm-5.1 via LLM Gateway (real response)
488+ // Extracted from: glm-5.1 via LLM Gateway
441489 const costDetails : ProviderCostDetails = {
442490 total_cost : 0.022101624 ,
443491 input_cost : 0.00073836 ,
@@ -533,7 +581,7 @@ describe('applyUsageCostDetails', () => {
533581 const record = createUsageRecord ( ) ;
534582 // createUsageRecord defaults: costInput=0.001, costCached=0.0005
535583 // Prompt ratio: input=0.001/(0.001+0.0005)=2/3, cached=0.0005/(0.001+0.0005)=1/3
536- // Extracted from: z-ai/glm-5-turbo-20260315 (real response, cached_tokens=128/173 prompt tokens)
584+ // Extracted from: z-ai/glm-5-turbo-20260315 (cached_tokens=128/173 prompt tokens)
537585 const costDetails : ProviderCostDetails = {
538586 total_cost : 0.00021672 ,
539587 input_cost : null ,
@@ -603,7 +651,7 @@ describe('applyUsageCostDetails', () => {
603651
604652 test ( 'attributes full upstream prompt cost to input when no cached tokens' , ( ) => {
605653 const record = createUsageRecord ( { costCached : 0 , costCacheWrite : 0 , costTotal : 0.003 } ) ;
606- // Extracted from: normal-tier real response (cached_tokens=0)
654+ // Extracted from: normal-tier (cached_tokens=0)
607655 const costDetails : ProviderCostDetails = {
608656 total_cost : 0.00435825 ,
609657 input_cost : null ,
@@ -822,6 +870,43 @@ describe('applyUsageCostDetails', () => {
822870 expect ( record . costOutput ) . toBe ( 0 ) ;
823871 } ) ;
824872
test('falls back to proportional distribution when upstream costs are all zero (Vercel shape)', () => {
  // Fixture mirrors a real Vercel AI Gateway response: total cost is non-zero while both
  // upstream_inference_* sub-costs are 0 (the gateway does not pass the upstream breakdown
  // through). Without a "> 0" guard the Normal tier would apply and zero out the sub-costs
  // even though total_cost is correct.
  const usageRecord = createUsageRecord();
  // Record defaults relied on below: costInput=0.001, costOutput=0.002, costCached=0.0005
  // (sum = 0.0035).
  const reportedTotal = 0.003561;
  const providerCosts: ProviderCostDetails = {
    total_cost: reportedTotal,
    input_cost: null,
    output_cost: null,
    cached_input_cost: null,
    cache_write_input_cost: null,
    upstream_inference_cost: null,
    upstream_inference_prompt_cost: 0,
    upstream_inference_completions_cost: 0,
    request_cost: null,
    web_search_cost: null,
    image_input_cost: null,
    image_output_cost: null,
    audio_input_cost: null,
    data_storage_cost: null,
  };

  applyUsageCostDetails(usageRecord, providerCosts);

  expect(usageRecord.costTotal).toBe(reportedTotal);
  expect(usageRecord.costSource).toBe('provider_reported');

  // Minimal tier (proportional split) is expected here, not the Normal tier,
  // so every sub-cost must stay positive.
  expect(usageRecord.costInput).toBeGreaterThan(0);
  expect(usageRecord.costOutput).toBeGreaterThan(0);
  expect(usageRecord.costCached).toBeGreaterThan(0);

  // Shares of the reported total: input = 1/3.5, output = 2/3.5, cached = 0.5/3.5.
  expect(usageRecord.costInput).toBeCloseTo((0.001 / 0.0035) * reportedTotal, 8);
  expect(usageRecord.costOutput).toBeCloseTo((0.002 / 0.0035) * reportedTotal, 8);
  expect(usageRecord.costCached).toBeCloseTo((0.0005 / 0.0035) * reportedTotal, 8);
});
909+
825910 test ( 'SSE : cost comments take precedence over cost_details' , ( ) => {
826911 const record = createUsageRecord ( ) ;
827912 // SSE comment cost applied first
@@ -1030,3 +1115,235 @@ describe('extractProviderEnergyFromSSEComments (via DebugLoggingInspector)', ()
10301115 expect ( lastEnergy . energy_kwh ) . toBe ( 5.2904e-5 ) ;
10311116 } ) ;
10321117} ) ;
1118+
1119+ describe ( 'extractUsageCostDetails - real-world cassette shapes' , ( ) => {
test('Vercel market_cost field does not interfere with cost extraction', () => {
  // Vercel AI Gateway adds `market_cost` alongside `cost` and `cost_details`.
  // The extractor should report `cost` as the total and ignore `market_cost`.
  const usage = {
    prompt_tokens: 16,
    completion_tokens: 33,
    total_tokens: 49,
    cost: 0.000543,
    is_byok: false,
    prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0, video_tokens: 0 },
    cost_details: {
      upstream_inference_cost: null,
      upstream_inference_prompt_cost: 0,
      upstream_inference_completions_cost: 0,
    },
    completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0 },
    cache_creation_input_tokens: 0,
    market_cost: 0.000543,
  };

  const result = extractUsageCostDetails(usage);
  expect(result).not.toBeNull();
  expect(result!.total_cost).toBe(0.000543);
  // Same cost_details shape as the 'Vercel GPT-5' fixture: the zero upstream values are
  // passed through unchanged and no gateway-level input/output breakdown is produced.
  // How zero upstream values are later distributed is applyUsageCostDetails' concern
  // and is not exercised by this extraction test.
  expect(result!.input_cost).toBeNull();
  expect(result!.output_cost).toBeNull();
  expect(result!.upstream_inference_prompt_cost).toBe(0);
  expect(result!.upstream_inference_completions_cost).toBe(0);
});
1146+
test('Vercel GPT-5 with non-zero cost and zero upstream breakdown', () => {
  // Vercel-shaped payload: a real total in `cost`, zeros in the upstream sub-costs.
  const reportedCost = 0.00597125;
  const vercelUsage = {
    prompt_tokens: 113,
    completion_tokens: 327,
    total_tokens: 440,
    cost: reportedCost,
    is_byok: false,
    prompt_tokens_details: {
      cached_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    cost_details: {
      upstream_inference_cost: null,
      upstream_inference_prompt_cost: 0,
      upstream_inference_completions_cost: 0,
    },
    completion_tokens_details: {
      reasoning_tokens: 256,
      image_tokens: 0,
    },
    cache_creation_input_tokens: 0,
    market_cost: reportedCost,
  };

  const details = extractUsageCostDetails(vercelUsage);

  expect(details).not.toBeNull();
  expect(details!.total_cost).toBe(reportedCost);
  // Zero upstream values must not be promoted into an input/output breakdown (Vercel shape);
  // they are carried through as-is.
  expect(details!.input_cost).toBeNull();
  expect(details!.output_cost).toBeNull();
  expect(details!.upstream_inference_prompt_cost).toBe(0);
  expect(details!.upstream_inference_completions_cost).toBe(0);
});
1174+
test('OpenRouter Grok with cached tokens in prompt_tokens_details', () => {
  // OpenRouter reports cached_tokens inside prompt_tokens_details and, separately,
  // an upstream cost breakdown inside cost_details.
  const upstreamCosts = {
    upstream_inference_cost: 0.00020535,
    upstream_inference_prompt_cost: 0.00006635,
    upstream_inference_completions_cost: 0.000139,
  };
  const grokUsage = {
    prompt_tokens: 445,
    completion_tokens: 278,
    total_tokens: 723,
    cost: 0.00020535,
    is_byok: false,
    prompt_tokens_details: {
      cached_tokens: 151,
      cache_write_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    cost_details: upstreamCosts,
    completion_tokens_details: {
      reasoning_tokens: 210,
      image_tokens: 0,
      audio_tokens: 0,
    },
  };

  const details = extractUsageCostDetails(grokUsage);

  expect(details).not.toBeNull();
  expect(details!.total_cost).toBe(0.00020535);
  // Upstream sub-costs are kept in their own fields (normal tier).
  expect(details!.upstream_inference_prompt_cost).toBe(0.00006635);
  expect(details!.upstream_inference_completions_cost).toBe(0.000139);
  // OpenRouter sends no gateway-level input_cost / output_cost.
  expect(details!.input_cost).toBeNull();
  expect(details!.output_cost).toBeNull();
});
1207+
test('xAI grok-4-fast cost_in_usd_ticks with cached tokens', () => {
  // xAI reports cost only as `cost_in_usd_ticks`; there is no cost_details object.
  const ticksPerUsd = 10_000_000_000;
  const reportedTicks = 2488500;
  const grokUsage = {
    prompt_tokens: 468,
    completion_tokens: 82,
    total_tokens: 870,
    prompt_tokens_details: {
      text_tokens: 468,
      audio_tokens: 0,
      image_tokens: 0,
      cached_tokens: 305,
    },
    completion_tokens_details: {
      reasoning_tokens: 320,
      audio_tokens: 0,
      accepted_prediction_tokens: 0,
      rejected_prediction_tokens: 0,
    },
    num_sources_used: 0,
    cost_in_usd_ticks: reportedTicks,
  };

  const details = extractUsageCostDetails(grokUsage);

  expect(details).not.toBeNull();
  // 2488500 ticks / 10^10 ticks-per-USD = 0.00024885 USD.
  expect(details!.total_cost).toBeCloseTo(reportedTicks / ticksPerUsd, 10);
  expect(details!.input_cost).toBeNull();
});
1236+
test('Avian Kimi (via OpenRouter) with top-level cost and no cost_details', () => {
  // Avian/Kimi puts the cost at the top level of `usage` and sends no cost_details object.
  const reportedCost = 0.0003287;
  const promptBreakdown = {
    cached_tokens: 128,
    cache_write_tokens: 0,
    audio_tokens: 0,
    video_tokens: 0,
  };
  const completionBreakdown = { reasoning_tokens: 87, image_tokens: 0, audio_tokens: 0 };
  const kimiUsage = {
    prompt_tokens: 154,
    completion_tokens: 131,
    total_tokens: 285,
    cost: reportedCost,
    prompt_tokens_details: promptBreakdown,
    completion_tokens_details: completionBreakdown,
  };

  const details = extractUsageCostDetails(kimiUsage);

  expect(details).not.toBeNull();
  expect(details!.total_cost).toBe(reportedCost);
  // With no cost_details in the payload, breakdown fields are expected to be null.
  expect(details!.input_cost).toBeNull();
  expect(details!.upstream_inference_prompt_cost).toBeNull();
});
1259+
test('OpenRouter Anthropic Thinking with reasoning tokens', () => {
  // OpenRouter payload with a full upstream breakdown and non-zero reasoning tokens.
  const anthropicUsage = {
    prompt_tokens: 607,
    completion_tokens: 143,
    total_tokens: 750,
    cost: 0.001322,
    is_byok: false,
    prompt_tokens_details: {
      cached_tokens: 0,
      cache_write_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    cost_details: {
      upstream_inference_cost: 0.001322,
      upstream_inference_prompt_cost: 0.000607,
      upstream_inference_completions_cost: 0.000715,
    },
    completion_tokens_details: {
      reasoning_tokens: 99,
      image_tokens: 0,
      audio_tokens: 0,
    },
  };

  const details = extractUsageCostDetails(anthropicUsage);

  expect(details).not.toBeNull();
  // Total and all three upstream figures are expected to come back unchanged.
  expect(details!.total_cost).toBe(0.001322);
  expect(details!.upstream_inference_cost).toBe(0.001322);
  expect(details!.upstream_inference_prompt_cost).toBe(0.000607);
  expect(details!.upstream_inference_completions_cost).toBe(0.000715);
});
1283+
test('OpenRouter Gemini with upstream fields matching total', () => {
  // In this payload upstream_inference_cost equals the top-level cost.
  const reportedCost = 0.00008825;
  const geminiUsage = {
    prompt_tokens: 161,
    completion_tokens: 32,
    total_tokens: 193,
    cost: reportedCost,
    is_byok: false,
    prompt_tokens_details: {
      cached_tokens: 0,
      cache_write_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    cost_details: {
      upstream_inference_cost: reportedCost,
      upstream_inference_prompt_cost: 0.00004025,
      upstream_inference_completions_cost: 0.000048,
    },
    completion_tokens_details: {
      reasoning_tokens: 0,
      image_tokens: 0,
      audio_tokens: 0,
    },
  };

  const details = extractUsageCostDetails(geminiUsage);

  expect(details).not.toBeNull();
  expect(details!.total_cost).toBe(reportedCost);
  expect(details!.upstream_inference_cost).toBe(reportedCost);
  expect(details!.upstream_inference_prompt_cost).toBe(0.00004025);
  expect(details!.upstream_inference_completions_cost).toBe(0.000048);
});
1307+
test('OpenRouter GLM with reasoning tokens', () => {
  // OpenRouter payload for a GLM model; only the extracted total is checked here.
  const reportedCost = 0.0006228;
  const glmUsage = {
    prompt_tokens: 279,
    completion_tokens: 72,
    total_tokens: 351,
    cost: reportedCost,
    is_byok: false,
    prompt_tokens_details: {
      cached_tokens: 0,
      cache_write_tokens: 0,
      audio_tokens: 0,
      video_tokens: 0,
    },
    cost_details: {
      upstream_inference_cost: reportedCost,
      upstream_inference_prompt_cost: 0.0003348,
      upstream_inference_completions_cost: 0.000288,
    },
    completion_tokens_details: {
      reasoning_tokens: 25,
      image_tokens: 0,
      audio_tokens: 0,
    },
  };

  const details = extractUsageCostDetails(glmUsage);

  expect(details).not.toBeNull();
  expect(details!.total_cost).toBe(reportedCost);
});
1328+
test('OpenRouter OpenAI model with zero cached tokens and zero reasoning tokens', () => {
  // NOTE(review): the previous title said "with cached tokens and reasoning tokens", but this
  // fixture has cached_tokens: 0 and reasoning_tokens: 0. The title now describes the data
  // actually used; if a cached/reasoning cassette was intended, replace the fixture instead.
  const usage = {
    prompt_tokens: 113,
    completion_tokens: 54,
    total_tokens: 167,
    cost: 0.0000901,
    is_byok: false,
    prompt_tokens_details: { cached_tokens: 0, cache_write_tokens: 0, audio_tokens: 0, video_tokens: 0 },
    cost_details: {
      upstream_inference_cost: 0.0000901,
      upstream_inference_prompt_cost: 0.0000226,
      upstream_inference_completions_cost: 0.0000675,
    },
    completion_tokens_details: { reasoning_tokens: 0, image_tokens: 0, audio_tokens: 0 },
  };

  const result = extractUsageCostDetails(usage);
  expect(result).not.toBeNull();
  expect(result!.total_cost).toBe(0.0000901);
  // Upstream breakdown is checked the same way as in the other OpenRouter fixtures
  // (Anthropic, Gemini): values are expected to be passed through unchanged.
  expect(result!.upstream_inference_cost).toBe(0.0000901);
  expect(result!.upstream_inference_prompt_cost).toBe(0.0000226);
  expect(result!.upstream_inference_completions_cost).toBe(0.0000675);
});
1349+ } ) ;
0 commit comments