Skip to content

Commit a097a66

Browse files
authored
Vercel no longer seems to include cached tokens in input tokens for /generation endpoint (#3690)
* Vercel no longer seems to include cached tokens in input tokens for the /generation endpoint. See here: https://us.posthog.com/project/141915/insights/oskDcDiF (the cache hit token count is often larger than the input token count). * Typecheck fixes
1 parent 308e76f commit a097a66

2 files changed

Lines changed: 22 additions & 36 deletions

File tree

apps/web/src/lib/ai-gateway/processUsage.test.ts

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -248,13 +248,7 @@ describe('mapToUsageStats approval tests', () => {
248248
test(claudeSonnetGeneration, async () => {
249249
const inputFile = join(sampleDir, claudeSonnetGeneration);
250250
const generationData = JSON.parse(await readFile(inputFile, 'utf-8')) as OpenRouterGeneration;
251-
const result = mapToUsageStats(
252-
generationData,
253-
'nonsense',
254-
'fake-user-id',
255-
'fake-model',
256-
'openrouter'
257-
);
251+
const result = mapToUsageStats(generationData, 'nonsense', 'fake-user-id', 'openrouter');
258252
const resultString = JSON.stringify(result, null, 2);
259253
const approvalFilePath = inputFile + '.mapToUsageStats.approved.json';
260254
await verifyApproval(resultString, approvalFilePath);
@@ -292,13 +286,7 @@ describe('mapToUsageStats', () => {
292286
};
293287

294288
// Call mapToUsageStats with the BYOK generation
295-
const result = mapToUsageStats(
296-
byokGeneration,
297-
'test response',
298-
'fake-user-id',
299-
'fake-model',
300-
'openrouter'
301-
);
289+
const result = mapToUsageStats(byokGeneration, 'test response', 'fake-user-id', 'openrouter');
302290

303291
// Verify that the cost is multiplied by OPENROUTER_BYOK_COST_MULTIPLIER
304292
expect(result.cost_mUsd).toBe(toMicrodollars(0.1 * 20.0)); // 0.1 * 20 = 2, then convert to microdollars
@@ -326,7 +314,6 @@ describe('mapToUsageStats', () => {
326314
nonByokGeneration,
327315
'test response',
328316
' fake-user-id',
329-
'fake-model',
330317
'openrouter'
331318
);
332319

apps/web/src/lib/ai-gateway/processUsage.ts

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ import {
6161
drainSseStream,
6262
extractVercelIsByok,
6363
} from '@/lib/ai-gateway/processUsage.shared';
64-
import { isClaudeModel } from '@/lib/ai-gateway/providers/anthropic.constants';
65-
import { isMinimaxModel } from '@/lib/ai-gateway/providers/minimax';
6664
import type { KiloExclusiveModel } from '@/lib/ai-gateway/providers/kilo-exclusive-model';
6765

6866
const posthogClient = PostHogClient();
@@ -987,10 +985,19 @@ export async function processTokenData(
987985
generation,
988986
usageStats.responseContent,
989987
usageContext.kiloUserId,
990-
usageContext.requested_model,
991988
usageContext.provider
992989
);
993990

991+
if (usageContext.provider === 'vercel' && usageStats.inputTokens > 0) {
992+
// It seems Vercel's /generation result does not include cache hit tokens in input tokens, unlike OpenRouter.
993+
// Since it's not completely clear this is the case, and in the past the numbers were inconsistent,
994+
// we keep the response usage data if we have it.
995+
genStats.inputTokens = usageStats.inputTokens;
996+
genStats.outputTokens = usageStats.outputTokens;
997+
genStats.cacheHitTokens = usageStats.cacheHitTokens;
998+
genStats.cacheWriteTokens = usageStats.cacheWriteTokens;
999+
}
1000+
9941001
genStats.model = usageStats.model; // openrouter bug?
9951002
genStats.hasError = usageStats.hasError; // retain by choice
9961003
genStats.status_code = usageStats.status_code; // retain by choice
@@ -1003,11 +1010,6 @@ export async function processTokenData(
10031010
[genStats.cacheDiscount_mUsd, usageStats.cacheDiscount_mUsd]
10041011
);
10051012
}
1006-
if (genStats.inputTokens < usageStats.inputTokens) {
1007-
console.warn(
1008-
'Suspicious: fewer input tokens in generation data compared to usage stats. Did provider return Anthropic-style token counts?'
1009-
);
1010-
}
10111013
usageStats = genStats;
10121014
}
10131015

@@ -1043,10 +1045,6 @@ export async function processTokenData(
10431045
return logMicrodollarUsage(usageStats, usageContext);
10441046
}
10451047

1046-
function useAnthropicStyleTokenCounting(requestedModel: string, provider: ProviderId) {
1047-
return provider === 'vercel' && (isClaudeModel(requestedModel) || isMinimaxModel(requestedModel));
1048-
}
1049-
10501048
async function useGenerationLookup(
10511049
usageStats: MicrodollarUsageStats | null,
10521050
usageContext: MicrodollarUsageContext
@@ -1071,7 +1069,6 @@ export const mapToUsageStats = (
10711069
{ data }: OpenRouterGeneration,
10721070
responseContent: string,
10731071
kiloUserId: string,
1074-
requestedModel: string,
10751072
provider: ProviderId
10761073
): MicrodollarUsageStats => {
10771074
let llmCostUsd;
@@ -1094,16 +1091,18 @@ export const mapToUsageStats = (
10941091
hasError: false,
10951092
model: data.model,
10961093
responseContent,
1097-
inputTokens: useAnthropicStyleTokenCounting(requestedModel, provider)
1098-
? (data.native_tokens_prompt ?? 0) +
1099-
(data.native_tokens_cached ?? 0) +
1100-
(data.native_tokens_cache_creation ?? 0)
1101-
: (data.native_tokens_prompt ?? 0),
1094+
inputTokens:
1095+
provider === 'vercel'
1096+
? (data.native_tokens_prompt ?? 0) +
1097+
(data.native_tokens_cached ?? 0) +
1098+
(data.native_tokens_cache_creation ?? 0)
1099+
: (data.native_tokens_prompt ?? 0),
11021100
cacheHitTokens: data.native_tokens_cached ?? 0,
11031101
cacheWriteTokens: data.native_tokens_cache_creation ?? 0,
1104-
outputTokens: useAnthropicStyleTokenCounting(requestedModel, provider)
1105-
? (data.native_tokens_completion ?? 0) + (data.native_tokens_reasoning ?? 0)
1106-
: (data.native_tokens_completion ?? 0),
1102+
outputTokens:
1103+
provider === 'vercel'
1104+
? (data.native_tokens_completion ?? 0) + (data.native_tokens_reasoning ?? 0)
1105+
: (data.native_tokens_completion ?? 0),
11071106
cost_mUsd: toMicrodollars(llmCostUsd),
11081107
is_byok: data.is_byok ?? null,
11091108
cacheDiscount_mUsd:

0 commit comments

Comments
 (0)