Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions containers/api-proxy/token-parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ function extractReasoningTokens(usage) {
*
* Supports:
* - Anthropic: usage.cache_read_input_tokens
* - OpenAI/Copilot: usage.prompt_tokens_details.cached_tokens
* - OpenAI Chat Completions / Copilot: usage.prompt_tokens_details.cached_tokens
* - OpenAI Responses API: usage.input_tokens_details.cached_tokens
* - Token-entry arrays containing { token_type: "cache_read", token_count: <n> }
*/
function extractCacheReadTokens(usage) {
Expand All @@ -75,6 +76,15 @@ function extractCacheReadTokens(usage) {
return usage.prompt_tokens_details.cached_tokens;
}

// OpenAI Responses API (/responses) reports cached prompt tokens under
// `input_tokens_details.cached_tokens` (an object), rather than the Chat
// Completions `prompt_tokens_details.cached_tokens`. Without this branch the
// value falls through to the array loop below, which only handles token-entry
// arrays, so cache reads are silently dropped (reported as 0).
if (usage.input_tokens_details && typeof usage.input_tokens_details.cached_tokens === 'number') {
return usage.input_tokens_details.cached_tokens;
}

const tokenContainers = [
usage.prompt_tokens_details,
usage.input_tokens_details,
Expand Down Expand Up @@ -392,7 +402,9 @@ function parseSseDataLines(text) {
* Output fields:
* - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
* - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
* - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
* - cache_read_tokens: number (from Anthropic cache_read_input_tokens,
* OpenAI Chat Completions prompt_tokens_details.cached_tokens, or
* OpenAI Responses API input_tokens_details.cached_tokens)
* - cache_write_tokens: number (Anthropic cache_creation_input_tokens or
* Copilot copilot_usage cache_write; not available in flattened OpenAI usage)
*/
Expand Down
176 changes: 176 additions & 0 deletions containers/api-proxy/token-tracker.parsing.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,39 @@ describe('extractUsageFromJson', () => {
cache_read_input_tokens: 77,
});
});

test('extracts OpenAI Responses API cached tokens from input_tokens_details.cached_tokens', () => {
  // Non-streaming /responses payload (the endpoint codex uses). Cached prompt
  // tokens are nested in `input_tokens_details.cached_tokens` here, whereas
  // Chat Completions nests them in `prompt_tokens_details`.
  const responsesUsage = {
    input_tokens: 707301,
    output_tokens: 12096,
    total_tokens: 719397,
    input_tokens_details: {
      cached_tokens: 672256,
    },
    output_tokens_details: {
      reasoning_tokens: 7715,
    },
  };
  const payload = {
    type: 'response.completed',
    response: {
      id: 'resp_responses_cache',
      model: 'gpt-5.4-mini',
      usage: responsesUsage,
    },
  };
  const serialized = Buffer.from(JSON.stringify(payload));

  const extracted = extractUsageFromJson(serialized);

  // The nested detail objects are expected to be flattened into top-level
  // reasoning_tokens / cache_read_input_tokens fields.
  expect(extracted.model).toBe('gpt-5.4-mini');
  expect(extracted.usage).toEqual({
    input_tokens: 707301,
    output_tokens: 12096,
    total_tokens: 719397,
    reasoning_tokens: 7715,
    cache_read_input_tokens: 672256,
  });
});
});

// ── extractUsageFromSseLine ───────────────────────────────────────────
Expand Down Expand Up @@ -343,6 +376,37 @@ describe('extractUsageFromSseLine', () => {
});
});

test('extracts cache tokens from OpenAI Responses API input_tokens_details (streaming)', () => {
  // Final `response.completed` event of a streamed /responses call. The cached
  // token count sits in the `input_tokens_details` object rather than in
  // `prompt_tokens_details`.
  const finalEvent = {
    type: 'response.completed',
    response: {
      model: 'gpt-5.4-mini',
      usage: {
        input_tokens: 37484,
        output_tokens: 619,
        total_tokens: 38103,
        input_tokens_details: {
          cached_tokens: 34816,
        },
        output_tokens_details: {
          reasoning_tokens: 128,
        },
      },
    },
  };
  const sseData = JSON.stringify(finalEvent);

  const parsed = extractUsageFromSseLine(sseData);

  const expectedUsage = {
    input_tokens: 37484,
    output_tokens: 619,
    total_tokens: 38103,
    reasoning_tokens: 128,
    cache_read_input_tokens: 34816,
  };
  expect(parsed.usage).toEqual(expectedUsage);
});

test('returns null for [DONE]', () => {
const result = extractUsageFromSseLine('[DONE]');
expect(result.usage).toBeNull();
Expand Down Expand Up @@ -390,6 +454,99 @@ describe('extractUsageFromSseLine', () => {
cache_read_input_tokens: 43894,
});
});

// Regression for gh-aw run 27784259295: the Copilot /responses endpoint
// streams a chat.completion-shaped final chunk that carries both
// prompt_tokens_details.cached_tokens AND the authoritative per-type split
// in copilot_usage.token_details. The copilot_usage breakdown must win so
// the input/cache_read split is exact. That run reported cache_read_tokens: 0
// on every request despite ~1.43M cached reads in aggregate.
//
// Each fixture below is a real request captured from the agent's process log
// for that run, in chronological order (cache reads grow as the prompt is
// re-sent). `input` + `cacheRead` === `promptTokens` for every entry.
describe('Copilot /responses streaming cache reads (run 27784259295)', () => {
  // One row per captured request. The first four columns feed the OpenAI-style
  // `usage` object; `input` / `cacheRead` / `output` feed the per-type
  // `copilot_usage.token_details` entries.
  const CAPTURED_REQUESTS = [
    { promptTokens: 19158, completionTokens: 1304, cachedTokens: 0, reasoningTokens: 516, input: 19158, cacheRead: 0, output: 1304 },
    { promptTokens: 10852, completionTokens: 168, cachedTokens: 0, reasoningTokens: 94, input: 10852, cacheRead: 0, output: 168 },
    { promptTokens: 16601, completionTokens: 124, cachedTokens: 10752, reasoningTokens: 14, input: 5849, cacheRead: 10752, output: 124 },
    { promptTokens: 23055, completionTokens: 559, cachedTokens: 18944, reasoningTokens: 516, input: 4111, cacheRead: 18944, output: 559 },
    { promptTokens: 24429, completionTokens: 978, cachedTokens: 22528, reasoningTokens: 455, input: 1901, cacheRead: 22528, output: 978 },
    { promptTokens: 26055, completionTokens: 1405, cachedTokens: 24064, reasoningTokens: 904, input: 1991, cacheRead: 24064, output: 1405 },
    { promptTokens: 28551, completionTokens: 1306, cachedTokens: 25600, reasoningTokens: 941, input: 2951, cacheRead: 25600, output: 1306 },
    { promptTokens: 33145, completionTokens: 1636, cachedTokens: 28160, reasoningTokens: 938, input: 4985, cacheRead: 28160, output: 1636 },
    { promptTokens: 39144, completionTokens: 921, cachedTokens: 32768, reasoningTokens: 595, input: 6376, cacheRead: 32768, output: 921 },
    { promptTokens: 41728, completionTokens: 372, cachedTokens: 38912, reasoningTokens: 193, input: 2816, cacheRead: 38912, output: 372 },
    { promptTokens: 44382, completionTokens: 735, cachedTokens: 41472, reasoningTokens: 488, input: 2910, cacheRead: 41472, output: 735 },
    { promptTokens: 45677, completionTokens: 335, cachedTokens: 44032, reasoningTokens: 83, input: 1645, cacheRead: 44032, output: 335 },
    { promptTokens: 46386, completionTokens: 363, cachedTokens: 45568, reasoningTokens: 119, input: 818, cacheRead: 45568, output: 363 },
    { promptTokens: 48174, completionTokens: 376, cachedTokens: 46080, reasoningTokens: 139, input: 2094, cacheRead: 46080, output: 376 },
    { promptTokens: 48980, completionTokens: 211, cachedTokens: 47616, reasoningTokens: 62, input: 1364, cacheRead: 47616, output: 211 },
    { promptTokens: 65247, completionTokens: 424, cachedTokens: 48640, reasoningTokens: 313, input: 16607, cacheRead: 48640, output: 424 },
    { promptTokens: 68930, completionTokens: 267, cachedTokens: 65024, reasoningTokens: 114, input: 3906, cacheRead: 65024, output: 267 },
    { promptTokens: 69642, completionTokens: 138, cachedTokens: 68608, reasoningTokens: 24, input: 1034, cacheRead: 68608, output: 138 },
    { promptTokens: 75433, completionTokens: 138, cachedTokens: 69120, reasoningTokens: 22, input: 6313, cacheRead: 69120, output: 138 },
    { promptTokens: 78451, completionTokens: 131, cachedTokens: 75264, reasoningTokens: 73, input: 3187, cacheRead: 75264, output: 131 },
    { promptTokens: 78808, completionTokens: 56, cachedTokens: 78336, reasoningTokens: 0, input: 472, cacheRead: 78336, output: 56 },
    { promptTokens: 79128, completionTokens: 56, cachedTokens: 78336, reasoningTokens: 0, input: 792, cacheRead: 78336, output: 56 },
    { promptTokens: 79320, completionTokens: 2799, cachedTokens: 78848, reasoningTokens: 2522, input: 472, cacheRead: 78848, output: 2799 },
    { promptTokens: 82221, completionTokens: 3408, cachedTokens: 78848, reasoningTokens: 2243, input: 3373, cacheRead: 78848, output: 3408 },
    { promptTokens: 91547, completionTokens: 1400, cachedTokens: 81920, reasoningTokens: 1333, input: 9627, cacheRead: 81920, output: 1400 },
    { promptTokens: 93125, completionTokens: 201, cachedTokens: 91136, reasoningTokens: 113, input: 1989, cacheRead: 91136, output: 201 },
    { promptTokens: 93675, completionTokens: 423, cachedTokens: 92672, reasoningTokens: 366, input: 1003, cacheRead: 92672, output: 423 },
    { promptTokens: 94114, completionTokens: 161, cachedTokens: 93184, reasoningTokens: 60, input: 930, cacheRead: 93184, output: 161 },
  ];

  // Serialises one fixture row as a chat.completion.chunk final event that
  // carries both the OpenAI-style `usage` block and the Copilot per-type
  // `copilot_usage.token_details` breakdown.
  function toFinalChunk(row) {
    const openAiUsage = {
      completion_tokens: row.completionTokens,
      prompt_tokens: row.promptTokens,
      total_tokens: row.promptTokens + row.completionTokens,
      prompt_tokens_details: { cached_tokens: row.cachedTokens },
      completion_tokens_details: { reasoning_tokens: row.reasoningTokens },
    };
    const tokenDetails = [
      { token_count: row.input, token_type: 'input' },
      { token_count: row.cacheRead, token_type: 'cache_read' },
      { token_count: row.output, token_type: 'output' },
    ];
    return JSON.stringify({
      object: 'chat.completion.chunk',
      model: 'gpt-5.4-2026-03-05',
      choices: [{ index: 0, delta: {}, finish_reason: 'stop' }],
      usage: openAiUsage,
      copilot_usage: { token_details: tokenDetails },
    });
  }

  // Runs a fixture row through the SSE extractor and then the normalizer.
  const normalizeRow = (row) => {
    const extracted = extractUsageFromSseLine(toFinalChunk(row));
    return normalizeUsage(extracted.usage);
  };

  test.each(CAPTURED_REQUESTS)(
    'request prompt=$promptTokens recovers cache_read=$cacheRead',
    (row) => {
      const usage = normalizeRow(row);
      expect(usage).toEqual({
        input_tokens: row.input,
        output_tokens: row.output,
        cache_read_tokens: row.cacheRead,
        cache_write_tokens: 0,
        reasoning_tokens: row.reasoningTokens,
      });
      // Uncached input plus cache reads must add back up to prompt_tokens.
      expect(usage.input_tokens + usage.cache_read_tokens).toBe(row.promptTokens);
    },
  );

  test('recovers the full aggregate cache-read total the run reported as 0', () => {
    let cacheReadTotal = 0;
    let inputTotal = 0;
    for (const row of CAPTURED_REQUESTS) {
      const usage = normalizeRow(row);
      cacheReadTotal += usage.cache_read_tokens;
      inputTotal += usage.input_tokens;
    }
    expect(cacheReadTotal).toBe(1426432);
    expect(inputTotal).toBe(119526);
  });
});
});

// ── parseSseDataLines ─────────────────────────────────────────────────
Expand Down Expand Up @@ -523,6 +680,25 @@ describe('normalizeUsage', () => {
reasoning_tokens: 0,
});
});

test('normalizes OpenAI Responses API cached_tokens via input_tokens_details.cached_tokens', () => {
  // Responses-API-shaped usage: the cached count is only present inside the
  // `input_tokens_details` object; reasoning_tokens is already top-level.
  const responsesUsage = {
    input_tokens: 707301,
    output_tokens: 12096,
    total_tokens: 719397,
    input_tokens_details: {
      cached_tokens: 672256,
    },
    reasoning_tokens: 7715,
  };
  const expectedNormalized = {
    input_tokens: 707301,
    output_tokens: 12096,
    cache_read_tokens: 672256,
    cache_write_tokens: 0,
    reasoning_tokens: 7715,
  };

  expect(normalizeUsage(responsesUsage)).toEqual(expectedNormalized);
});
});

// ── Copilot copilot_usage.token_details breakdown ─────────────────────
Expand Down
Loading