Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 102 additions & 1 deletion containers/api-proxy/token-parsers.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,71 @@ function extractCacheReadTokens(usage) {
return undefined;
}

// Maps a Copilot `token_type` onto the Anthropic-style usage field it feeds.
// A Map (rather than a plain object) keeps lookups free of prototype keys
// such as "constructor" or "toString".
const COPILOT_TOKEN_TYPE_TO_USAGE_FIELD = new Map([
  ['input', 'input_tokens'],
  ['output', 'output_tokens'],
  ['cache_read', 'cache_read_input_tokens'],
  ['cache_write', 'cache_creation_input_tokens'],
]);

/**
 * Extract the authoritative per-type token breakdown from a Copilot
 * `copilot_usage.token_details` array.
 *
 * The GitHub Copilot OpenAI-compatible endpoint reports a flattened
 * `usage` object where `prompt_tokens` lumps fresh input together with
 * cache-write tokens, and `prompt_tokens_details.cached_tokens` only
 * carries cache-read. The true split (input / cache_read / cache_write /
 * output), which is billed at distinct rates, is only available in the
 * sibling `copilot_usage.token_details` array, e.g.:
 *
 *   copilot_usage: { token_details: [
 *     { token_type: "input",       token_count: 3857 },
 *     { token_type: "cache_read",  token_count: 0 },
 *     { token_type: "cache_write", token_count: 12539 },
 *     { token_type: "output",      token_count: 362 },
 *   ] }
 *
 * `copilot_usage` is looked up on the object itself first, then under
 * `json.response`. Repeated entries of the same type are summed.
 *
 * Entries are skipped when they are not objects, carry an unrecognized
 * `token_type`, or have a `token_count` that is not a finite, non-negative
 * number. The finiteness check matters because `JSON.parse` turns an
 * overflowing literal such as `1e999` into `Infinity`, which would
 * otherwise poison the running sum for that token type.
 *
 * Returns Anthropic-normalized usage fields (input_tokens, output_tokens,
 * cache_read_input_tokens, cache_creation_input_tokens) so downstream
 * normalization records the correct cache_write split, or null when no
 * recognizable token_details are present. Only fields that had at least
 * one valid entry appear in the result.
 *
 * @param {object} json - Parsed response JSON (or SSE event object)
 * @returns {object|null}
 */
function extractCopilotUsageBreakdown(json) {
  if (!json || typeof json !== 'object') return null;

  const isObject = (value) => Boolean(value) && typeof value === 'object';

  let copilotUsage = null;
  if (isObject(json.copilot_usage)) {
    copilotUsage = json.copilot_usage;
  } else if (json.response && isObject(json.response.copilot_usage)) {
    copilotUsage = json.response.copilot_usage;
  }
  if (!copilotUsage || !Array.isArray(copilotUsage.token_details)) return null;

  const out = {};
  for (const entry of copilotUsage.token_details) {
    if (!isObject(entry)) continue;

    const count = entry.token_count;
    // Number.isFinite rejects non-numbers as well as NaN and +/-Infinity.
    if (!Number.isFinite(count) || count < 0) continue;

    const field = COPILOT_TOKEN_TYPE_TO_USAGE_FIELD.get(entry.token_type);
    if (field === undefined) continue;

    out[field] = (out[field] || 0) + count;
  }

  // A key exists only if at least one valid, recognized entry was seen.
  return Object.keys(out).length > 0 ? out : null;
}

/**
* Extract token usage from a non-streaming JSON response body.
*
Expand Down Expand Up @@ -185,6 +250,26 @@ function extractUsageFromJson(body) {
}
}

// Copilot exposes the authoritative input/cache_read/cache_write/output
// split only in the sibling `copilot_usage.token_details` array. When
// present, prefer it: the flattened `usage.prompt_tokens` lumps fresh
// input together with cache-write tokens (billed at different rates).
const copilotBreakdown = extractCopilotUsageBreakdown(json);
if (copilotBreakdown) {
const merged = { ...(result.usage || {}), ...copilotBreakdown };
if (copilotBreakdown.input_tokens !== undefined) {
// Copilot gave us a precise input split: drop the lumped prompt_tokens.
delete merged.prompt_tokens;
} else if (copilotBreakdown.cache_creation_input_tokens !== undefined
&& typeof merged.prompt_tokens === 'number') {
// cache_write present but input absent: infer input = prompt_tokens - cache_write
// to avoid double-counting cache_write in normalizeUsage.
merged.input_tokens = Math.max(0, merged.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
delete merged.prompt_tokens;
}
result.usage = merged;
}

return result;
} catch {
return { usage: null, model: null };
Expand Down Expand Up @@ -260,6 +345,20 @@ function extractUsageFromSseLine(line) {
}
const cacheReadTokens = extractCacheReadTokens(json.usage);
if (typeof cacheReadTokens === 'number') result.usage.cache_read_input_tokens = cacheReadTokens;
const copilotBreakdown = extractCopilotUsageBreakdown(json);
if (copilotBreakdown) {
result.usage = { ...result.usage, ...copilotBreakdown };
if (copilotBreakdown.input_tokens !== undefined) {
// Copilot gave us a precise input split: drop the lumped prompt_tokens.
delete result.usage.prompt_tokens;
} else if (copilotBreakdown.cache_creation_input_tokens !== undefined
&& typeof result.usage.prompt_tokens === 'number') {
// cache_write present but input absent: infer input = prompt_tokens - cache_write
// to avoid double-counting cache_write in normalizeUsage.
result.usage.input_tokens = Math.max(0, result.usage.prompt_tokens - copilotBreakdown.cache_creation_input_tokens);
delete result.usage.prompt_tokens;
}
}
return result;
}

Expand Down Expand Up @@ -294,7 +393,8 @@ function parseSseDataLines(text) {
* - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
* - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
* - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
* - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format)
* - cache_write_tokens: number (Anthropic cache_creation_input_tokens or
* Copilot copilot_usage cache_write; not available in flattened OpenAI usage)
*/
function normalizeUsage(usage) {
if (!usage) return null;
Expand All @@ -314,6 +414,7 @@ module.exports = {
createDecompressor,
extractReasoningTokens,
extractCacheReadTokens,
extractCopilotUsageBreakdown,
extractUsageFromJson,
extractUsageFromSseLine,
parseSseDataLines,
Expand Down
2 changes: 2 additions & 0 deletions containers/api-proxy/token-tracker.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const {
normalizeUsage,
isStreamingResponse,
isCompressedResponse,
extractCopilotUsageBreakdown,
} = require('./token-parsers');

module.exports = {
Expand All @@ -39,6 +40,7 @@ module.exports = {
normalizeUsage,
isStreamingResponse,
isCompressedResponse,
extractCopilotUsageBreakdown,
validateTokenUsageRecord,
writeTokenUsage,
TOKEN_LOG_FILE,
Expand Down
229 changes: 229 additions & 0 deletions containers/api-proxy/token-tracker.parsing.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ const {
extractUsageFromSseLine,
parseSseDataLines,
normalizeUsage,
extractCopilotUsageBreakdown,
} = require('./token-tracker');

// ── extractUsageFromJson ──────────────────────────────────────────────
Expand Down Expand Up @@ -523,3 +524,231 @@ describe('normalizeUsage', () => {
});
});
});

// ── Copilot copilot_usage.token_details breakdown ─────────────────────

describe('extractCopilotUsageBreakdown', () => {
  // Wrap a `token_details` value in the top-level `copilot_usage` envelope.
  const withDetails = (token_details) => ({ copilot_usage: { token_details } });

  test('returns null when copilot_usage is absent', () => {
    const payload = { usage: { prompt_tokens: 10 } };
    expect(extractCopilotUsageBreakdown(payload)).toBeNull();
  });

  test('returns null when token_details is not an array', () => {
    const payload = withDetails({});
    expect(extractCopilotUsageBreakdown(payload)).toBeNull();
  });

  test('returns null when no recognizable token types are present', () => {
    const payload = withDetails([{ token_type: 'mystery', token_count: 5 }]);
    expect(extractCopilotUsageBreakdown(payload)).toBeNull();
  });

  test('extracts the full input/cache_read/cache_write/output split', () => {
    const payload = withDetails([
      { token_type: 'input', token_count: 3857 },
      { token_type: 'cache_read', token_count: 0 },
      { token_type: 'cache_write', token_count: 12539 },
      { token_type: 'output', token_count: 362 },
    ]);
    const expected = {
      input_tokens: 3857,
      cache_read_input_tokens: 0,
      cache_creation_input_tokens: 12539,
      output_tokens: 362,
    };
    expect(extractCopilotUsageBreakdown(payload)).toEqual(expected);
  });

  test('reads copilot_usage nested under a response object', () => {
    // Same envelope, one level down under `response`.
    const payload = { response: withDetails([{ token_type: 'input', token_count: 7 }]) };
    expect(extractCopilotUsageBreakdown(payload)).toEqual({ input_tokens: 7 });
  });

  test('sums repeated token types and ignores malformed entries', () => {
    const payload = withDetails([
      // Two valid `input` rows: expected to be added together.
      { token_type: 'input', token_count: 100 },
      { token_type: 'input', token_count: 50 },
      // Malformed rows: non-numeric count, null entry, missing count.
      { token_type: 'output', token_count: 'nope' },
      null,
      { token_type: 'cache_write' },
    ]);
    expect(extractCopilotUsageBreakdown(payload)).toEqual({ input_tokens: 150 });
  });
});

// ── extractUsageFromJson + Copilot breakdown integration ──────────────

describe('extractUsageFromJson with copilot_usage', () => {
  // Serialize a payload into a Buffer of JSON text.
  const toBody = (payload) => Buffer.from(JSON.stringify(payload));

  // Run a payload through extraction and then normalization.
  const normalizedUsageOf = (payload) => normalizeUsage(extractUsageFromJson(toBody(payload)).usage);

  // Real Claude-via-Copilot response shape: flattened usage.prompt_tokens
  // lumps fresh input (3857) with cache_write (12539); the authoritative
  // split lives only in copilot_usage.token_details.
  const claudeViaCopilot = {
    id: 'e6925ddf',
    model: 'claude-sonnet-4.6',
    choices: [{ message: { role: 'assistant', content: 'hi' } }],
    usage: {
      completion_tokens: 362,
      prompt_tokens: 16396,
      prompt_tokens_details: { cached_tokens: 0 },
      total_tokens: 16758,
    },
    copilot_usage: {
      token_details: [
        { token_type: 'input', token_count: 3857 },
        { token_type: 'cache_read', token_count: 0 },
        { token_type: 'cache_write', token_count: 12539 },
        { token_type: 'output', token_count: 362 },
      ],
      total_nano_aiu: 6402225000,
    },
  };

  test('prefers the copilot_usage split over the lumped prompt_tokens', () => {
    const extracted = extractUsageFromJson(toBody(claudeViaCopilot));
    expect(extracted.model).toBe('claude-sonnet-4.6');

    const { usage } = extracted;
    expect(usage.input_tokens).toBe(3857);
    expect(usage.cache_creation_input_tokens).toBe(12539);
    expect(usage.cache_read_input_tokens).toBe(0);
    expect(usage.output_tokens).toBe(362);
    // The lumped prompt_tokens is dropped so normalization uses input_tokens.
    expect(usage.prompt_tokens).toBeUndefined();
  });

  test('normalizes to the correct cache_write split', () => {
    expect(normalizedUsageOf(claudeViaCopilot)).toEqual({
      input_tokens: 3857,
      output_tokens: 362,
      cache_read_tokens: 0,
      cache_write_tokens: 12539,
      reasoning_tokens: 0,
    });
  });

  test('does not affect plain OpenAI responses without copilot_usage', () => {
    const plainOpenAi = {
      model: 'gpt-5',
      usage: {
        prompt_tokens: 100,
        completion_tokens: 20,
        total_tokens: 120,
        prompt_tokens_details: { cached_tokens: 30 },
      },
    };
    expect(normalizedUsageOf(plainOpenAi)).toEqual({
      input_tokens: 100,
      output_tokens: 20,
      cache_read_tokens: 30,
      cache_write_tokens: 0,
      reasoning_tokens: 0,
    });
  });

  test('uses copilot_usage even when the flattened usage object is absent', () => {
    const breakdownOnly = {
      model: 'claude-sonnet-4.6',
      copilot_usage: {
        token_details: [
          { token_type: 'input', token_count: 200 },
          { token_type: 'output', token_count: 10 },
          { token_type: 'cache_write', token_count: 99 },
        ],
      },
    };
    expect(normalizedUsageOf(breakdownOnly)).toEqual({
      input_tokens: 200,
      output_tokens: 10,
      cache_read_tokens: 0,
      cache_write_tokens: 99,
      reasoning_tokens: 0,
    });
  });

  test('infers input_tokens from prompt_tokens when copilot_usage has cache_write but no input', () => {
    // Edge case: token_details provides cache_write but omits input.
    // prompt_tokens = input + cache_write, so input must be inferred to avoid
    // double-counting cache_write in normalizeUsage.
    const missingInput = {
      model: 'claude-sonnet-4.6',
      usage: {
        prompt_tokens: 500,
        completion_tokens: 50,
        total_tokens: 550,
      },
      copilot_usage: {
        token_details: [
          { token_type: 'cache_write', token_count: 300 },
          { token_type: 'output', token_count: 50 },
        ],
      },
    };

    const { usage } = extractUsageFromJson(toBody(missingInput));

    // prompt_tokens should be removed; input_tokens inferred as 500 - 300 = 200
    expect(usage.prompt_tokens).toBeUndefined();
    expect(usage.input_tokens).toBe(200);
    expect(usage.cache_creation_input_tokens).toBe(300);
    expect(normalizeUsage(usage)).toEqual({
      input_tokens: 200,
      output_tokens: 50,
      cache_read_tokens: 0,
      cache_write_tokens: 300,
      reasoning_tokens: 0,
    });
  });
});

describe('extractUsageFromSseLine with copilot_usage', () => {
  // Serialize an event payload and return the usage extracted from that line.
  const usageFromEvent = (event) => extractUsageFromSseLine(JSON.stringify(event)).usage;

  test('applies the copilot_usage split in a streaming final chunk', () => {
    const finalChunk = {
      model: 'claude-sonnet-4.6',
      usage: { prompt_tokens: 16396, completion_tokens: 362, total_tokens: 16758 },
      copilot_usage: {
        token_details: [
          { token_type: 'input', token_count: 3857 },
          { token_type: 'cache_write', token_count: 12539 },
          { token_type: 'output', token_count: 362 },
        ],
      },
    };

    const usage = usageFromEvent(finalChunk);

    expect(usage.input_tokens).toBe(3857);
    expect(usage.cache_creation_input_tokens).toBe(12539);
    expect(usage.prompt_tokens).toBeUndefined();
  });

  test('infers input_tokens from prompt_tokens when streaming copilot_usage has cache_write but no input', () => {
    // Same double-count guard as non-streaming: if token_details omits input but
    // provides cache_write, prompt_tokens must not survive alongside cache_creation_input_tokens.
    const chunkWithoutInput = {
      model: 'claude-sonnet-4.6',
      usage: { prompt_tokens: 500, completion_tokens: 50, total_tokens: 550 },
      copilot_usage: {
        token_details: [
          { token_type: 'cache_write', token_count: 300 },
          { token_type: 'output', token_count: 50 },
        ],
      },
    };

    const usage = usageFromEvent(chunkWithoutInput);

    expect(usage.prompt_tokens).toBeUndefined();
    expect(usage.input_tokens).toBe(200);
    expect(usage.cache_creation_input_tokens).toBe(300);
    expect(normalizeUsage(usage)).toEqual({
      input_tokens: 200,
      output_tokens: 50,
      cache_read_tokens: 0,
      cache_write_tokens: 300,
      reasoning_tokens: 0,
    });
  });
});
Loading