@@ -118,6 +118,71 @@ function extractCacheReadTokens(usage) {
118118 return undefined ;
119119}
120120
/**
 * Lookup table from a Copilot `token_details[].token_type` value to the
 * Anthropic-normalized usage field it contributes to. A Map (rather than a
 * plain object) keeps lookups safe for unexpected `token_type` values such
 * as "constructor" or non-string types.
 */
const COPILOT_TOKEN_TYPE_FIELDS = new Map([
  ['input', 'input_tokens'],
  ['output', 'output_tokens'],
  ['cache_read', 'cache_read_input_tokens'],
  ['cache_write', 'cache_creation_input_tokens'],
]);

/**
 * Extract the authoritative per-type token breakdown from a Copilot
 * `copilot_usage.token_details` array.
 *
 * The GitHub Copilot OpenAI-compatible endpoint reports a flattened
 * `usage` object where `prompt_tokens` lumps fresh input together with
 * cache-write tokens, and `prompt_tokens_details.cached_tokens` only
 * carries cache-read. The true split (input / cache_read / cache_write /
 * output), which is billed at distinct rates, is only available in the
 * sibling `copilot_usage.token_details` array, e.g.:
 *
 *   copilot_usage: { token_details: [
 *     { token_type: "input",       token_count: 3857 },
 *     { token_type: "cache_read",  token_count: 0 },
 *     { token_type: "cache_write", token_count: 12539 },
 *     { token_type: "output",      token_count: 362 },
 *   ] }
 *
 * `copilot_usage` is read from the top level of `json`, or from
 * `json.response.copilot_usage` when the top-level field is not an object.
 *
 * Entries are accumulated per token type, so repeated types are summed.
 * Entries that are not objects, whose `token_type` is not recognized, or
 * whose `token_count` is not a finite, non-negative number are ignored:
 * a single NaN/Infinity/negative count would otherwise corrupt the running
 * total and override the flattened usage downstream.
 *
 * Returns Anthropic-normalized usage fields (input_tokens, output_tokens,
 * cache_read_input_tokens, cache_creation_input_tokens) so downstream
 * normalization records the correct cache_write split, or null when no
 * recognizable token_details are present. Only fields that were actually
 * reported appear on the returned object.
 *
 * @param {object} json - Parsed response JSON (or SSE event object)
 * @returns {object|null} Usage fields keyed by Anthropic names, or null
 */
function extractCopilotUsageBreakdown(json) {
  const isObject = (value) => Boolean(value) && typeof value === 'object';
  if (!isObject(json)) return null;

  // Prefer the top-level field; fall back to the nested `response` envelope.
  let copilotUsage = null;
  if (isObject(json.copilot_usage)) {
    copilotUsage = json.copilot_usage;
  } else if (isObject(json.response) && isObject(json.response.copilot_usage)) {
    copilotUsage = json.response.copilot_usage;
  }
  if (!copilotUsage || !Array.isArray(copilotUsage.token_details)) return null;

  const out = {};
  let found = false;
  for (const entry of copilotUsage.token_details) {
    if (!isObject(entry)) continue;

    const count = entry.token_count;
    // Number.isFinite rejects non-numbers, NaN and +/-Infinity without coercion
    // (JSON.parse can yield Infinity for literals like 1e999).
    if (!Number.isFinite(count) || count < 0) continue;

    const field = COPILOT_TOKEN_TYPE_FIELDS.get(entry.token_type);
    if (field === undefined) continue;

    out[field] = (out[field] || 0) + count;
    found = true;
  }
  return found ? out : null;
}

185+
121186/**
122187 * Extract token usage from a non-streaming JSON response body.
123188 *
@@ -185,6 +250,26 @@ function extractUsageFromJson(body) {
185250 }
186251 }
187252
253+ // Copilot exposes the authoritative input/cache_read/cache_write/output
254+ // split only in the sibling `copilot_usage.token_details` array. When
255+ // present, prefer it: the flattened `usage.prompt_tokens` lumps fresh
256+ // input together with cache-write tokens (billed at different rates).
257+ const copilotBreakdown = extractCopilotUsageBreakdown ( json ) ;
258+ if ( copilotBreakdown ) {
259+ const merged = { ...( result . usage || { } ) , ...copilotBreakdown } ;
260+ if ( copilotBreakdown . input_tokens !== undefined ) {
261+ // Copilot gave us a precise input split: drop the lumped prompt_tokens.
262+ delete merged . prompt_tokens ;
263+ } else if ( copilotBreakdown . cache_creation_input_tokens !== undefined
264+ && typeof merged . prompt_tokens === 'number' ) {
265+ // cache_write present but input absent: infer input = prompt_tokens - cache_write
266+ // to avoid double-counting cache_write in normalizeUsage.
267+ merged . input_tokens = Math . max ( 0 , merged . prompt_tokens - copilotBreakdown . cache_creation_input_tokens ) ;
268+ delete merged . prompt_tokens ;
269+ }
270+ result . usage = merged ;
271+ }
272+
188273 return result ;
189274 } catch {
190275 return { usage : null , model : null } ;
@@ -260,6 +345,20 @@ function extractUsageFromSseLine(line) {
260345 }
261346 const cacheReadTokens = extractCacheReadTokens ( json . usage ) ;
262347 if ( typeof cacheReadTokens === 'number' ) result . usage . cache_read_input_tokens = cacheReadTokens ;
348+ const copilotBreakdown = extractCopilotUsageBreakdown ( json ) ;
349+ if ( copilotBreakdown ) {
350+ result . usage = { ...result . usage , ...copilotBreakdown } ;
351+ if ( copilotBreakdown . input_tokens !== undefined ) {
352+ // Copilot gave us a precise input split: drop the lumped prompt_tokens.
353+ delete result . usage . prompt_tokens ;
354+ } else if ( copilotBreakdown . cache_creation_input_tokens !== undefined
355+ && typeof result . usage . prompt_tokens === 'number' ) {
356+ // cache_write present but input absent: infer input = prompt_tokens - cache_write
357+ // to avoid double-counting cache_write in normalizeUsage.
358+ result . usage . input_tokens = Math . max ( 0 , result . usage . prompt_tokens - copilotBreakdown . cache_creation_input_tokens ) ;
359+ delete result . usage . prompt_tokens ;
360+ }
361+ }
263362 return result ;
264363 }
265364
@@ -294,7 +393,8 @@ function parseSseDataLines(text) {
294393 * - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
295394 * - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
296395 * - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
297- * - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format)
396+ * - cache_write_tokens: number (Anthropic cache_creation_input_tokens or
397+ * Copilot copilot_usage cache_write; not available in flattened OpenAI usage)
298398 */
299399function normalizeUsage ( usage ) {
300400 if ( ! usage ) return null ;
@@ -314,6 +414,7 @@ module.exports = {
314414 createDecompressor,
315415 extractReasoningTokens,
316416 extractCacheReadTokens,
417+ extractCopilotUsageBreakdown,
317418 extractUsageFromJson,
318419 extractUsageFromSseLine,
319420 parseSseDataLines,
0 commit comments