Skip to content

Commit b17432f

Browse files
authored
refactor: split token-tracker.js into four focused modules (#3343)
* Initial plan * refactor: split token-tracker.js into focused modules * fix: address token tracker security review feedback --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
1 parent 0e46a13 commit b17432f

6 files changed

Lines changed: 998 additions & 894 deletions

File tree

containers/api-proxy/Dockerfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ COPY package*.json ./
1515
RUN npm ci --omit=dev
1616

1717
# Copy application files
18-
COPY server.js logging.js metrics.js rate-limiter.js token-tracker.js \
18+
COPY server.js logging.js metrics.js rate-limiter.js \
19+
token-tracker.js token-persistence.js token-parsers.js \
20+
token-tracker-http.js token-tracker-ws.js \
1921
model-resolver.js proxy-utils.js anthropic-transforms.js \
2022
proxy-request.js model-discovery.js management.js oidc-token-provider.js \
2123
oidc-token-provider-base.js \
Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
/**
2+
* Token usage parsers for AWF API Proxy.
3+
*
4+
* Pure parsing and normalization functions — no I/O, no side effects.
5+
* Covers SSE streaming, non-streaming JSON, decompression helpers, and
6+
* usage normalization into a unified format.
7+
*/
8+
9+
'use strict';
10+
11+
const zlib = require('zlib');
12+
13+
/**
14+
* Check if a response is SSE (Server-Sent Events) streaming.
15+
*/
16+
function isStreamingResponse(headers) {
17+
const ct = headers['content-type'] || '';
18+
return ct.includes('text/event-stream');
19+
}
20+
21+
/**
22+
* Check if a response is gzip or deflate compressed.
23+
*/
24+
function isCompressedResponse(headers) {
25+
const ce = (headers['content-encoding'] || '').toLowerCase();
26+
return ce === 'gzip' || ce === 'deflate' || ce === 'br';
27+
}
28+
29+
/**
30+
* Create a decompression transform stream based on content-encoding.
31+
* Returns null if the encoding is not supported.
32+
*/
33+
function createDecompressor(headers) {
34+
const ce = (headers['content-encoding'] || '').toLowerCase();
35+
if (ce === 'gzip') return zlib.createGunzip();
36+
if (ce === 'deflate') return zlib.createInflate();
37+
if (ce === 'br') return zlib.createBrotliDecompress();
38+
return null;
39+
}
40+
41+
/**
42+
* Extract reasoning token count from provider usage payloads.
43+
*
44+
* Supports explicit `reasoning_tokens` and provider-specific nested fields.
45+
* Priority order: top-level → completion_tokens_details → output_tokens_details.
46+
*/
47+
function extractReasoningTokens(usage) {
48+
if (!usage || typeof usage !== 'object') return undefined;
49+
if (typeof usage.reasoning_tokens === 'number') return usage.reasoning_tokens;
50+
if (usage.completion_tokens_details && typeof usage.completion_tokens_details.reasoning_tokens === 'number') {
51+
return usage.completion_tokens_details.reasoning_tokens;
52+
}
53+
if (usage.output_tokens_details && typeof usage.output_tokens_details.reasoning_tokens === 'number') {
54+
return usage.output_tokens_details.reasoning_tokens;
55+
}
56+
return undefined;
57+
}
58+
59+
/**
60+
* Extract token usage from a non-streaming JSON response body.
61+
*
62+
* Supports:
63+
* - OpenAI/Copilot: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
64+
* - Anthropic: { usage: { input_tokens, output_tokens, cache_creation_input_tokens, cache_read_input_tokens } }
65+
*
66+
* Also extracts the model field if present.
67+
*
68+
* @param {Buffer} body - Response body
69+
* @returns {{ usage: object|null, model: string|null }}
70+
*/
71+
function extractUsageFromJson(body) {
72+
try {
73+
const text = body.toString('utf8');
74+
const json = JSON.parse(text);
75+
const usageSource = (json.usage && typeof json.usage === 'object')
76+
? json.usage
77+
: ((json.response && json.response.usage && typeof json.response.usage === 'object')
78+
? json.response.usage
79+
: null);
80+
const result = { usage: null, model: json.model || (json.response && json.response.model) || null };
81+
82+
if (usageSource) {
83+
const usage = {};
84+
let hasField = false;
85+
// Anthropic fields
86+
if (typeof usageSource.input_tokens === 'number') {
87+
usage.input_tokens = usageSource.input_tokens;
88+
hasField = true;
89+
}
90+
if (typeof usageSource.output_tokens === 'number') {
91+
usage.output_tokens = usageSource.output_tokens;
92+
hasField = true;
93+
}
94+
if (typeof usageSource.cache_creation_input_tokens === 'number') {
95+
usage.cache_creation_input_tokens = usageSource.cache_creation_input_tokens;
96+
hasField = true;
97+
}
98+
if (typeof usageSource.cache_read_input_tokens === 'number') {
99+
usage.cache_read_input_tokens = usageSource.cache_read_input_tokens;
100+
hasField = true;
101+
}
102+
// OpenAI/Copilot fields
103+
if (typeof usageSource.prompt_tokens === 'number') {
104+
usage.prompt_tokens = usageSource.prompt_tokens;
105+
hasField = true;
106+
}
107+
if (typeof usageSource.completion_tokens === 'number') {
108+
usage.completion_tokens = usageSource.completion_tokens;
109+
hasField = true;
110+
}
111+
if (typeof usageSource.total_tokens === 'number') {
112+
usage.total_tokens = usageSource.total_tokens;
113+
hasField = true;
114+
}
115+
const reasoningTokens = extractReasoningTokens(usageSource);
116+
if (typeof reasoningTokens === 'number') {
117+
usage.reasoning_tokens = reasoningTokens;
118+
hasField = true;
119+
}
120+
// OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
121+
if (usageSource.prompt_tokens_details && typeof usageSource.prompt_tokens_details.cached_tokens === 'number') {
122+
usage.cache_read_input_tokens = usageSource.prompt_tokens_details.cached_tokens;
123+
hasField = true;
124+
}
125+
if (hasField) {
126+
result.usage = usage;
127+
}
128+
}
129+
130+
return result;
131+
} catch {
132+
return { usage: null, model: null };
133+
}
134+
}
135+
136+
/**
137+
* Extract token usage from a single SSE data line.
138+
*
139+
* SSE format: "data: {json}\n\n"
140+
*
141+
* Anthropic streaming events with usage:
142+
* - message_start: { type: "message_start", message: { usage: { input_tokens, cache_creation_input_tokens, cache_read_input_tokens } } }
143+
* - message_delta: { type: "message_delta", usage: { output_tokens } }
144+
*
145+
* OpenAI/Copilot streaming events with usage:
146+
* - Final chunk: { usage: { prompt_tokens, completion_tokens, total_tokens, prompt_tokens_details: { cached_tokens } } }
147+
*
148+
* @param {string} line - A single SSE data line (without "data: " prefix)
149+
* @returns {{ usage: object|null, model: string|null }}
150+
*/
151+
function extractUsageFromSseLine(line) {
152+
if (!line || line === '[DONE]') return { usage: null, model: null };
153+
154+
try {
155+
const json = JSON.parse(line);
156+
const result = { usage: null, model: json.model || null };
157+
158+
// Anthropic message_start: usage is inside message object
159+
if (json.type === 'message_start' && json.message && json.message.usage) {
160+
result.usage = {};
161+
const u = json.message.usage;
162+
if (typeof u.input_tokens === 'number') result.usage.input_tokens = u.input_tokens;
163+
if (typeof u.cache_creation_input_tokens === 'number') result.usage.cache_creation_input_tokens = u.cache_creation_input_tokens;
164+
if (typeof u.cache_read_input_tokens === 'number') result.usage.cache_read_input_tokens = u.cache_read_input_tokens;
165+
result.model = (json.message && json.message.model) || result.model;
166+
return result;
167+
}
168+
169+
// Anthropic message_delta: usage at top level
170+
if (json.type === 'message_delta' && json.usage) {
171+
result.usage = {};
172+
if (typeof json.usage.output_tokens === 'number') result.usage.output_tokens = json.usage.output_tokens;
173+
return result;
174+
}
175+
176+
// OpenAI Responses API: usage in response object on completion events
177+
if ((json.type === 'response.completed' || json.type === 'response.done')
178+
&& json.response && json.response.usage && typeof json.response.usage === 'object') {
179+
const u = json.response.usage;
180+
result.usage = {};
181+
if (typeof u.input_tokens === 'number') result.usage.input_tokens = u.input_tokens;
182+
if (typeof u.output_tokens === 'number') result.usage.output_tokens = u.output_tokens;
183+
if (typeof u.total_tokens === 'number') result.usage.total_tokens = u.total_tokens;
184+
const reasoningTokens = extractReasoningTokens(u);
185+
if (typeof reasoningTokens === 'number') result.usage.reasoning_tokens = reasoningTokens;
186+
if (u.prompt_tokens_details && typeof u.prompt_tokens_details.cached_tokens === 'number') {
187+
result.usage.cache_read_input_tokens = u.prompt_tokens_details.cached_tokens;
188+
}
189+
result.model = json.response.model || result.model;
190+
return result;
191+
}
192+
193+
// OpenAI/Copilot: usage at top level in final chunk
194+
if (json.usage && typeof json.usage === 'object') {
195+
result.usage = {};
196+
if (typeof json.usage.prompt_tokens === 'number') result.usage.prompt_tokens = json.usage.prompt_tokens;
197+
if (typeof json.usage.completion_tokens === 'number') result.usage.completion_tokens = json.usage.completion_tokens;
198+
if (typeof json.usage.total_tokens === 'number') result.usage.total_tokens = json.usage.total_tokens;
199+
const reasoningTokens = extractReasoningTokens(json.usage);
200+
if (typeof reasoningTokens === 'number') {
201+
result.usage.reasoning_tokens = reasoningTokens;
202+
}
203+
// OpenAI/Copilot nested cache fields (prompt_tokens_details.cached_tokens)
204+
if (json.usage.prompt_tokens_details && typeof json.usage.prompt_tokens_details.cached_tokens === 'number') {
205+
result.usage.cache_read_input_tokens = json.usage.prompt_tokens_details.cached_tokens;
206+
}
207+
return result;
208+
}
209+
210+
return result;
211+
} catch {
212+
return { usage: null, model: null };
213+
}
214+
}
215+
216+
/**
217+
* Extract all SSE data lines from a text chunk.
218+
* Lines are prefixed with "data: " in the SSE protocol.
219+
*/
220+
function parseSseDataLines(text) {
221+
const lines = [];
222+
const parts = text.split('\n');
223+
for (const part of parts) {
224+
const trimmed = part.trim();
225+
if (trimmed.startsWith('data: ')) {
226+
lines.push(trimmed.slice(6));
227+
} else if (trimmed === 'data:') {
228+
// empty data line
229+
}
230+
}
231+
return lines;
232+
}
233+
234+
/**
235+
* Normalize extracted usage into a unified format.
236+
*
237+
* Output fields:
238+
* - input_tokens: number (from Anthropic input_tokens or OpenAI prompt_tokens)
239+
* - output_tokens: number (from Anthropic output_tokens or OpenAI completion_tokens)
240+
* - cache_read_tokens: number (from Anthropic cache_read_input_tokens or OpenAI prompt_tokens_details.cached_tokens)
241+
* - cache_write_tokens: number (Anthropic cache_creation_input_tokens; not available in OpenAI format)
242+
*/
243+
function normalizeUsage(usage) {
244+
if (!usage) return null;
245+
246+
return {
247+
input_tokens: usage.input_tokens ?? usage.prompt_tokens ?? 0,
248+
output_tokens: usage.output_tokens ?? usage.completion_tokens ?? 0,
249+
cache_read_tokens: usage.cache_read_input_tokens ?? 0,
250+
cache_write_tokens: usage.cache_creation_input_tokens ?? 0,
251+
reasoning_tokens: usage.reasoning_tokens ?? 0,
252+
};
253+
}
254+
255+
module.exports = {
256+
isStreamingResponse,
257+
isCompressedResponse,
258+
createDecompressor,
259+
extractReasoningTokens,
260+
extractUsageFromJson,
261+
extractUsageFromSseLine,
262+
parseSseDataLines,
263+
normalizeUsage,
264+
};

0 commit comments

Comments
 (0)