diff --git a/src/lib/paths.ts b/src/lib/paths.ts
index 8d0a9f02b..10af945f0 100644
--- a/src/lib/paths.ts
+++ b/src/lib/paths.ts
@@ -5,10 +5,12 @@ import path from "node:path"
 const APP_DIR = path.join(os.homedir(), ".local", "share", "copilot-api")
 
 const GITHUB_TOKEN_PATH = path.join(APP_DIR, "github_token")
+const TOKEN_USAGE_DB_PATH = path.join(APP_DIR, "token-usage.db")
 
 export const PATHS = {
   APP_DIR,
   GITHUB_TOKEN_PATH,
+  TOKEN_USAGE_DB_PATH,
 }
 
 export async function ensurePaths(): Promise<void> {
diff --git a/src/lib/token-store.ts b/src/lib/token-store.ts
new file mode 100644
index 000000000..b4d50df3b
--- /dev/null
+++ b/src/lib/token-store.ts
@@ -0,0 +1,151 @@
+import { Database } from "bun:sqlite"
+
+import { PATHS } from "./paths"
+
+/** One `token_usage` row: totals for one model in a one-minute bucket. */
+interface TokenUsageRow {
+  /** Start of the minute bucket, in Unix seconds. */
+  timestamp_min: number
+  model: string
+  input_tokens: number
+  output_tokens: number
+  request_count: number
+}
+
+let db: Database | null = null
+
+/** Returns the open database, throwing if initTokenStore() has not run yet. */
+const getDb = (): Database => {
+  if (!db)
+    throw new Error("Token store not initialized. Call initTokenStore() first.")
+  return db
+}
+
+/**
+ * Opens the SQLite database at PATHS.TOKEN_USAGE_DB_PATH and creates the
+ * `token_usage` table and its timestamp index if they do not exist yet.
+ */
+export const initTokenStore = (): void => {
+  db = new Database(PATHS.TOKEN_USAGE_DB_PATH)
+
+  db.run(`
+    CREATE TABLE IF NOT EXISTS token_usage (
+      timestamp_min INTEGER NOT NULL,
+      model TEXT NOT NULL,
+      input_tokens INTEGER NOT NULL DEFAULT 0,
+      output_tokens INTEGER NOT NULL DEFAULT 0,
+      request_count INTEGER NOT NULL DEFAULT 0,
+      PRIMARY KEY (timestamp_min, model)
+    )
+  `)
+
+  db.run(`
+    CREATE INDEX IF NOT EXISTS idx_token_usage_timestamp
+    ON token_usage (timestamp_min)
+  `)
+}
+
+/** Current time rounded down to the start of its minute, in Unix seconds. */
+const currentMinuteBucket = (): number =>
+  Math.floor(Date.now() / 1000 / 60) * 60
+
+/** Deletes rows whose bucket started more than 30 days ago. */
+const pruneOldData = (): void => {
+  const thirtyDaysAgo = Math.floor(Date.now() / 1000) - 30 * 24 * 60 * 60
+  getDb().run("DELETE FROM token_usage WHERE timestamp_min < ?", [
+    thirtyDaysAgo,
+  ])
+}
+
+/**
+ * Adds one request's token counts to the current minute bucket for `model`,
+ * then prunes rows older than 30 days. Failures are logged, never thrown.
+ */
+export const recordTokenUsage = (
+  model: string,
+  inputTokens: number,
+  outputTokens: number,
+): void => {
+  try {
+    const bucket = currentMinuteBucket()
+
+    getDb().run(
+      `INSERT INTO token_usage (timestamp_min, model, input_tokens, output_tokens, request_count)
+       VALUES (?, ?, ?, ?, 1)
+       ON CONFLICT (timestamp_min, model) DO UPDATE SET
+         input_tokens = input_tokens + excluded.input_tokens,
+         output_tokens = output_tokens + excluded.output_tokens,
+         request_count = request_count + 1`,
+      [bucket, model, inputTokens, outputTokens],
+    )
+
+    pruneOldData()
+  } catch (error) {
+    // Never let storage errors surface to callers
+    console.error("[token-store] Failed to record token usage:", error)
+  }
+}
+
+/** Totals across every row returned for the requested range. */
+export interface TokenUsageSummary {
+  total_input: number
+  total_output: number
+  total_requests: number
+  /** Distinct model ids seen in the range, sorted. */
+  models: Array<string>
+}
+
+export interface TokenUsageResponse {
+  /** Range label such as "24h", or `${seconds}s` for a non-standard range. */
+  range: string
+  data: Array<TokenUsageRow>
+  summary: TokenUsageSummary
+}
+
+/** Maps a range length in seconds to the label echoed back in responses. */
+const RANGE_LABELS: Record<string, string> = {
+  "3600": "1h",
+  "21600": "6h",
+  "86400": "24h",
+  "604800": "7d",
+  "2592000": "30d",
+}
+
+/**
+ * Returns the rows whose bucket starts within the last `rangeSeconds`
+ * seconds, oldest first, together with totals across those rows.
+ */
+export const getTokenUsageData = (rangeSeconds: number): TokenUsageResponse => {
+  const since = Math.floor(Date.now() / 1000) - rangeSeconds
+
+  const rows = getDb()
+    .query<TokenUsageRow, [number]>(
+      `SELECT timestamp_min, model, input_tokens, output_tokens, request_count
+       FROM token_usage
+       WHERE timestamp_min >= ?
+       ORDER BY timestamp_min ASC`,
+    )
+    .all(since)
+
+  const summary: TokenUsageSummary = {
+    total_input: 0,
+    total_output: 0,
+    total_requests: 0,
+    models: [],
+  }
+
+  const modelSet = new Set<string>()
+  for (const row of rows) {
+    summary.total_input += row.input_tokens
+    summary.total_output += row.output_tokens
+    summary.total_requests += row.request_count
+    modelSet.add(row.model)
+  }
+  summary.models = [...modelSet].sort()
+
+  return {
+    range: RANGE_LABELS[String(rangeSeconds)] ?? `${rangeSeconds}s`,
+    data: rows,
+    summary,
+  }
+}
diff --git a/src/routes/chat-completions/handler.ts b/src/routes/chat-completions/handler.ts
index 04a5ae9ed..dd829bff3 100644
--- a/src/routes/chat-completions/handler.ts
+++ b/src/routes/chat-completions/handler.ts
@@ -6,39 +6,68 @@ import { streamSSE, type SSEMessage } from "hono/streaming"
 import { awaitApproval } from "~/lib/approval"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import { recordTokenUsage } from "~/lib/token-store"
 import { getTokenCount } from "~/lib/tokenizer"
 import { isNullish } from "~/lib/utils"
 import {
   createChatCompletions,
+  type ChatCompletionChunk,
   type ChatCompletionResponse,
   type ChatCompletionsPayload,
 } from "~/services/copilot/create-chat-completions"
 
+type TokenEstimate = { input: number; output: number } | null
+
+/**
+ * Best-effort local token estimate for the request, used as a fallback when
+ * the upstream response carries no usage. Returns null when the model is not
+ * in state.models or counting throws.
+ */
+const estimateTokens = async (
+  payload: ChatCompletionsPayload,
+): Promise<TokenEstimate> => {
+  const model = state.models?.data.find((m) => m.id === payload.model)
+  if (!model) {
+    consola.warn("No model selected, skipping token count calculation")
+    return null
+  }
+  try {
+    const count = await getTokenCount(payload, model)
+    consola.info("Current token count:", count)
+    return count
+  } catch (error) {
+    consola.warn("Failed to calculate token count:", error)
+    return null
+  }
+}
+
+/**
+ * Extracts the `usage` field from one SSE chunk. Returns undefined for empty
+ * data, the "[DONE]" sentinel, non-string data, or unparseable JSON.
+ */
+const parseChunkUsage = (chunk: SSEMessage): ChatCompletionChunk["usage"] => {
+  if (!chunk.data || chunk.data === "[DONE]") return undefined
+  if (typeof chunk.data !== "string") return undefined
+  try {
+    const parsed = JSON.parse(chunk.data) as ChatCompletionChunk
+    return parsed.usage
+  } catch {
+    return undefined
+  }
+}
+
 export async function handleCompletion(c: Context) {
   await checkRateLimit(state)
 
   let payload = await c.req.json()
   consola.debug("Request payload:", JSON.stringify(payload).slice(-400))
 
-  // Find the selected model
-  const selectedModel = state.models?.data.find(
-    (model) => model.id === payload.model,
-  )
-
-  // Calculate and display token count
-  try {
-    if (selectedModel) {
-      const tokenCount = await getTokenCount(payload, selectedModel)
-      consola.info("Current token count:", tokenCount)
-    } else {
-      consola.warn("No model selected, skipping token count calculation")
-    }
-  } catch (error) {
-    consola.warn("Failed to calculate token count:", error)
-  }
+  const estimated = await estimateTokens(payload)
 
   if (state.manualApprove) await awaitApproval()
 
+  const selectedModel = state.models?.data.find((m) => m.id === payload.model)
+
   if (isNullish(payload.max_tokens)) {
     payload = {
       ...payload,
@@ -47,19 +76,36 @@ export async function handleCompletion(c: Context) {
     consola.debug("Set max_tokens to:", JSON.stringify(payload.max_tokens))
   }
 
+  if (payload.stream) {
+    payload = { ...payload, stream_options: { include_usage: true } }
+  }
+
   const response = await createChatCompletions(payload)
 
   if (isNonStreaming(response)) {
     consola.debug("Non-streaming response:", JSON.stringify(response))
+    const inputTokens = response.usage?.prompt_tokens ?? estimated?.input ?? 0
+    const outputTokens =
+      response.usage?.completion_tokens ?? estimated?.output ?? 0
+    recordTokenUsage(payload.model, inputTokens, outputTokens)
     return c.json(response)
   }
 
   consola.debug("Streaming response")
   return streamSSE(c, async (stream) => {
+    let lastUsage: ChatCompletionChunk["usage"] | undefined
+
     for await (const chunk of response) {
       consola.debug("Streaming chunk:", JSON.stringify(chunk))
       await stream.writeSSE(chunk as SSEMessage)
+      lastUsage = parseChunkUsage(chunk as SSEMessage) ?? lastUsage
     }
+
+    recordTokenUsage(
+      payload.model,
+      lastUsage?.prompt_tokens ?? estimated?.input ?? 0,
+      lastUsage?.completion_tokens ?? estimated?.output ?? 0,
+    )
   })
 }
 
diff --git a/src/routes/dashboard/budget.ts b/src/routes/dashboard/budget.ts
new file mode 100644
index 000000000..8114dd489
--- /dev/null
+++ b/src/routes/dashboard/budget.ts
@@ -0,0 +1,40 @@
+export interface QuotaDetail {
+  entitlement: number
+  remaining: number
+  percent_remaining: number
+  unlimited: boolean
+  over_limit?: boolean
+}
+
+/** Returns CSS hex color based on % used (100 - percent_remaining). */
+export function getBudgetColor(percentUsed: number): string {
+  if (percentUsed >= 95) return "#ef4444"
+  if (percentUsed >= 80) return "#f59e0b"
+  return "#22c55e"
+}
+
+/**
+ * Computes percentage used from a QuotaDetail, clamped to 0–100.
+ * Returns 0 for unlimited quotas and for a zero entitlement.
+ */
+export function getPercentUsed(quota: QuotaDetail): number {
+  if (quota.unlimited) return 0
+  if (quota.entitlement === 0) return 0
+  const percentUsed =
+    ((quota.entitlement - quota.remaining) / quota.entitlement) * 100
+  // remaining < 0 (over limit) caps at 100; remaining > entitlement floors at 0
+  return Math.min(100, Math.max(0, percentUsed))
+}
+
+/** Returns a human-readable label for a quota key. */
+export function getQuotaLabel(key: string): string {
+  const labels: Record<string, string> = {
+    premium_interactions: "Premium Interactions",
+    chat: "Chat",
+    completions: "Completions",
+  }
+  return (
+    labels[key]
+    ?? key.replaceAll("_", " ").replaceAll(/\b\w/g, (c) => c.toUpperCase())
+  )
+}
diff --git a/src/routes/dashboard/page.ts b/src/routes/dashboard/page.ts
new file mode 100644
index 000000000..61f6e284b
--- /dev/null
+++ b/src/routes/dashboard/page.ts
@@ -0,0 +1,441 @@
+export const DASHBOARD_HTML = /* html */ ` + + + + + Token Usage — Copilot API + + + + + + + +
+
+ + +
+
+
+ + + +

Token Usage

+
+

Local request & response token tracking · 30-day history

+
+
+
+ + +
+
+ + + + + +
+
+
+ + + + + + +
+
+
Total Requests
+
+
+
+
+
+
Input Tokens
+ Input +
+
+
+
+
+
+
Output Tokens
+ Output +
+
+
+
+
+ + +
+
+
+
Token Activity
+
Input vs Output tokens over time
+
+
+ Input + Output +
+
+
+ +
+ +
+ + +
+
+
Model Breakdown
+
Per-model aggregated totals
+
+
+ + + + + + + + + + + + + + +
ModelRequestsInput TokensOutput TokensTotal TokensInput %
No data
+
+
+ +
+
+ + + +`
diff --git a/src/routes/dashboard/route.ts b/src/routes/dashboard/route.ts
new file mode 100644
index 000000000..b453a1183
--- /dev/null
+++ b/src/routes/dashboard/route.ts
@@ -0,0 +1,10 @@
+import { Hono } from "hono"
+
+import { DASHBOARD_HTML } from "./page"
+
+export const dashboardRoute = new Hono()
+
+/** Serves the static DASHBOARD_HTML page. */
+dashboardRoute.get("/", (c) => {
+  return c.html(DASHBOARD_HTML)
+})
diff --git a/src/routes/messages/handler.ts b/src/routes/messages/handler.ts
index 85dbf6243..f2b13d73a 100644
--- a/src/routes/messages/handler.ts
+++ b/src/routes/messages/handler.ts
@@ -6,6 +6,8 @@ import { streamSSE } from "hono/streaming"
 import { awaitApproval } from "~/lib/approval"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import { recordTokenUsage } from "~/lib/token-store"
+import { getTokenCount } from "~/lib/tokenizer"
 import {
   createChatCompletions,
   type ChatCompletionChunk,
@@ -28,7 +30,7 @@ export async function handleCompletion(c: Context) {
   const anthropicPayload = await c.req.json()
   consola.debug("Anthropic request payload:", JSON.stringify(anthropicPayload))
 
-  const openAIPayload = translateToOpenAI(anthropicPayload)
+  let openAIPayload = translateToOpenAI(anthropicPayload)
   consola.debug(
     "Translated OpenAI request payload:",
     JSON.stringify(openAIPayload),
@@ -38,6 +40,27 @@ export async function handleCompletion(c: Context) {
     await awaitApproval()
   }
 
+  // Calculate estimated token count as fallback for streaming with no usage data
+  const selectedModel = state.models?.data.find(
+    (model) => model.id === openAIPayload.model,
+  )
+  let estimatedTokenCount: { input: number; output: number } | null = null
+  try {
+    if (selectedModel) {
+      estimatedTokenCount = await getTokenCount(openAIPayload, selectedModel)
+    }
+  } catch {
+    // non-critical, ignore
+  }
+
+  // Request usage data in streaming responses
+  if (openAIPayload.stream) {
+    openAIPayload = {
+      ...openAIPayload,
+      stream_options: { include_usage: true },
+    }
+  }
+
   const response = await createChatCompletions(openAIPayload)
 
   if (isNonStreaming(response)) {
@@ -45,6 +68,13 @@ export async function handleCompletion(c: Context) {
       "Non-streaming response from Copilot:",
       JSON.stringify(response).slice(-400),
     )
+
+    const inputTokens =
+      response.usage?.prompt_tokens ?? estimatedTokenCount?.input ?? 0
+    const outputTokens =
+      response.usage?.completion_tokens ?? estimatedTokenCount?.output ?? 0
+    recordTokenUsage(openAIPayload.model, inputTokens, outputTokens)
+
     const anthropicResponse = translateToAnthropic(response)
     consola.debug(
       "Translated Anthropic response:",
@@ -62,6 +92,8 @@ export async function handleCompletion(c: Context) {
       toolCalls: {},
     }
 
+    let lastUsage: ChatCompletionChunk["usage"] | undefined
+
     for await (const rawEvent of response) {
       consola.debug("Copilot raw stream event:", JSON.stringify(rawEvent))
       if (rawEvent.data === "[DONE]") {
@@ -73,6 +105,8 @@ export async function handleCompletion(c: Context) {
       }
 
       const chunk = JSON.parse(rawEvent.data) as ChatCompletionChunk
+      if (chunk.usage) lastUsage = chunk.usage
+
       const events = translateChunkToAnthropicEvents(chunk, streamState)
 
       for (const event of events) {
@@ -83,6 +117,12 @@ export async function handleCompletion(c: Context) {
         })
       }
     }
+
+    recordTokenUsage(
+      openAIPayload.model,
+      lastUsage?.prompt_tokens ?? estimatedTokenCount?.input ?? 0,
+      lastUsage?.completion_tokens ?? estimatedTokenCount?.output ?? 0,
+    )
   })
 }
 
diff --git a/src/routes/token-usage/route.ts b/src/routes/token-usage/route.ts
new file mode 100644
index 000000000..b2f7f64e9
--- /dev/null
+++ b/src/routes/token-usage/route.ts
@@ -0,0 +1,33 @@
+import { Hono } from "hono"
+
+import { getTokenUsageData } from "~/lib/token-store"
+
+/** Window applied when `range` is missing or not a supported label. */
+const DEFAULT_RANGE_SECONDS = 86400
+
+/**
+ * Supported `range` query values and their length in seconds. Kept in a Map
+ * so that inherited object keys such as "constructor" are not treated as
+ * valid ranges.
+ */
+const RANGE_MAP = new Map<string, number>([
+  ["1h", 3600],
+  ["6h", 21600],
+  ["24h", DEFAULT_RANGE_SECONDS],
+  ["7d", 604800],
+  ["30d", 2592000],
+])
+
+export const tokenUsageRoute = new Hono()
+
+/**
+ * GET /?range=1h|6h|24h|7d|30d — returns getTokenUsageData() for that window
+ * as JSON. A missing or unrecognised `range` falls back to 24h.
+ */
+tokenUsageRoute.get("/", (c) => {
+  const rangeParam = c.req.query("range") ?? "24h"
+  const rangeSeconds = RANGE_MAP.get(rangeParam) ?? DEFAULT_RANGE_SECONDS
+
+  const result = getTokenUsageData(rangeSeconds)
+  return c.json(result)
+})
diff --git a/src/server.ts b/src/server.ts
index 462a278f3..8c807e0c3 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -3,9 +3,11 @@ import { cors } from "hono/cors"
 import { logger } from "hono/logger"
 
 import { completionRoutes } from "./routes/chat-completions/route"
+import { dashboardRoute } from "./routes/dashboard/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { tokenUsageRoute } from "./routes/token-usage/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -21,6 +23,8 @@ server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
 server.route("/usage", usageRoute)
 server.route("/token", tokenRoute)
+server.route("/token-usage", tokenUsageRoute)
+server.route("/dashboard", dashboardRoute)
 
 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
diff --git a/src/services/copilot/create-chat-completions.ts b/src/services/copilot/create-chat-completions.ts
index 8534151da..78dfb8c87 100644
--- a/src/services/copilot/create-chat-completions.ts
+++ b/src/services/copilot/create-chat-completions.ts
@@ -133,6 +133,7 @@ export interface ChatCompletionsPayload {
   stop?: string | Array<string> | null
   n?: number | null
   stream?: boolean | null
+  stream_options?: { include_usage: boolean } | null
 
   frequency_penalty?: number | null
   presence_penalty?: number | null
diff --git a/src/start.ts b/src/start.ts
index 14abbbdff..dcd51ddef 100644
--- a/src/start.ts
+++ b/src/start.ts
@@ -11,6 +11,7 @@ import { initProxyFromEnv } from "./lib/proxy"
 import { generateEnvScript } from "./lib/shell"
 import { state } from "./lib/state"
 import { setupCopilotToken, setupGitHubToken } from "./lib/token"
+import { initTokenStore } from "./lib/token-store"
 import { cacheModels, cacheVSCodeVersion } from "./lib/utils"
 import { server } from "./server"
 
@@ -48,6 +49,7 @@ export async function runServer(options: RunServerOptions): Promise<void> {
   state.showToken = options.showToken
 
   await ensurePaths()
+  initTokenStore()
   await cacheVSCodeVersion()
 
   if (options.githubToken) {
@@ -111,7 +113,8 @@ export async function runServer(options: RunServerOptions): Promise<void> {
   }
 
   consola.box(
-    `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage`,
+    `🌐 Usage Viewer: https://ericc-ch.github.io/copilot-api?endpoint=${serverUrl}/usage\n`
+      + `📊 Token Dashboard: ${serverUrl}/dashboard`,
   )
 
   serve({
diff --git a/tests/dashboard-budget.test.ts b/tests/dashboard-budget.test.ts
new file mode 100644
index 000000000..0a443b3ee
--- /dev/null
+++ b/tests/dashboard-budget.test.ts
@@ -0,0 +1,101 @@
+import { describe, expect, test } from "bun:test"
+
+import {
+  getBudgetColor,
+  getPercentUsed,
+  getQuotaLabel,
+} from "~/routes/dashboard/budget"
+
+describe("getBudgetColor", () => {
+  test("returns green below 80%", () => {
+    expect(getBudgetColor(0)).toBe("#22c55e")
+    expect(getBudgetColor(50)).toBe("#22c55e")
+    expect(getBudgetColor(79.9)).toBe("#22c55e")
+  })
+
+  test("returns yellow at 80%", () => {
+    expect(getBudgetColor(80)).toBe("#f59e0b")
+    expect(getBudgetColor(85)).toBe("#f59e0b")
+    expect(getBudgetColor(94.9)).toBe("#f59e0b")
+  })
+
+  test("returns red at 95%", () => {
+    expect(getBudgetColor(95)).toBe("#ef4444")
+    expect(getBudgetColor(99)).toBe("#ef4444")
+    expect(getBudgetColor(100)).toBe("#ef4444")
+  })
+})
+
+describe("getPercentUsed", () => {
+  test("returns 0 for unlimited quota", () => {
+    expect(
+      getPercentUsed({
+        entitlement: 1000,
+        remaining: 200,
+        percent_remaining: 20,
+        unlimited: true,
+      }),
+    ).toBe(0)
+  })
+
+  test("returns 0 for zero entitlement", () => {
+    expect(
+      getPercentUsed({
+        entitlement: 0,
+        remaining: 0,
+        percent_remaining: 0,
+        unlimited: false,
+      }),
+    ).toBe(0)
+  })
+
+  test("calculates correct percent used", () => {
+    expect(
+      getPercentUsed({
+        entitlement: 1000,
+        remaining: 800,
+        percent_remaining: 80,
+        unlimited: false,
+      }),
+    ).toBe(20)
+    expect(
+      getPercentUsed({
+        entitlement: 100,
+        remaining: 5,
+        percent_remaining: 5,
+        unlimited: false,
+      }),
+    ).toBe(95)
+    expect(
+      getPercentUsed({
+        entitlement: 100,
+        remaining: 0,
+        percent_remaining: 0,
+        unlimited: false,
+      }),
+    ).toBe(100)
+  })
+
+  test("caps at 100% when over limit", () => {
+    expect(
+      getPercentUsed({
+        entitlement: 100,
+        remaining: -10,
+        percent_remaining: 0,
+        unlimited: false,
+      }),
+    ).toBe(100)
+  })
+})
+
+describe("getQuotaLabel", () => {
+  test("returns friendly label for known keys", () => {
+    expect(getQuotaLabel("premium_interactions")).toBe("Premium Interactions")
+    expect(getQuotaLabel("chat")).toBe("Chat")
+    expect(getQuotaLabel("completions")).toBe("Completions")
+  })
+
+  test("capitalizes unknown keys", () => {
+    expect(getQuotaLabel("some_quota")).toBe("Some Quota")
+  })
+})