Skip to content

Commit 7a00846

Browse files
ashvinnihalani, juliusmarminge, claude
authored
fix: Align token usage metrics for both Claude and Codex (#1943)
Co-authored-by: Julius Marminge <julius0216@outlook.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f59ee36 commit 7a00846

File tree

2 files changed

+191
-48
lines changed

2 files changed

+191
-48
lines changed

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 144 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => {
14081408
);
14091409
});
14101410

1411+
it.effect("clamps oversized Claude usage to the reported context window", () => {
1412+
const harness = makeHarness();
1413+
return Effect.gen(function* () {
1414+
const adapter = yield* ClaudeAdapter;
1415+
1416+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
1417+
Stream.runCollect,
1418+
Effect.forkChild,
1419+
);
1420+
1421+
yield* adapter.startSession({
1422+
threadId: THREAD_ID,
1423+
provider: "claudeAgent",
1424+
runtimeMode: "full-access",
1425+
});
1426+
1427+
yield* adapter.sendTurn({
1428+
threadId: THREAD_ID,
1429+
input: "hello",
1430+
attachments: [],
1431+
});
1432+
1433+
harness.query.emit({
1434+
type: "result",
1435+
subtype: "success",
1436+
is_error: false,
1437+
duration_ms: 1234,
1438+
duration_api_ms: 1200,
1439+
num_turns: 1,
1440+
result: "done",
1441+
stop_reason: "end_turn",
1442+
session_id: "sdk-session-result-usage-clamped",
1443+
usage: {
1444+
total_tokens: 535000,
1445+
},
1446+
modelUsage: {
1447+
"claude-opus-4-6": {
1448+
contextWindow: 200000,
1449+
maxOutputTokens: 64000,
1450+
},
1451+
},
1452+
} as unknown as SDKMessage);
1453+
harness.query.finish();
1454+
1455+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1456+
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
1457+
assert.equal(usageEvent?.type, "thread.token-usage.updated");
1458+
if (usageEvent?.type === "thread.token-usage.updated") {
1459+
assert.deepEqual(usageEvent.payload, {
1460+
usage: {
1461+
usedTokens: 200000,
1462+
lastUsedTokens: 200000,
1463+
totalProcessedTokens: 535000,
1464+
maxTokens: 200000,
1465+
},
1466+
});
1467+
}
1468+
}).pipe(
1469+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1470+
Effect.provide(harness.layer),
1471+
);
1472+
});
1473+
1474+
it.effect(
1475+
"preserves oversized Claude result totals after task progress snapshots are recorded",
1476+
() => {
1477+
const harness = makeHarness();
1478+
return Effect.gen(function* () {
1479+
const adapter = yield* ClaudeAdapter;
1480+
1481+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 9).pipe(
1482+
Stream.runCollect,
1483+
Effect.forkChild,
1484+
);
1485+
1486+
yield* adapter.startSession({
1487+
threadId: THREAD_ID,
1488+
provider: "claudeAgent",
1489+
runtimeMode: "full-access",
1490+
});
1491+
1492+
yield* adapter.sendTurn({
1493+
threadId: THREAD_ID,
1494+
input: "hello",
1495+
attachments: [],
1496+
});
1497+
1498+
harness.query.emit({
1499+
type: "system",
1500+
subtype: "task_progress",
1501+
task_id: "task-usage-clamped",
1502+
description: "Thinking through the patch",
1503+
usage: {
1504+
total_tokens: 190000,
1505+
},
1506+
session_id: "sdk-session-task-usage-clamped",
1507+
uuid: "task-usage-progress-clamped",
1508+
} as unknown as SDKMessage);
1509+
1510+
harness.query.emit({
1511+
type: "result",
1512+
subtype: "success",
1513+
is_error: false,
1514+
duration_ms: 1234,
1515+
duration_api_ms: 1200,
1516+
num_turns: 1,
1517+
result: "done",
1518+
stop_reason: "end_turn",
1519+
session_id: "sdk-session-result-usage-clamped-after-progress",
1520+
usage: {
1521+
total_tokens: 535000,
1522+
},
1523+
modelUsage: {
1524+
"claude-opus-4-6": {
1525+
contextWindow: 200000,
1526+
maxOutputTokens: 64000,
1527+
},
1528+
},
1529+
} as unknown as SDKMessage);
1530+
harness.query.finish();
1531+
1532+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1533+
const usageEvents = runtimeEvents.filter(
1534+
(event) => event.type === "thread.token-usage.updated",
1535+
);
1536+
const finalUsageEvent = usageEvents.at(-1);
1537+
assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
1538+
if (finalUsageEvent?.type === "thread.token-usage.updated") {
1539+
assert.deepEqual(finalUsageEvent.payload, {
1540+
usage: {
1541+
usedTokens: 190000,
1542+
lastUsedTokens: 190000,
1543+
totalProcessedTokens: 535000,
1544+
maxTokens: 200000,
1545+
},
1546+
});
1547+
}
1548+
}).pipe(
1549+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1550+
Effect.provide(harness.layer),
1551+
);
1552+
},
1553+
);
1554+
14111555
it.effect(
14121556
"emits completion only after turn result when assistant frames arrive before deltas",
14131557
() => {

apps/server/src/provider/Layers/ClaudeAdapter.ts

Lines changed: 47 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,8 @@ import {
1717
type SDKResultMessage,
1818
type SettingSource,
1919
type SDKUserMessage,
20+
ModelUsage,
21+
NonNullableUsage,
2022
} from "@anthropic-ai/claude-agent-sdk";
2123
import {
2224
ApprovalRequestId,
@@ -272,78 +274,73 @@ function asRuntimeItemId(value: string): RuntimeItemId {
272274
return RuntimeItemId.make(value);
273275
}
274276

275-
function maxClaudeContextWindowFromModelUsage(modelUsage: unknown): number | undefined {
276-
if (!modelUsage || typeof modelUsage !== "object") {
277-
return undefined;
278-
}
277+
function maxClaudeContextWindowFromModelUsage(
278+
modelUsage: Record<string, ModelUsage> | undefined,
279+
): number | undefined {
280+
if (!modelUsage) return undefined;
279281

280282
let maxContextWindow: number | undefined;
281-
for (const value of Object.values(modelUsage as Record<string, unknown>)) {
282-
if (!value || typeof value !== "object") {
283-
continue;
284-
}
285-
const contextWindow = (value as { contextWindow?: unknown }).contextWindow;
286-
if (
287-
typeof contextWindow !== "number" ||
288-
!Number.isFinite(contextWindow) ||
289-
contextWindow <= 0
290-
) {
291-
continue;
292-
}
283+
for (const value of Object.values(modelUsage)) {
284+
const contextWindow = value.contextWindow;
293285
maxContextWindow = Math.max(maxContextWindow ?? 0, contextWindow);
294286
}
295287

296288
return maxContextWindow;
297289
}
298290

299291
function normalizeClaudeTokenUsage(
300-
usage: unknown,
292+
value: NonNullableUsage | undefined,
301293
contextWindow?: number,
302294
): ThreadTokenUsageSnapshot | undefined {
303-
if (!usage || typeof usage !== "object") {
295+
if (!value || typeof value !== "object") {
304296
return undefined;
305297
}
306298

307-
const record = usage as Record<string, unknown>;
308-
const directUsedTokens =
309-
typeof record.total_tokens === "number" && Number.isFinite(record.total_tokens)
310-
? record.total_tokens
311-
: undefined;
299+
const usage = value as Record<string, unknown>;
312300
const inputTokens =
313-
(typeof record.input_tokens === "number" && Number.isFinite(record.input_tokens)
314-
? record.input_tokens
301+
(typeof usage.input_tokens === "number" && Number.isFinite(usage.input_tokens)
302+
? usage.input_tokens
315303
: 0) +
316-
(typeof record.cache_creation_input_tokens === "number" &&
317-
Number.isFinite(record.cache_creation_input_tokens)
318-
? record.cache_creation_input_tokens
304+
(typeof usage.cache_creation_input_tokens === "number" &&
305+
Number.isFinite(usage.cache_creation_input_tokens)
306+
? usage.cache_creation_input_tokens
319307
: 0) +
320-
(typeof record.cache_read_input_tokens === "number" &&
321-
Number.isFinite(record.cache_read_input_tokens)
322-
? record.cache_read_input_tokens
308+
(typeof usage.cache_read_input_tokens === "number" &&
309+
Number.isFinite(usage.cache_read_input_tokens)
310+
? usage.cache_read_input_tokens
323311
: 0);
324312
const outputTokens =
325-
typeof record.output_tokens === "number" && Number.isFinite(record.output_tokens)
326-
? record.output_tokens
313+
typeof usage.output_tokens === "number" && Number.isFinite(usage.output_tokens)
314+
? usage.output_tokens
327315
: 0;
328-
const derivedUsedTokens = inputTokens + outputTokens;
329-
const usedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
330-
if (usedTokens === undefined || usedTokens <= 0) {
316+
const derivedTotalProcessedTokens = inputTokens + outputTokens;
317+
const totalProcessedTokens =
318+
(typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens)
319+
? usage.total_tokens
320+
: undefined) ?? (derivedTotalProcessedTokens > 0 ? derivedTotalProcessedTokens : undefined);
321+
if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) {
331322
return undefined;
332323
}
333324

325+
const maxTokens =
326+
typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
327+
? contextWindow
328+
: undefined;
329+
const usedTokens =
330+
maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens;
331+
334332
return {
335333
usedTokens,
336334
lastUsedTokens: usedTokens,
335+
...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}),
337336
...(inputTokens > 0 ? { inputTokens } : {}),
338337
...(outputTokens > 0 ? { outputTokens } : {}),
339-
...(typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
340-
? { maxTokens: contextWindow }
341-
: {}),
342-
...(typeof record.tool_uses === "number" && Number.isFinite(record.tool_uses)
343-
? { toolUses: record.tool_uses }
338+
...(maxTokens !== undefined ? { maxTokens } : {}),
339+
...(typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses)
340+
? { toolUses: usage.tool_uses }
344341
: {}),
345-
...(typeof record.duration_ms === "number" && Number.isFinite(record.duration_ms)
346-
? { durationMs: record.duration_ms }
342+
...(typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms)
343+
? { durationMs: usage.duration_ms }
347344
: {}),
348345
};
349346
}
@@ -1328,8 +1325,6 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13281325
errorMessage?: string,
13291326
result?: SDKResultMessage,
13301327
) {
1331-
const resultUsage =
1332-
result?.usage && typeof result.usage === "object" ? { ...result.usage } : undefined;
13331328
const resultContextWindow = maxClaudeContextWindowFromModelUsage(result?.modelUsage);
13341329
if (resultContextWindow !== undefined) {
13351330
context.lastKnownContextWindow = resultContextWindow;
@@ -1341,9 +1336,11 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13411336
// Instead, use the last known context-window-accurate usage from task_progress
13421337
// events and treat the accumulated total as totalProcessedTokens.
13431338
const accumulatedSnapshot = normalizeClaudeTokenUsage(
1344-
resultUsage,
1339+
result?.usage,
13451340
resultContextWindow ?? context.lastKnownContextWindow,
13461341
);
1342+
const accumulatedTotalProcessedTokens =
1343+
accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
13471344
const lastGoodUsage = context.lastKnownTokenUsage;
13481345
const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
13491346
const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
@@ -1352,8 +1349,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13521349
...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
13531350
? { maxTokens }
13541351
: {}),
1355-
...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens
1356-
? { totalProcessedTokens: accumulatedSnapshot.usedTokens }
1352+
...(typeof accumulatedTotalProcessedTokens === "number" &&
1353+
Number.isFinite(accumulatedTotalProcessedTokens) &&
1354+
accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
1355+
? { totalProcessedTokens: accumulatedTotalProcessedTokens }
13571356
: {}),
13581357
}
13591358
: accumulatedSnapshot;

0 commit comments

Comments
 (0)