Skip to content

Commit 88a4d08

Browse files
author
Ashvin Nihalani
committed
Clamp Claude token usage to reported context window
1 parent 934037c commit 88a4d08

File tree

2 files changed

+160
-7
lines changed

2 files changed

+160
-7
lines changed

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 144 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => {
14081408
);
14091409
});
14101410

1411+
it.effect("clamps oversized Claude usage to the reported context window", () => {
1412+
const harness = makeHarness();
1413+
return Effect.gen(function* () {
1414+
const adapter = yield* ClaudeAdapter;
1415+
1416+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
1417+
Stream.runCollect,
1418+
Effect.forkChild,
1419+
);
1420+
1421+
yield* adapter.startSession({
1422+
threadId: THREAD_ID,
1423+
provider: "claudeAgent",
1424+
runtimeMode: "full-access",
1425+
});
1426+
1427+
yield* adapter.sendTurn({
1428+
threadId: THREAD_ID,
1429+
input: "hello",
1430+
attachments: [],
1431+
});
1432+
1433+
harness.query.emit({
1434+
type: "result",
1435+
subtype: "success",
1436+
is_error: false,
1437+
duration_ms: 1234,
1438+
duration_api_ms: 1200,
1439+
num_turns: 1,
1440+
result: "done",
1441+
stop_reason: "end_turn",
1442+
session_id: "sdk-session-result-usage-clamped",
1443+
usage: {
1444+
total_tokens: 535000,
1445+
},
1446+
modelUsage: {
1447+
"claude-opus-4-6": {
1448+
contextWindow: 200000,
1449+
maxOutputTokens: 64000,
1450+
},
1451+
},
1452+
} as unknown as SDKMessage);
1453+
harness.query.finish();
1454+
1455+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1456+
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
1457+
assert.equal(usageEvent?.type, "thread.token-usage.updated");
1458+
if (usageEvent?.type === "thread.token-usage.updated") {
1459+
assert.deepEqual(usageEvent.payload, {
1460+
usage: {
1461+
usedTokens: 200000,
1462+
lastUsedTokens: 200000,
1463+
totalProcessedTokens: 535000,
1464+
maxTokens: 200000,
1465+
},
1466+
});
1467+
}
1468+
}).pipe(
1469+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1470+
Effect.provide(harness.layer),
1471+
);
1472+
});
1473+
1474+
it.effect(
1475+
"preserves oversized Claude result totals after task progress snapshots are recorded",
1476+
() => {
1477+
const harness = makeHarness();
1478+
return Effect.gen(function* () {
1479+
const adapter = yield* ClaudeAdapter;
1480+
1481+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 9).pipe(
1482+
Stream.runCollect,
1483+
Effect.forkChild,
1484+
);
1485+
1486+
yield* adapter.startSession({
1487+
threadId: THREAD_ID,
1488+
provider: "claudeAgent",
1489+
runtimeMode: "full-access",
1490+
});
1491+
1492+
yield* adapter.sendTurn({
1493+
threadId: THREAD_ID,
1494+
input: "hello",
1495+
attachments: [],
1496+
});
1497+
1498+
harness.query.emit({
1499+
type: "system",
1500+
subtype: "task_progress",
1501+
task_id: "task-usage-clamped",
1502+
description: "Thinking through the patch",
1503+
usage: {
1504+
total_tokens: 190000,
1505+
},
1506+
session_id: "sdk-session-task-usage-clamped",
1507+
uuid: "task-usage-progress-clamped",
1508+
} as unknown as SDKMessage);
1509+
1510+
harness.query.emit({
1511+
type: "result",
1512+
subtype: "success",
1513+
is_error: false,
1514+
duration_ms: 1234,
1515+
duration_api_ms: 1200,
1516+
num_turns: 1,
1517+
result: "done",
1518+
stop_reason: "end_turn",
1519+
session_id: "sdk-session-result-usage-clamped-after-progress",
1520+
usage: {
1521+
total_tokens: 535000,
1522+
},
1523+
modelUsage: {
1524+
"claude-opus-4-6": {
1525+
contextWindow: 200000,
1526+
maxOutputTokens: 64000,
1527+
},
1528+
},
1529+
} as unknown as SDKMessage);
1530+
harness.query.finish();
1531+
1532+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1533+
const usageEvents = runtimeEvents.filter(
1534+
(event) => event.type === "thread.token-usage.updated",
1535+
);
1536+
const finalUsageEvent = usageEvents.at(-1);
1537+
assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
1538+
if (finalUsageEvent?.type === "thread.token-usage.updated") {
1539+
assert.deepEqual(finalUsageEvent.payload, {
1540+
usage: {
1541+
usedTokens: 190000,
1542+
lastUsedTokens: 190000,
1543+
totalProcessedTokens: 535000,
1544+
maxTokens: 200000,
1545+
},
1546+
});
1547+
}
1548+
}).pipe(
1549+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1550+
Effect.provide(harness.layer),
1551+
);
1552+
},
1553+
);
1554+
14111555
it.effect(
14121556
"emits completion only after turn result when assistant frames arrive before deltas",
14131557
() => {

apps/server/src/provider/Layers/ClaudeAdapter.ts

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -326,19 +326,24 @@ function normalizeClaudeTokenUsage(
326326
? record.output_tokens
327327
: 0;
328328
const derivedUsedTokens = inputTokens + outputTokens;
329-
const usedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
330-
if (usedTokens === undefined || usedTokens <= 0) {
329+
const rawUsedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
330+
if (rawUsedTokens === undefined || rawUsedTokens <= 0) {
331331
return undefined;
332332
}
333333

334+
const hasContextWindow =
335+
typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0;
336+
const usedTokens = hasContextWindow ? Math.min(rawUsedTokens, contextWindow) : rawUsedTokens;
337+
const totalProcessedTokens =
338+
hasContextWindow && rawUsedTokens > contextWindow ? rawUsedTokens : undefined;
339+
334340
return {
335341
usedTokens,
336342
lastUsedTokens: usedTokens,
343+
...(totalProcessedTokens !== undefined ? { totalProcessedTokens } : {}),
337344
...(inputTokens > 0 ? { inputTokens } : {}),
338345
...(outputTokens > 0 ? { outputTokens } : {}),
339-
...(typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
340-
? { maxTokens: contextWindow }
341-
: {}),
346+
...(hasContextWindow ? { maxTokens: contextWindow } : {}),
342347
...(typeof record.tool_uses === "number" && Number.isFinite(record.tool_uses)
343348
? { toolUses: record.tool_uses }
344349
: {}),
@@ -1344,6 +1349,8 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13441349
resultUsage,
13451350
resultContextWindow ?? context.lastKnownContextWindow,
13461351
);
1352+
const accumulatedTotalProcessedTokens =
1353+
accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
13471354
const lastGoodUsage = context.lastKnownTokenUsage;
13481355
const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
13491356
const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
@@ -1352,8 +1359,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13521359
...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
13531360
? { maxTokens }
13541361
: {}),
1355-
...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens
1356-
? { totalProcessedTokens: accumulatedSnapshot.usedTokens }
1362+
...(typeof accumulatedTotalProcessedTokens === "number" &&
1363+
Number.isFinite(accumulatedTotalProcessedTokens) &&
1364+
accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
1365+
? { totalProcessedTokens: accumulatedTotalProcessedTokens }
13571366
: {}),
13581367
}
13591368
: accumulatedSnapshot;

0 commit comments

Comments
 (0)