Skip to content

Commit 75b5cea

Browse files
Align Claude token usage with Codex semantics
1 parent 934037c commit 75b5cea

File tree

2 files changed

+180
-25
lines changed

2 files changed

+180
-25
lines changed

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 144 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => {
14081408
);
14091409
});
14101410

1411+
it.effect("clamps oversized Claude usage to the reported context window", () => {
1412+
const harness = makeHarness();
1413+
return Effect.gen(function* () {
1414+
const adapter = yield* ClaudeAdapter;
1415+
1416+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
1417+
Stream.runCollect,
1418+
Effect.forkChild,
1419+
);
1420+
1421+
yield* adapter.startSession({
1422+
threadId: THREAD_ID,
1423+
provider: "claudeAgent",
1424+
runtimeMode: "full-access",
1425+
});
1426+
1427+
yield* adapter.sendTurn({
1428+
threadId: THREAD_ID,
1429+
input: "hello",
1430+
attachments: [],
1431+
});
1432+
1433+
harness.query.emit({
1434+
type: "result",
1435+
subtype: "success",
1436+
is_error: false,
1437+
duration_ms: 1234,
1438+
duration_api_ms: 1200,
1439+
num_turns: 1,
1440+
result: "done",
1441+
stop_reason: "end_turn",
1442+
session_id: "sdk-session-result-usage-clamped",
1443+
usage: {
1444+
total_tokens: 535000,
1445+
},
1446+
modelUsage: {
1447+
"claude-opus-4-6": {
1448+
contextWindow: 200000,
1449+
maxOutputTokens: 64000,
1450+
},
1451+
},
1452+
} as unknown as SDKMessage);
1453+
harness.query.finish();
1454+
1455+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1456+
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
1457+
assert.equal(usageEvent?.type, "thread.token-usage.updated");
1458+
if (usageEvent?.type === "thread.token-usage.updated") {
1459+
assert.deepEqual(usageEvent.payload, {
1460+
usage: {
1461+
usedTokens: 200000,
1462+
lastUsedTokens: 200000,
1463+
totalProcessedTokens: 535000,
1464+
maxTokens: 200000,
1465+
},
1466+
});
1467+
}
1468+
}).pipe(
1469+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1470+
Effect.provide(harness.layer),
1471+
);
1472+
});
1473+
1474+
it.effect(
1475+
"preserves oversized Claude result totals after task progress snapshots are recorded",
1476+
() => {
1477+
const harness = makeHarness();
1478+
return Effect.gen(function* () {
1479+
const adapter = yield* ClaudeAdapter;
1480+
1481+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 9).pipe(
1482+
Stream.runCollect,
1483+
Effect.forkChild,
1484+
);
1485+
1486+
yield* adapter.startSession({
1487+
threadId: THREAD_ID,
1488+
provider: "claudeAgent",
1489+
runtimeMode: "full-access",
1490+
});
1491+
1492+
yield* adapter.sendTurn({
1493+
threadId: THREAD_ID,
1494+
input: "hello",
1495+
attachments: [],
1496+
});
1497+
1498+
harness.query.emit({
1499+
type: "system",
1500+
subtype: "task_progress",
1501+
task_id: "task-usage-clamped",
1502+
description: "Thinking through the patch",
1503+
usage: {
1504+
total_tokens: 190000,
1505+
},
1506+
session_id: "sdk-session-task-usage-clamped",
1507+
uuid: "task-usage-progress-clamped",
1508+
} as unknown as SDKMessage);
1509+
1510+
harness.query.emit({
1511+
type: "result",
1512+
subtype: "success",
1513+
is_error: false,
1514+
duration_ms: 1234,
1515+
duration_api_ms: 1200,
1516+
num_turns: 1,
1517+
result: "done",
1518+
stop_reason: "end_turn",
1519+
session_id: "sdk-session-result-usage-clamped-after-progress",
1520+
usage: {
1521+
total_tokens: 535000,
1522+
},
1523+
modelUsage: {
1524+
"claude-opus-4-6": {
1525+
contextWindow: 200000,
1526+
maxOutputTokens: 64000,
1527+
},
1528+
},
1529+
} as unknown as SDKMessage);
1530+
harness.query.finish();
1531+
1532+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1533+
const usageEvents = runtimeEvents.filter(
1534+
(event) => event.type === "thread.token-usage.updated",
1535+
);
1536+
const finalUsageEvent = usageEvents.at(-1);
1537+
assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
1538+
if (finalUsageEvent?.type === "thread.token-usage.updated") {
1539+
assert.deepEqual(finalUsageEvent.payload, {
1540+
usage: {
1541+
usedTokens: 190000,
1542+
lastUsedTokens: 190000,
1543+
totalProcessedTokens: 535000,
1544+
maxTokens: 200000,
1545+
},
1546+
});
1547+
}
1548+
}).pipe(
1549+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1550+
Effect.provide(harness.layer),
1551+
);
1552+
},
1553+
);
1554+
14111555
it.effect(
14121556
"emits completion only after turn result when assistant frames arrive before deltas",
14131557
() => {

apps/server/src/provider/Layers/ClaudeAdapter.ts

Lines changed: 36 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -304,46 +304,53 @@ function normalizeClaudeTokenUsage(
304304
return undefined;
305305
}
306306

307-
const record = usage as Record<string, unknown>;
308-
const directUsedTokens =
309-
typeof record.total_tokens === "number" && Number.isFinite(record.total_tokens)
310-
? record.total_tokens
307+
const usageRecord = usage as Record<string, unknown>;
308+
const directTotalProcessedTokens =
309+
typeof usageRecord.total_tokens === "number" && Number.isFinite(usageRecord.total_tokens)
310+
? usageRecord.total_tokens
311311
: undefined;
312312
const inputTokens =
313-
(typeof record.input_tokens === "number" && Number.isFinite(record.input_tokens)
314-
? record.input_tokens
313+
(typeof usageRecord.input_tokens === "number" && Number.isFinite(usageRecord.input_tokens)
314+
? usageRecord.input_tokens
315315
: 0) +
316-
(typeof record.cache_creation_input_tokens === "number" &&
317-
Number.isFinite(record.cache_creation_input_tokens)
318-
? record.cache_creation_input_tokens
316+
(typeof usageRecord.cache_creation_input_tokens === "number" &&
317+
Number.isFinite(usageRecord.cache_creation_input_tokens)
318+
? usageRecord.cache_creation_input_tokens
319319
: 0) +
320-
(typeof record.cache_read_input_tokens === "number" &&
321-
Number.isFinite(record.cache_read_input_tokens)
322-
? record.cache_read_input_tokens
320+
(typeof usageRecord.cache_read_input_tokens === "number" &&
321+
Number.isFinite(usageRecord.cache_read_input_tokens)
322+
? usageRecord.cache_read_input_tokens
323323
: 0);
324324
const outputTokens =
325-
typeof record.output_tokens === "number" && Number.isFinite(record.output_tokens)
326-
? record.output_tokens
325+
typeof usageRecord.output_tokens === "number" && Number.isFinite(usageRecord.output_tokens)
326+
? usageRecord.output_tokens
327327
: 0;
328328
const derivedUsedTokens = inputTokens + outputTokens;
329-
const usedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
330-
if (usedTokens === undefined || usedTokens <= 0) {
329+
const totalProcessedTokens =
330+
directTotalProcessedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
331+
if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) {
331332
return undefined;
332333
}
333334

335+
const maxTokens =
336+
typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
337+
? contextWindow
338+
: undefined;
339+
const usedTokens =
340+
maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens;
341+
334342
return {
335343
usedTokens,
336344
lastUsedTokens: usedTokens,
345+
...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}),
337346
...(inputTokens > 0 ? { inputTokens } : {}),
338347
...(outputTokens > 0 ? { outputTokens } : {}),
339-
...(typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
340-
? { maxTokens: contextWindow }
341-
: {}),
342-
...(typeof record.tool_uses === "number" && Number.isFinite(record.tool_uses)
343-
? { toolUses: record.tool_uses }
348+
...(maxTokens !== undefined ? { maxTokens } : {}),
349+
...(typeof usageRecord.tool_uses === "number" && Number.isFinite(usageRecord.tool_uses)
350+
? { toolUses: usageRecord.tool_uses }
344351
: {}),
345-
...(typeof record.duration_ms === "number" && Number.isFinite(record.duration_ms)
346-
? { durationMs: record.duration_ms }
352+
...(typeof usageRecord.duration_ms === "number" && Number.isFinite(usageRecord.duration_ms)
353+
? { durationMs: usageRecord.duration_ms }
347354
: {}),
348355
};
349356
}
@@ -1344,6 +1351,8 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13441351
resultUsage,
13451352
resultContextWindow ?? context.lastKnownContextWindow,
13461353
);
1354+
const accumulatedTotalProcessedTokens =
1355+
accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
13471356
const lastGoodUsage = context.lastKnownTokenUsage;
13481357
const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
13491358
const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
@@ -1352,8 +1361,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13521361
...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
13531362
? { maxTokens }
13541363
: {}),
1355-
...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens
1356-
? { totalProcessedTokens: accumulatedSnapshot.usedTokens }
1364+
...(typeof accumulatedTotalProcessedTokens === "number" &&
1365+
Number.isFinite(accumulatedTotalProcessedTokens) &&
1366+
accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
1367+
? { totalProcessedTokens: accumulatedTotalProcessedTokens }
13571368
: {}),
13581369
}
13591370
: accumulatedSnapshot;

0 commit comments

Comments
 (0)