Skip to content

Commit e9cfd8e

Browse files
Align Claude token usage with Codex semantics
1 parent 934037c commit e9cfd8e

File tree

2 files changed

+182
-29
lines changed

2 files changed

+182
-29
lines changed

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => {
14081408
);
14091409
});
14101410

1411+
// Verifies that a result message whose `usage.total_tokens` (535000) exceeds the
// reported `contextWindow` (200000) yields a usage event with `usedTokens` capped at
// the window while the raw total is kept in `totalProcessedTokens`.
it.effect("clamps oversized Claude usage to the reported context window", () => {
  const harness = makeHarness();

  // SDK result frame reporting more tokens than the model's context window.
  const oversizedResultMessage = {
    type: "result",
    subtype: "success",
    is_error: false,
    duration_ms: 1234,
    duration_api_ms: 1200,
    num_turns: 1,
    result: "done",
    stop_reason: "end_turn",
    session_id: "sdk-session-result-usage-clamped",
    usage: {
      total_tokens: 535000,
    },
    modelUsage: {
      "claude-opus-4-6": {
        contextWindow: 200000,
        maxOutputTokens: 64000,
      },
    },
  } as unknown as SDKMessage;

  // Payload the adapter is expected to publish for the frame above.
  const expectedPayload = {
    usage: {
      usedTokens: 200000,
      lastUsedTokens: 200000,
      totalProcessedTokens: 535000,
      maxTokens: 200000,
    },
  };

  const scenario = Effect.gen(function* () {
    const adapter = yield* ClaudeAdapter;

    // Start collecting the first 7 runtime events before anything is emitted.
    const collectorFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
      Stream.runCollect,
      Effect.forkChild,
    );

    yield* adapter.startSession({
      threadId: THREAD_ID,
      provider: "claudeAgent",
      runtimeMode: "full-access",
    });

    yield* adapter.sendTurn({
      threadId: THREAD_ID,
      input: "hello",
      attachments: [],
    });

    harness.query.emit(oversizedResultMessage);
    harness.query.finish();

    const collected = Array.from(yield* Fiber.join(collectorFiber));
    const tokenUsageEvent = collected.find(
      (candidate) => candidate.type === "thread.token-usage.updated",
    );
    assert.equal(tokenUsageEvent?.type, "thread.token-usage.updated");
    if (tokenUsageEvent?.type === "thread.token-usage.updated") {
      assert.deepEqual(tokenUsageEvent.payload, expectedPayload);
    }
  });

  return scenario.pipe(
    Effect.provideService(Random.Random, makeDeterministicRandomService()),
    Effect.provide(harness.layer),
  );
});
1473+
1474+
// Verifies that when a `task_progress` frame (190000 tokens) precedes an oversized
// result frame (535000 tokens, 200000 context window), the last usage event keeps the
// progress snapshot as `usedTokens` and reports the result total as
// `totalProcessedTokens`.
it.effect(
  "preserves oversized Claude result totals after task progress snapshots are recorded",
  () => {
    const harness = makeHarness();

    // Intermediate progress frame carrying an in-window usage figure.
    const progressMessage = {
      type: "system",
      subtype: "task_progress",
      task_id: "task-usage-clamped",
      description: "Thinking through the patch",
      usage: {
        total_tokens: 190000,
      },
      session_id: "sdk-session-task-usage-clamped",
      uuid: "task-usage-progress-clamped",
    } as unknown as SDKMessage;

    // Final result frame whose total exceeds the reported context window.
    const oversizedResultMessage = {
      type: "result",
      subtype: "success",
      is_error: false,
      duration_ms: 1234,
      duration_api_ms: 1200,
      num_turns: 1,
      result: "done",
      stop_reason: "end_turn",
      session_id: "sdk-session-result-usage-clamped-after-progress",
      usage: {
        total_tokens: 535000,
      },
      modelUsage: {
        "claude-opus-4-6": {
          contextWindow: 200000,
          maxOutputTokens: 64000,
        },
      },
    } as unknown as SDKMessage;

    // Payload expected on the last token-usage event.
    const expectedFinalPayload = {
      usage: {
        usedTokens: 190000,
        lastUsedTokens: 190000,
        totalProcessedTokens: 535000,
        maxTokens: 200000,
      },
    };

    const scenario = Effect.gen(function* () {
      const adapter = yield* ClaudeAdapter;

      // Start collecting the first 9 runtime events before anything is emitted.
      const collectorFiber = yield* Stream.take(adapter.streamEvents, 9).pipe(
        Stream.runCollect,
        Effect.forkChild,
      );

      yield* adapter.startSession({
        threadId: THREAD_ID,
        provider: "claudeAgent",
        runtimeMode: "full-access",
      });

      yield* adapter.sendTurn({
        threadId: THREAD_ID,
        input: "hello",
        attachments: [],
      });

      harness.query.emit(progressMessage);
      harness.query.emit(oversizedResultMessage);
      harness.query.finish();

      const collected = Array.from(yield* Fiber.join(collectorFiber));
      const tokenUsageEvents = collected.filter(
        (candidate) => candidate.type === "thread.token-usage.updated",
      );
      const lastTokenUsageEvent = tokenUsageEvents.at(-1);
      assert.equal(lastTokenUsageEvent?.type, "thread.token-usage.updated");
      if (lastTokenUsageEvent?.type === "thread.token-usage.updated") {
        assert.deepEqual(lastTokenUsageEvent.payload, expectedFinalPayload);
      }
    });

    return scenario.pipe(
      Effect.provideService(Random.Random, makeDeterministicRandomService()),
      Effect.provide(harness.layer),
    );
  },
);
1554+
14111555
it.effect(
14121556
"emits completion only after turn result when assistant frames arrive before deltas",
14131557
() => {

apps/server/src/provider/Layers/ClaudeAdapter.ts

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -297,53 +297,58 @@ function maxClaudeContextWindowFromModelUsage(modelUsage: unknown): number | und
297297
}
298298

299299
/**
 * Converts a raw Claude usage payload into a `ThreadTokenUsageSnapshot`.
 *
 * The total number of processed tokens is taken from `total_tokens` when that field is
 * a positive finite number; otherwise it is derived as input tokens (including
 * `cache_creation_input_tokens` and `cache_read_input_tokens`) plus `output_tokens`.
 * When a positive finite `contextWindow` is supplied, `usedTokens` is clamped to it and
 * the unclamped total is reported separately as `totalProcessedTokens`.
 *
 * @param value - Untrusted usage payload; anything that is not a non-null object is rejected.
 * @param contextWindow - Optional context window size used as `maxTokens` and as the
 *   upper bound for `usedTokens`. Ignored unless it is a positive finite number.
 * @returns The normalized snapshot, or `undefined` when no positive token total can be
 *   determined from the payload.
 */
function normalizeClaudeTokenUsage(
  value: unknown,
  contextWindow?: number,
): ThreadTokenUsageSnapshot | undefined {
  if (!value || typeof value !== "object") {
    return undefined;
  }

  const usage = value as Record<string, unknown>;

  // Reads a field only when it is an actual finite number; everything else is ignored.
  const readFiniteNumber = (key: string): number | undefined => {
    const field = usage[key];
    return typeof field === "number" && Number.isFinite(field) ? field : undefined;
  };

  const inputTokens =
    (readFiniteNumber("input_tokens") ?? 0) +
    (readFiniteNumber("cache_creation_input_tokens") ?? 0) +
    (readFiniteNumber("cache_read_input_tokens") ?? 0);
  const outputTokens = readFiniteNumber("output_tokens") ?? 0;
  const derivedTotalProcessedTokens = inputTokens + outputTokens;

  // Only a positive reported total is authoritative. A zero or negative `total_tokens`
  // must not suppress the fallback to the derived input + output sum.
  const reportedTotalTokens = readFiniteNumber("total_tokens");
  const totalProcessedTokens =
    reportedTotalTokens !== undefined && reportedTotalTokens > 0
      ? reportedTotalTokens
      : derivedTotalProcessedTokens;
  // Written as a negated `>` so that a NaN total is rejected as well.
  if (!(totalProcessedTokens > 0)) {
    return undefined;
  }

  const maxTokens =
    typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
      ? contextWindow
      : undefined;
  // Usage shown against the window can never exceed the window itself.
  const usedTokens =
    maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens;

  const toolUses = readFiniteNumber("tool_uses");
  const durationMs = readFiniteNumber("duration_ms");

  // Optional fields are spread conditionally so absent values are omitted entirely
  // rather than being present with an `undefined` value.
  return {
    usedTokens,
    lastUsedTokens: usedTokens,
    ...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}),
    ...(inputTokens > 0 ? { inputTokens } : {}),
    ...(outputTokens > 0 ? { outputTokens } : {}),
    ...(maxTokens !== undefined ? { maxTokens } : {}),
    ...(toolUses !== undefined ? { toolUses } : {}),
    ...(durationMs !== undefined ? { durationMs } : {}),
  };
}
@@ -1344,6 +1349,8 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13441349
resultUsage,
13451350
resultContextWindow ?? context.lastKnownContextWindow,
13461351
);
1352+
const accumulatedTotalProcessedTokens =
1353+
accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
13471354
const lastGoodUsage = context.lastKnownTokenUsage;
13481355
const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
13491356
const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
@@ -1352,8 +1359,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
13521359
...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
13531360
? { maxTokens }
13541361
: {}),
1355-
...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens
1356-
? { totalProcessedTokens: accumulatedSnapshot.usedTokens }
1362+
...(typeof accumulatedTotalProcessedTokens === "number" &&
1363+
Number.isFinite(accumulatedTotalProcessedTokens) &&
1364+
accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
1365+
? { totalProcessedTokens: accumulatedTotalProcessedTokens }
13571366
: {}),
13581367
}
13591368
: accumulatedSnapshot;

0 commit comments

Comments
 (0)