Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions apps/server/src/provider/Layers/ClaudeAdapter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,150 @@ describe("ClaudeAdapterLive", () => {
);
});

it.effect("clamps oversized Claude usage to the reported context window", () => {
const harness = makeHarness();
return Effect.gen(function* () {
const adapter = yield* ClaudeAdapter;

const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
Stream.runCollect,
Effect.forkChild,
);

yield* adapter.startSession({
threadId: THREAD_ID,
provider: "claudeAgent",
runtimeMode: "full-access",
});

yield* adapter.sendTurn({
threadId: THREAD_ID,
input: "hello",
attachments: [],
});

harness.query.emit({
type: "result",
subtype: "success",
is_error: false,
duration_ms: 1234,
duration_api_ms: 1200,
num_turns: 1,
result: "done",
stop_reason: "end_turn",
session_id: "sdk-session-result-usage-clamped",
usage: {
total_tokens: 535000,
},
modelUsage: {
"claude-opus-4-6": {
contextWindow: 200000,
maxOutputTokens: 64000,
},
},
} as unknown as SDKMessage);
harness.query.finish();

const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
assert.equal(usageEvent?.type, "thread.token-usage.updated");
if (usageEvent?.type === "thread.token-usage.updated") {
assert.deepEqual(usageEvent.payload, {
usage: {
usedTokens: 200000,
lastUsedTokens: 200000,
totalProcessedTokens: 535000,
maxTokens: 200000,
},
});
}
}).pipe(
Effect.provideService(Random.Random, makeDeterministicRandomService()),
Effect.provide(harness.layer),
);
});

// Regression test: when a `task_progress` snapshot with an in-window usage
// figure (190k) arrives before a `result` whose accumulated total (535k)
// exceeds the context window, the final usage event must keep the
// snapshot's in-window value as `usedTokens` and surface the oversized
// accumulated total only as `totalProcessedTokens`.
it.effect(
  "preserves oversized Claude result totals after task progress snapshots are recorded",
  () => {
    const harness = makeHarness();
    return Effect.gen(function* () {
      const adapter = yield* ClaudeAdapter;

      // `take(9)` — two more events than the previous test, presumably to
      // account for the extra task_progress emission; makes the stream
      // finite so `Fiber.join` terminates.
      const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 9).pipe(
        Stream.runCollect,
        Effect.forkChild,
      );

      yield* adapter.startSession({
        threadId: THREAD_ID,
        provider: "claudeAgent",
        runtimeMode: "full-access",
      });

      yield* adapter.sendTurn({
        threadId: THREAD_ID,
        input: "hello",
        attachments: [],
      });

      // Mid-turn progress snapshot: 190k tokens, inside the 200k window.
      harness.query.emit({
        type: "system",
        subtype: "task_progress",
        task_id: "task-usage-clamped",
        description: "Thinking through the patch",
        usage: {
          total_tokens: 190000,
        },
        session_id: "sdk-session-task-usage-clamped",
        uuid: "task-usage-progress-clamped",
      } as unknown as SDKMessage);

      // Final result: accumulated total (535k) exceeds the 200k window
      // reported in `modelUsage`.
      harness.query.emit({
        type: "result",
        subtype: "success",
        is_error: false,
        duration_ms: 1234,
        duration_api_ms: 1200,
        num_turns: 1,
        result: "done",
        stop_reason: "end_turn",
        session_id: "sdk-session-result-usage-clamped-after-progress",
        usage: {
          total_tokens: 535000,
        },
        modelUsage: {
          "claude-opus-4-6": {
            contextWindow: 200000,
            maxOutputTokens: 64000,
          },
        },
      } as unknown as SDKMessage);
      harness.query.finish();

      const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
      // Multiple usage events are expected (one per snapshot); only the
      // last one reflects the reconciled result totals.
      const usageEvents = runtimeEvents.filter(
        (event) => event.type === "thread.token-usage.updated",
      );
      const finalUsageEvent = usageEvents.at(-1);
      assert.equal(finalUsageEvent?.type, "thread.token-usage.updated");
      // Repeat the check so TypeScript narrows the union for `payload`.
      if (finalUsageEvent?.type === "thread.token-usage.updated") {
        assert.deepEqual(finalUsageEvent.payload, {
          usage: {
            usedTokens: 190000, // kept from the in-window task_progress snapshot
            lastUsedTokens: 190000,
            totalProcessedTokens: 535000, // oversized accumulated total
            maxTokens: 200000,
          },
        });
      }
    }).pipe(
      // Deterministic randomness keeps generated ids stable across runs.
      Effect.provideService(Random.Random, makeDeterministicRandomService()),
      Effect.provide(harness.layer),
    );
  },
);

it.effect(
"emits completion only after turn result when assistant frames arrive before deltas",
() => {
Expand Down
95 changes: 47 additions & 48 deletions apps/server/src/provider/Layers/ClaudeAdapter.ts
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟢 Low

function maxClaudeContextWindowFromModelUsage(

In maxClaudeContextWindowFromModelUsage, the new implementation removes defensive runtime type checks for value.contextWindow. If the SDK returns malformed data where contextWindow is undefined or NaN, the function returns NaN instead of undefined. This corrupts context.lastKnownContextWindow (line 1330) since NaN !== undefined passes, causing silent degradation of token usage tracking. Consider restoring the type and finiteness checks to ensure malformed values are treated as undefined.

🤖 Copy this AI Prompt to have your agent fix this:
In file apps/server/src/provider/Layers/ClaudeAdapter.ts around line 277:

In `maxClaudeContextWindowFromModelUsage`, the new implementation removes defensive runtime type checks for `value.contextWindow`. If the SDK returns malformed data where `contextWindow` is `undefined` or `NaN`, the function returns `NaN` instead of `undefined`. This corrupts `context.lastKnownContextWindow` (line 1330) since `NaN !== undefined` passes, causing silent degradation of token usage tracking. Consider restoring the type and finiteness checks to ensure malformed values are treated as undefined.

Evidence trail:
apps/server/src/provider/Layers/ClaudeAdapter.ts lines 277-290 (REVIEWED_COMMIT) - new implementation without defensive checks

git_diff MERGE_BASE..REVIEWED_COMMIT showing old code had: `typeof contextWindow !== "number" || !Number.isFinite(contextWindow) || contextWindow <= 0` checks that are now removed

apps/server/src/provider/Layers/ClaudeAdapter.ts lines 1328-1330 (REVIEWED_COMMIT) - shows `if (resultContextWindow !== undefined)` check that would pass NaN

apps/server/src/provider/Layers/ClaudeAdapter.ts line 1340, 1345, 2025, 2057 - shows lastKnownContextWindow is used in token tracking

JavaScript Math.max() behavior: Math.max(0, undefined) returns NaN

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import {
type SDKResultMessage,
type SettingSource,
type SDKUserMessage,
ModelUsage,
NonNullableUsage,
} from "@anthropic-ai/claude-agent-sdk";
import {
ApprovalRequestId,
Expand Down Expand Up @@ -272,78 +274,73 @@ function asRuntimeItemId(value: string): RuntimeItemId {
return RuntimeItemId.make(value);
}

function maxClaudeContextWindowFromModelUsage(modelUsage: unknown): number | undefined {
if (!modelUsage || typeof modelUsage !== "object") {
return undefined;
}
function maxClaudeContextWindowFromModelUsage(
modelUsage: Record<string, ModelUsage> | undefined,
): number | undefined {
if (!modelUsage) return undefined;

let maxContextWindow: number | undefined;
for (const value of Object.values(modelUsage as Record<string, unknown>)) {
if (!value || typeof value !== "object") {
continue;
}
const contextWindow = (value as { contextWindow?: unknown }).contextWindow;
if (
typeof contextWindow !== "number" ||
!Number.isFinite(contextWindow) ||
contextWindow <= 0
) {
continue;
}
for (const value of Object.values(modelUsage)) {
const contextWindow = value.contextWindow;
maxContextWindow = Math.max(maxContextWindow ?? 0, contextWindow);
}

return maxContextWindow;
}

function normalizeClaudeTokenUsage(
usage: unknown,
value: NonNullableUsage | undefined,
contextWindow?: number,
): ThreadTokenUsageSnapshot | undefined {
if (!usage || typeof usage !== "object") {
if (!value || typeof value !== "object") {
return undefined;
}

const record = usage as Record<string, unknown>;
const directUsedTokens =
typeof record.total_tokens === "number" && Number.isFinite(record.total_tokens)
? record.total_tokens
: undefined;
const usage = value as Record<string, unknown>;
const inputTokens =
(typeof record.input_tokens === "number" && Number.isFinite(record.input_tokens)
? record.input_tokens
(typeof usage.input_tokens === "number" && Number.isFinite(usage.input_tokens)
? usage.input_tokens
: 0) +
(typeof record.cache_creation_input_tokens === "number" &&
Number.isFinite(record.cache_creation_input_tokens)
? record.cache_creation_input_tokens
(typeof usage.cache_creation_input_tokens === "number" &&
Number.isFinite(usage.cache_creation_input_tokens)
? usage.cache_creation_input_tokens
: 0) +
(typeof record.cache_read_input_tokens === "number" &&
Number.isFinite(record.cache_read_input_tokens)
? record.cache_read_input_tokens
(typeof usage.cache_read_input_tokens === "number" &&
Number.isFinite(usage.cache_read_input_tokens)
? usage.cache_read_input_tokens
: 0);
const outputTokens =
typeof record.output_tokens === "number" && Number.isFinite(record.output_tokens)
? record.output_tokens
typeof usage.output_tokens === "number" && Number.isFinite(usage.output_tokens)
? usage.output_tokens
: 0;
const derivedUsedTokens = inputTokens + outputTokens;
const usedTokens = directUsedTokens ?? (derivedUsedTokens > 0 ? derivedUsedTokens : undefined);
if (usedTokens === undefined || usedTokens <= 0) {
const derivedTotalProcessedTokens = inputTokens + outputTokens;
const totalProcessedTokens =
(typeof usage.total_tokens === "number" && Number.isFinite(usage.total_tokens)
? usage.total_tokens
: undefined) ?? (derivedTotalProcessedTokens > 0 ? derivedTotalProcessedTokens : undefined);
if (totalProcessedTokens === undefined || totalProcessedTokens <= 0) {
return undefined;
}

const maxTokens =
typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
? contextWindow
: undefined;
const usedTokens =
maxTokens !== undefined ? Math.min(totalProcessedTokens, maxTokens) : totalProcessedTokens;

return {
usedTokens,
lastUsedTokens: usedTokens,
...(totalProcessedTokens > usedTokens ? { totalProcessedTokens } : {}),
...(inputTokens > 0 ? { inputTokens } : {}),
...(outputTokens > 0 ? { outputTokens } : {}),
...(typeof contextWindow === "number" && Number.isFinite(contextWindow) && contextWindow > 0
? { maxTokens: contextWindow }
: {}),
...(typeof record.tool_uses === "number" && Number.isFinite(record.tool_uses)
? { toolUses: record.tool_uses }
...(maxTokens !== undefined ? { maxTokens } : {}),
...(typeof usage.tool_uses === "number" && Number.isFinite(usage.tool_uses)
? { toolUses: usage.tool_uses }
: {}),
...(typeof record.duration_ms === "number" && Number.isFinite(record.duration_ms)
? { durationMs: record.duration_ms }
...(typeof usage.duration_ms === "number" && Number.isFinite(usage.duration_ms)
? { durationMs: usage.duration_ms }
: {}),
};
}
Expand Down Expand Up @@ -1328,8 +1325,6 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
errorMessage?: string,
result?: SDKResultMessage,
) {
const resultUsage =
result?.usage && typeof result.usage === "object" ? { ...result.usage } : undefined;
const resultContextWindow = maxClaudeContextWindowFromModelUsage(result?.modelUsage);
if (resultContextWindow !== undefined) {
context.lastKnownContextWindow = resultContextWindow;
Expand All @@ -1341,9 +1336,11 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
// Instead, use the last known context-window-accurate usage from task_progress
// events and treat the accumulated total as totalProcessedTokens.
const accumulatedSnapshot = normalizeClaudeTokenUsage(
resultUsage,
result?.usage,
resultContextWindow ?? context.lastKnownContextWindow,
);
const accumulatedTotalProcessedTokens =
accumulatedSnapshot?.totalProcessedTokens ?? accumulatedSnapshot?.usedTokens;
const lastGoodUsage = context.lastKnownTokenUsage;
const maxTokens = resultContextWindow ?? context.lastKnownContextWindow;
const usageSnapshot: ThreadTokenUsageSnapshot | undefined = lastGoodUsage
Expand All @@ -1352,8 +1349,10 @@ const makeClaudeAdapter = Effect.fn("makeClaudeAdapter")(function* (
...(typeof maxTokens === "number" && Number.isFinite(maxTokens) && maxTokens > 0
? { maxTokens }
: {}),
...(accumulatedSnapshot && accumulatedSnapshot.usedTokens > lastGoodUsage.usedTokens
? { totalProcessedTokens: accumulatedSnapshot.usedTokens }
...(typeof accumulatedTotalProcessedTokens === "number" &&
Number.isFinite(accumulatedTotalProcessedTokens) &&
accumulatedTotalProcessedTokens > lastGoodUsage.usedTokens
? { totalProcessedTokens: accumulatedTotalProcessedTokens }
: {}),
}
: accumulatedSnapshot;
Expand Down
Loading