Skip to content

Commit 8f35155

Browse files
Surface context window usage in the UI (#1351)
1 parent 529315b commit 8f35155

File tree

15 files changed

+1040
-3
lines changed

15 files changed

+1040
-3
lines changed

apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.test.ts

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1898,6 +1898,185 @@ describe("ProviderRuntimeIngestion", () => {
18981898
expect(checkpoint?.checkpointRef).toBe("provider-diff:evt-turn-diff-updated");
18991899
});
19001900

1901+
it("projects context window updates into normalized thread activities", async () => {
1902+
const harness = await createHarness();
1903+
const now = new Date().toISOString();
1904+
1905+
harness.emit({
1906+
type: "thread.token-usage.updated",
1907+
eventId: asEventId("evt-thread-token-usage-updated"),
1908+
provider: "codex",
1909+
createdAt: now,
1910+
threadId: asThreadId("thread-1"),
1911+
payload: {
1912+
usage: {
1913+
usedTokens: 1075,
1914+
totalProcessedTokens: 10_200,
1915+
maxTokens: 128_000,
1916+
inputTokens: 1000,
1917+
cachedInputTokens: 500,
1918+
outputTokens: 50,
1919+
reasoningOutputTokens: 25,
1920+
lastUsedTokens: 1075,
1921+
lastInputTokens: 1000,
1922+
lastCachedInputTokens: 500,
1923+
lastOutputTokens: 50,
1924+
lastReasoningOutputTokens: 25,
1925+
compactsAutomatically: true,
1926+
},
1927+
},
1928+
});
1929+
1930+
const thread = await waitForThread(harness.engine, (entry) =>
1931+
entry.activities.some(
1932+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
1933+
),
1934+
);
1935+
1936+
const usageActivity = thread.activities.find(
1937+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
1938+
);
1939+
expect(usageActivity).toBeDefined();
1940+
expect(usageActivity?.payload).toMatchObject({
1941+
usedTokens: 1075,
1942+
totalProcessedTokens: 10_200,
1943+
maxTokens: 128_000,
1944+
inputTokens: 1000,
1945+
cachedInputTokens: 500,
1946+
outputTokens: 50,
1947+
reasoningOutputTokens: 25,
1948+
lastUsedTokens: 1075,
1949+
compactsAutomatically: true,
1950+
});
1951+
});
1952+
1953+
it("projects Codex camelCase token usage payloads into normalized thread activities", async () => {
1954+
const harness = await createHarness();
1955+
const now = new Date().toISOString();
1956+
1957+
harness.emit({
1958+
type: "thread.token-usage.updated",
1959+
eventId: asEventId("evt-thread-token-usage-updated-camel"),
1960+
provider: "codex",
1961+
createdAt: now,
1962+
threadId: asThreadId("thread-1"),
1963+
payload: {
1964+
usage: {
1965+
usedTokens: 126,
1966+
totalProcessedTokens: 11_839,
1967+
maxTokens: 258_400,
1968+
inputTokens: 120,
1969+
cachedInputTokens: 0,
1970+
outputTokens: 6,
1971+
reasoningOutputTokens: 0,
1972+
lastUsedTokens: 126,
1973+
lastInputTokens: 120,
1974+
lastCachedInputTokens: 0,
1975+
lastOutputTokens: 6,
1976+
lastReasoningOutputTokens: 0,
1977+
compactsAutomatically: true,
1978+
},
1979+
},
1980+
});
1981+
1982+
const thread = await waitForThread(harness.engine, (entry) =>
1983+
entry.activities.some(
1984+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
1985+
),
1986+
);
1987+
1988+
const usageActivity = thread.activities.find(
1989+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
1990+
);
1991+
expect(usageActivity?.payload).toMatchObject({
1992+
usedTokens: 126,
1993+
totalProcessedTokens: 11_839,
1994+
maxTokens: 258_400,
1995+
inputTokens: 120,
1996+
cachedInputTokens: 0,
1997+
outputTokens: 6,
1998+
reasoningOutputTokens: 0,
1999+
lastUsedTokens: 126,
2000+
lastInputTokens: 120,
2001+
lastOutputTokens: 6,
2002+
compactsAutomatically: true,
2003+
});
2004+
});
2005+
2006+
it("projects Claude usage snapshots with context window into normalized thread activities", async () => {
2007+
const harness = await createHarness();
2008+
const now = new Date().toISOString();
2009+
2010+
harness.emit({
2011+
type: "thread.token-usage.updated",
2012+
eventId: asEventId("evt-thread-token-usage-updated-claude-window"),
2013+
provider: "claudeAgent",
2014+
createdAt: now,
2015+
threadId: asThreadId("thread-1"),
2016+
payload: {
2017+
usage: {
2018+
usedTokens: 31_251,
2019+
lastUsedTokens: 31_251,
2020+
maxTokens: 200_000,
2021+
toolUses: 25,
2022+
durationMs: 43_567,
2023+
},
2024+
},
2025+
raw: {
2026+
source: "claude.sdk.message",
2027+
method: "claude/result/success",
2028+
payload: {},
2029+
},
2030+
});
2031+
2032+
const thread = await waitForThread(harness.engine, (entry) =>
2033+
entry.activities.some(
2034+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
2035+
),
2036+
);
2037+
2038+
const usageActivity = thread.activities.find(
2039+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-window.updated",
2040+
);
2041+
expect(usageActivity?.payload).toMatchObject({
2042+
usedTokens: 31_251,
2043+
lastUsedTokens: 31_251,
2044+
maxTokens: 200_000,
2045+
toolUses: 25,
2046+
durationMs: 43_567,
2047+
});
2048+
});
2049+
2050+
it("projects compacted thread state into context compaction activities", async () => {
2051+
const harness = await createHarness();
2052+
const now = new Date().toISOString();
2053+
2054+
harness.emit({
2055+
type: "thread.state.changed",
2056+
eventId: asEventId("evt-thread-compacted"),
2057+
provider: "codex",
2058+
createdAt: now,
2059+
threadId: asThreadId("thread-1"),
2060+
turnId: asTurnId("turn-1"),
2061+
payload: {
2062+
state: "compacted",
2063+
detail: { source: "provider" },
2064+
},
2065+
});
2066+
2067+
const thread = await waitForThread(harness.engine, (entry) =>
2068+
entry.activities.some(
2069+
(activity: ProviderRuntimeTestActivity) => activity.kind === "context-compaction",
2070+
),
2071+
);
2072+
2073+
const activity = thread.activities.find(
2074+
(candidate: ProviderRuntimeTestActivity) => candidate.kind === "context-compaction",
2075+
);
2076+
expect(activity?.summary).toBe("Context compacted");
2077+
expect(activity?.tone).toBe("info");
2078+
});
2079+
19012080
it("projects Codex task lifecycle chunks into thread activities", async () => {
19022081
const harness = await createHarness();
19032082
const now = new Date().toISOString();

apps/server/src/orchestration/Layers/ProviderRuntimeIngestion.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {
88
CheckpointRef,
99
isToolLifecycleItemType,
1010
ThreadId,
11+
type ThreadTokenUsageSnapshot,
1112
TurnId,
1213
type OrchestrationThreadActivity,
1314
type ProviderRuntimeEvent,
@@ -101,6 +102,15 @@ function asString(value: unknown): string | undefined {
101102
return typeof value === "string" ? value : undefined;
102103
}
103104

105+
function buildContextWindowActivityPayload(
106+
event: ProviderRuntimeEvent,
107+
): ThreadTokenUsageSnapshot | undefined {
108+
if (event.type !== "thread.token-usage.updated" || event.payload.usage.usedTokens <= 0) {
109+
return undefined;
110+
}
111+
return event.payload.usage;
112+
}
113+
104114
function runtimePayloadRecord(event: ProviderRuntimeEvent): Record<string, unknown> | undefined {
105115
const payload = (event as { payload?: unknown }).payload;
106116
if (!payload || typeof payload !== "object") {
@@ -409,6 +419,48 @@ function runtimeEventToActivities(
409419
];
410420
}
411421

422+
case "thread.state.changed": {
423+
if (event.payload.state !== "compacted") {
424+
return [];
425+
}
426+
427+
return [
428+
{
429+
id: event.eventId,
430+
createdAt: event.createdAt,
431+
tone: "info",
432+
kind: "context-compaction",
433+
summary: "Context compacted",
434+
payload: {
435+
state: event.payload.state,
436+
...(event.payload.detail !== undefined ? { detail: event.payload.detail } : {}),
437+
},
438+
turnId: toTurnId(event.turnId) ?? null,
439+
...maybeSequence,
440+
},
441+
];
442+
}
443+
444+
case "thread.token-usage.updated": {
445+
const payload = buildContextWindowActivityPayload(event);
446+
if (!payload) {
447+
return [];
448+
}
449+
450+
return [
451+
{
452+
id: event.eventId,
453+
createdAt: event.createdAt,
454+
tone: "info",
455+
kind: "context-window.updated",
456+
summary: "Context window updated",
457+
payload,
458+
turnId: toTurnId(event.turnId) ?? null,
459+
...maybeSequence,
460+
},
461+
];
462+
}
463+
412464
case "item.updated": {
413465
if (!isToolLifecycleItemType(event.payload.itemType)) {
414466
return [];

apps/server/src/provider/Layers/ClaudeAdapter.test.ts

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1225,7 +1225,7 @@ describe("ClaudeAdapterLive", () => {
12251225
return Effect.gen(function* () {
12261226
const adapter = yield* ClaudeAdapter;
12271227

1228-
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 5).pipe(
1228+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 6).pipe(
12291229
Stream.runCollect,
12301230
Effect.forkChild,
12311231
);
@@ -1267,6 +1267,127 @@ describe("ClaudeAdapterLive", () => {
12671267
);
12681268
});
12691269

1270+
it.effect("emits thread token usage updates from Claude task progress", () => {
1271+
const harness = makeHarness();
1272+
return Effect.gen(function* () {
1273+
const adapter = yield* ClaudeAdapter;
1274+
1275+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 6).pipe(
1276+
Stream.runCollect,
1277+
Effect.forkChild,
1278+
);
1279+
1280+
yield* adapter.startSession({
1281+
threadId: THREAD_ID,
1282+
provider: "claudeAgent",
1283+
runtimeMode: "full-access",
1284+
});
1285+
1286+
harness.query.emit({
1287+
type: "system",
1288+
subtype: "task_progress",
1289+
task_id: "task-usage-1",
1290+
description: "Thinking through the patch",
1291+
usage: {
1292+
total_tokens: 321,
1293+
tool_uses: 2,
1294+
duration_ms: 654,
1295+
},
1296+
session_id: "sdk-session-task-usage",
1297+
uuid: "task-usage-progress-1",
1298+
} as unknown as SDKMessage);
1299+
1300+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1301+
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
1302+
const progressEvent = runtimeEvents.find((event) => event.type === "task.progress");
1303+
assert.equal(usageEvent?.type, "thread.token-usage.updated");
1304+
if (usageEvent?.type === "thread.token-usage.updated") {
1305+
assert.deepEqual(usageEvent.payload, {
1306+
usage: {
1307+
usedTokens: 321,
1308+
lastUsedTokens: 321,
1309+
toolUses: 2,
1310+
durationMs: 654,
1311+
},
1312+
});
1313+
}
1314+
assert.equal(progressEvent?.type, "task.progress");
1315+
if (usageEvent && progressEvent) {
1316+
assert.notStrictEqual(usageEvent.eventId, progressEvent.eventId);
1317+
}
1318+
}).pipe(
1319+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1320+
Effect.provide(harness.layer),
1321+
);
1322+
});
1323+
1324+
it.effect("emits Claude context window on result completion usage snapshots", () => {
1325+
const harness = makeHarness();
1326+
return Effect.gen(function* () {
1327+
const adapter = yield* ClaudeAdapter;
1328+
1329+
const runtimeEventsFiber = yield* Stream.take(adapter.streamEvents, 7).pipe(
1330+
Stream.runCollect,
1331+
Effect.forkChild,
1332+
);
1333+
1334+
yield* adapter.startSession({
1335+
threadId: THREAD_ID,
1336+
provider: "claudeAgent",
1337+
runtimeMode: "full-access",
1338+
});
1339+
1340+
yield* adapter.sendTurn({
1341+
threadId: THREAD_ID,
1342+
input: "hello",
1343+
attachments: [],
1344+
});
1345+
1346+
harness.query.emit({
1347+
type: "result",
1348+
subtype: "success",
1349+
is_error: false,
1350+
duration_ms: 1234,
1351+
duration_api_ms: 1200,
1352+
num_turns: 1,
1353+
result: "done",
1354+
stop_reason: "end_turn",
1355+
session_id: "sdk-session-result-usage",
1356+
usage: {
1357+
input_tokens: 4,
1358+
cache_creation_input_tokens: 2715,
1359+
cache_read_input_tokens: 21144,
1360+
output_tokens: 679,
1361+
},
1362+
modelUsage: {
1363+
"claude-opus-4-6": {
1364+
contextWindow: 200000,
1365+
maxOutputTokens: 64000,
1366+
},
1367+
},
1368+
} as unknown as SDKMessage);
1369+
harness.query.finish();
1370+
1371+
const runtimeEvents = Array.from(yield* Fiber.join(runtimeEventsFiber));
1372+
const usageEvent = runtimeEvents.find((event) => event.type === "thread.token-usage.updated");
1373+
assert.equal(usageEvent?.type, "thread.token-usage.updated");
1374+
if (usageEvent?.type === "thread.token-usage.updated") {
1375+
assert.deepEqual(usageEvent.payload, {
1376+
usage: {
1377+
usedTokens: 24542,
1378+
lastUsedTokens: 24542,
1379+
inputTokens: 23863,
1380+
outputTokens: 679,
1381+
maxTokens: 200000,
1382+
},
1383+
});
1384+
}
1385+
}).pipe(
1386+
Effect.provideService(Random.Random, makeDeterministicRandomService()),
1387+
Effect.provide(harness.layer),
1388+
);
1389+
});
1390+
12701391
it.effect(
12711392
"emits completion only after turn result when assistant frames arrive before deltas",
12721393
() => {

0 commit comments

Comments
 (0)