Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added
- Added per-step token cost tracking and estimated tool call token usage to Ask Sourcebot chat history. [#1353](https://github.com/sourcebot-dev/sourcebot/pull/1353)

## [5.0.4] - 2026-06-18

### Changed
Expand Down
3 changes: 2 additions & 1 deletion packages/web/src/ee/features/chat/agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@ const createAssistantMessage = (parts: SBChatMessagePart[]): SBChatMessage => ({
});

const createFakeStreamResult = () => ({
response: Promise.resolve(new Response()),
response: Promise.resolve({ messages: [] }),
steps: Promise.resolve([]),
totalUsage: Promise.resolve({
inputTokens: 1,
outputTokens: 1,
Expand Down
69 changes: 67 additions & 2 deletions packages/web/src/ee/features/chat/agent.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { SBChatMessage, SBChatMessageMetadata } from "@/features/chat/types";
import { SBChatMessage, SBChatMessageMetadata, StepTokenUsageEntry, ToolTokenUsageEntry } from "@/features/chat/types";
import { estimateModelToolOutputTokens } from "@/features/chat/tokenEstimation";
import { getFileSource } from '@/features/git';
import { isServiceError } from "@/lib/utils";
import { LanguageModelV3 as AISDKLanguageModelV3 } from "@ai-sdk/provider";
Expand Down Expand Up @@ -190,19 +191,76 @@ export const createMessageStream = async ({
});

const totalUsage = await researchStream.totalUsage;
const steps = await researchStream.steps;
const response = await researchStream.response;

// Tool output estimates are derived from `response.messages` rather
// than per-step `toolResults` because the response messages cover
// tool calls that never run inside a step — approval-gated tools
// execute before the step loop, and thrown tool errors are recorded
// as `tool-error` parts that `toolResults` excludes. Their
// `tool-result` parts also carry the output in model-visible form
// (`toModelOutput` already applied), which is exactly the payload
// whose token footprint we want to estimate.
const toolUsageByToolCallId = new Map<string, ToolTokenUsageEntry>(
response.messages.flatMap((message) =>
message.role !== 'tool' ? [] : message.content.flatMap((part) =>
part.type !== 'tool-result' ? [] : [[part.toolCallId, {
toolCallId: part.toolCallId,
toolName: part.toolName,
estimatedOutputTokens: estimateModelToolOutputTokens(part.output),
}] as const]
)
)
);

// One entry per step, in step order. The UI joins its step groups
// to these entries by array position, so the order and count must
// mirror the stream's steps exactly. Tool calls nest under the
// step they ran in; `content` is matched rather than `toolResults`
// so that thrown tool errors (`tool-error` parts, which
// `toolResults` excludes) are still attributed to their step.
const stepTokenUsage: StepTokenUsageEntry[] = steps.map(({ usage, content }) => ({
inputTokens: usage.inputTokens,
outputTokens: usage.outputTokens,
cacheReadTokens: usage.inputTokenDetails?.cacheReadTokens,
tools: content.flatMap((part) => {
if (part.type !== 'tool-result' && part.type !== 'tool-error') {
return [];
}
const entry = toolUsageByToolCallId.get(part.toolCallId);
if (!entry) {
return [];
}
toolUsageByToolCallId.delete(part.toolCallId);
return [entry];
}),
}));

// Any estimates left unclaimed belong to tool calls that executed
// before the step loop (approval continuations). Their output
// enters the context as input to this phase's first step, so nest
// them under it.
if (toolUsageByToolCallId.size > 0 && stepTokenUsage.length > 0) {
stepTokenUsage[0].tools.unshift(...toolUsageByToolCallId.values());
}

writer.write({
type: 'message-metadata',
messageMetadata: {
// Spread first so the derived fields below can't be overwritten by caller metadata.
...metadata,
totalTokens: (priorMetadata?.totalTokens ?? 0) + (totalUsage.totalTokens ?? 0),
totalInputTokens: (priorMetadata?.totalInputTokens ?? 0) + (totalUsage.inputTokens ?? 0),
totalOutputTokens: (priorMetadata?.totalOutputTokens ?? 0) + (totalUsage.outputTokens ?? 0),
totalCacheReadTokens: (priorMetadata?.totalCacheReadTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheReadTokens ?? 0),
totalCacheWriteTokens: (priorMetadata?.totalCacheWriteTokens ?? 0) + (totalUsage.inputTokenDetails?.cacheWriteTokens ?? 0),
totalResponseTimeMs: (priorMetadata?.totalResponseTimeMs ?? 0) + (new Date().getTime() - startTime.getTime()),
// Concatenated (not summed) across approval-continuation
// phases so earlier phases' steps are preserved in order.
stepTokenUsage: [...(priorMetadata?.stepTokenUsage ?? []), ...stepTokenUsage],
modelName,
traceId,
...metadata,
}
});

Expand Down Expand Up @@ -430,6 +488,13 @@ const createAgentStream = async ({
logger.warn(`Tool call repair failed for "${toolCall.toolName}": ${error.message}`);
return null;
},
// Token usage collection deliberately does NOT happen here: the SDK
// awaits this callback before starting the next step, so it must
// stay cheap, and `toolResults` misses tool calls that never run
// inside a step (approval-gated tools execute before the step loop)
// as well as thrown tool errors (recorded as `tool-error` parts).
// Both are instead derived post-stream in `createMessageStream`
// from `steps` and `response.messages`.
onStepFinish: ({ toolResults }) => {
toolResults.forEach(({ output, dynamic }) => {
if (dynamic || isServiceError(output)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,33 +91,57 @@ const ChatThreadListItemComponent = forwardRef<HTMLDivElement, ChatThreadListIte
// should be visible to the user. By "steps", we mean parts that originated
// from the same LLM invocation. By "visible", we mean parts that have some
// visual representation in the UI (e.g., text, reasoning, tool calls, etc.).
const uiVisibleThinkingSteps = useMemo(() => {
const steps = groupMessageIntoSteps(assistantMessage?.parts ?? []);

// Filter out the answerPart and empty steps
return steps
.map(
(step) => step
// First, filter out any parts that are not text
.filter((part) => {
if (part.type === 'text') {
return !part.text.includes(ANSWER_TAG);
}

return true;
})
.filter((part) => {
// Only include text, reasoning, and tool parts
return (
part.type === 'text' ||
part.type === 'reasoning' ||
part.type.startsWith('tool-') ||
part.type === 'dynamic-tool'
)
})
)
//
// Each step is tagged with its stepIndex — the invocation's position in
// the turn, which indexes into `metadata.stepTokenUsage`. Indices are
// assigned by counting 'step-start' markers (one per invocation) BEFORE
// any filtering, so dropping empty or answer-only steps below cannot
// shift the indices of the steps that remain.
const { uiVisibleThinkingSteps, answerStepIndex } = useMemo(() => {
const groupedParts = groupMessageIntoSteps(assistantMessage?.parts ?? []);

// Parts written before the first step-start (e.g. data parts) don't
// belong to any step; they get stepIndex -1 and never survive the
// visibility filters below.
let stepIndex = -1;
let answerStepIndex: number | undefined = undefined;

const steps = groupedParts
.map((stepParts) => {
if (stepParts[0]?.type === 'step-start') {
stepIndex++;
}

if (stepParts.some((part) => part.type === 'text' && part.text.includes(ANSWER_TAG))) {
answerStepIndex = stepIndex;
}

return {
stepIndex,
parts: stepParts
// First, filter out the answer text
.filter((part) => {
if (part.type === 'text') {
return !part.text.includes(ANSWER_TAG);
}

return true;
})
.filter((part) => {
// Only include text, reasoning, and tool parts
return (
part.type === 'text' ||
part.type === 'reasoning' ||
part.type.startsWith('tool-') ||
part.type === 'dynamic-tool'
)
}),
};
})
// Then, filter out any steps that are empty
.filter(step => step.length > 0);
.filter((step) => step.parts.length > 0);

return { uiVisibleThinkingSteps: steps, answerStepIndex };
}, [assistantMessage?.parts]);

// "thinking" is when the agent is generating output that is not the answer.
Expand Down Expand Up @@ -379,6 +403,7 @@ const ChatThreadListItemComponent = forwardRef<HTMLDivElement, ChatThreadListIte
isNetworkActive={isNetworkActive}
isAwaitingToolApproval={isAwaitingToolApproval}
thinkingSteps={uiVisibleThinkingSteps}
answerStepIndex={answerStepIndex}
metadata={assistantMessage?.metadata}
/>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ describe('DetailsCard', () => {
isTurnInProgress={true}
isNetworkActive={false}
isAwaitingToolApproval={false}
thinkingSteps={[[failedActivationPart]]}
thinkingSteps={[{ stepIndex: 0, parts: [failedActivationPart] }]}
/>
</TooltipProvider>
);
Expand Down
Loading
Loading