Skip to content

Commit 4082595

Browse files
authored
Merge pull request #582 from rajbos/rajbos/add-thinking-effort-tracking
feat: add thinking effort (reasoning effort) tracking
2 parents a930179 + 5d895ae commit 4082595

8 files changed

Lines changed: 297 additions & 10 deletions

File tree

.github/instructions/vscode-extension.instructions.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,11 @@ The entire extension's logic is contained within the `CopilotTokenTracker` class
4848
- **Watch Mode**: For active development, use `npm run watch` from `vscode-extension/`. This will automatically recompile the extension on file changes.
4949
- **Testing/Debugging**: Press `F5` in VS Code to open the Extension Development Host. This will launch a new VS Code window with the extension running. `console.log` statements from `vscode-extension/src/extension.ts` will appear in the Developer Tools console of this new window (Help > Toggle Developer Tools).
5050

51-
**Important build guidance:** After making changes to source code or related files (TypeScript, JavaScript, JSON, or other code files used by the extension), always run `npm run compile` from `vscode-extension/` to validate that the project still builds and lints cleanly before opening a pull request or releasing. You do not need to run the full compile step for documentation-only changes (Markdown files), but you should run it after any edits that touch source, configuration, or JSON data files.
51+
**Important build guidance:** After making changes to source code or related files (TypeScript, JavaScript, JSON, or other code files used by the extension), always run both `npm ci` and then `npm run compile` from `vscode-extension/` to validate that the project still builds and lints cleanly before opening a pull request or releasing. Also run the unit tests with `npm run test:node` to catch any regressions. You do not need to run the full compile step for documentation-only changes (Markdown files), but you should run it after any edits that touch source, configuration, or JSON data files.
52+
53+
**Always use `npm ci` (not `npm install`) when validating a build** — `npm ci` installs from the lockfile exactly, mirroring what CI does, and will catch any dependency drift. Use `npm install` only when intentionally adding or updating packages.
54+
55+
> ⚠️ **Common mistake**: The `edit` tool's old_str/new_str replacement can accidentally drop comment delimiters (e.g. `/**` opening a JSDoc block) when the match boundary falls exactly at that line. After editing `tokenEstimation.ts` or any file with JSDoc comments, always verify the file compiles before committing.
5256
5357
## Development Guidelines
5458

vscode-extension/src/extension.ts

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import * as vscode from 'vscode';
1+
import * as vscode from 'vscode';
22
import * as fs from 'fs';
33
import * as path from 'path';
44
import * as os from 'os';
@@ -71,6 +71,7 @@ import {
7171
getTotalTokensFromModelUsage as _getTotalTokensFromModelUsage,
7272
reconstructJsonlStateAsync as _reconstructJsonlStateAsync,
7373
extractSubAgentData as _extractSubAgentData,
74+
buildReasoningEffortTimeline as _buildReasoningEffortTimeline,
7475
} from './tokenEstimation';
7576
import { SessionDiscovery } from './sessionDiscovery';
7677
import { CacheManager } from './cacheManager';
@@ -167,7 +168,7 @@ type RepoPrStatsResult = {
167168

168169
class CopilotTokenTracker implements vscode.Disposable {
169170
// Cache version - increment this when making changes that require cache invalidation
170-
private static readonly CACHE_VERSION = 36; // Add first-user-message fallback title for untitled Copilot CLI sessions
171+
private static readonly CACHE_VERSION = 37; // Add thinking effort (reasoning effort) tracking
171172
// Maximum length for displaying workspace IDs in diagnostics/customization matrix
172173
private static readonly WORKSPACE_ID_DISPLAY_LENGTH = 8;
173174

@@ -4034,6 +4035,9 @@ class CopilotTokenTracker implements vscode.Disposable {
40344035
// blocking the extension host event loop on large files.
40354036
const { sessionState } = await _reconstructJsonlStateAsync(lines);
40364037

4038+
// Build per-request effort map from delta lines
4039+
const { effortByRequestId } = _buildReasoningEffortTimeline(lines);
4040+
40374041
// Extract session-level info
40384042
let sessionMode: 'ask' | 'edit' | 'agent' | 'plan' | 'customAgent' = 'ask';
40394043
let currentModel: string | null = null;
@@ -4119,14 +4123,29 @@ class CopilotTokenTracker implements vscode.Disposable {
41194123
inputTokensEstimate: this.estimateTokensFromText(userMessage, requestModel),
41204124
outputTokensEstimate: this.estimateTokensFromText(responseText, requestModel),
41214125
thinkingTokensEstimate: this.estimateTokensFromText(thinkingText, requestModel),
4122-
actualUsage
4126+
actualUsage,
4127+
thinkingEffort: effortByRequestId.get(request.requestId)
41234128
};
41244129

41254130
turns.push(turn);
41264131
}
41274132
} else {
41284133
// Non-delta JSONL (Copilot CLI format)
41294134
let turnNumber = 0;
4135+
let cliSessionModel = 'gpt-4o';
4136+
let cliSessionEffort: string | undefined;
4137+
4138+
// Pre-scan for session.start to extract default model and effort
4139+
for (const line of lines) {
4140+
try {
4141+
const ev = JSON.parse(line);
4142+
if (ev.type === 'session.start' && ev.data) {
4143+
if (typeof ev.data.selectedModel === 'string') { cliSessionModel = ev.data.selectedModel; }
4144+
if (typeof ev.data.reasoningEffort === 'string') { cliSessionEffort = ev.data.reasoningEffort; }
4145+
break;
4146+
}
4147+
} catch { /* skip */ }
4148+
}
41304149

41314150
for (const line of lines) {
41324151
try {
@@ -4138,19 +4157,24 @@ class CopilotTokenTracker implements vscode.Disposable {
41384157
const contextRefs = this.createEmptyContextRefs();
41394158
const userMessage = event.data.content;
41404159
this.analyzeContextReferences(userMessage, contextRefs);
4160+
const turnModel = event.model || event.data?.model || cliSessionModel;
4161+
const turnEffort: string | undefined = typeof event.data?.reasoningEffort === 'string'
4162+
? event.data.reasoningEffort
4163+
: cliSessionEffort;
41414164
const turn: ChatTurn = {
41424165
turnNumber,
41434166
timestamp: event.timestamp ? new Date(event.timestamp).toISOString() : null,
41444167
mode: 'agent', // CLI is typically agent mode
41454168
userMessage,
41464169
assistantResponse: '',
4147-
model: event.model || 'gpt-4o',
4170+
model: turnModel,
41484171
toolCalls: [],
41494172
contextReferences: contextRefs,
41504173
mcpTools: [],
4151-
inputTokensEstimate: this.estimateTokensFromText(userMessage, event.model || 'gpt-4o'),
4174+
inputTokensEstimate: this.estimateTokensFromText(userMessage, turnModel),
41524175
outputTokensEstimate: 0,
4153-
thinkingTokensEstimate: 0
4176+
thinkingTokensEstimate: 0,
4177+
thinkingEffort: turnEffort
41544178
};
41554179
turns.push(turn);
41564180
}
@@ -4295,6 +4319,8 @@ class CopilotTokenTracker implements vscode.Disposable {
42954319
this.warn(`Error loading usage analysis for ${sessionFile}: ${usageError}`);
42964320
}
42974321

4322+
const sessionCache = this.getCachedSessionData(sessionFile);
4323+
42984324
return {
42994325
file: details.file,
43004326
title: details.title || null,
@@ -4307,7 +4333,8 @@ class CopilotTokenTracker implements vscode.Disposable {
43074333
firstInteraction: details.firstInteraction,
43084334
lastInteraction: details.lastInteraction,
43094335
turns,
4310-
usageAnalysis
4336+
usageAnalysis,
4337+
actualTokens: sessionCache?.actualTokens || 0
43114338
};
43124339
}
43134340

vscode-extension/src/tokenEstimation.ts

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,85 @@ export async function reconstructJsonlStateAsync(lines: string[], yieldInterval
243243
return { sessionState, isDeltaBased };
244244
}
245245

246+
/**
 * Build a map from requestId → reasoning effort level by scanning delta-based JSONL lines.
 *
 * The effort level is read from
 * `metadata.configurationSchema.properties.reasoningEffort.default` on the
 * selected model that is active at the time each request is added to the session.
 *
 * Recognised delta shapes (every other line is ignored):
 * - `kind: 0` — initial state; the model is read from `v.inputState.selectedModel`.
 * - `kind: 1` — update whose path `k` starts with `["inputState", "selectedModel"]`;
 *   the model is read from `v`.
 * - `kind: 2` — request added at path `k = ["requests", <number>]`; the id is read
 *   from `v.requestId`.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not objects
 * with a numeric `kind` (including the literal `null`) are skipped rather than thrown on.
 *
 * @param lines Raw JSONL lines of a session file.
 * @returns `effortByRequestId` — effort active when each request was added (requests
 *   seen before any effort is known are omitted); `defaultEffort` — effort found in the
 *   initial (`kind: 0`) state, or `null`; `switchCount` — number of times a `kind: 1`
 *   update changed an already-known effort to a different value.
 */
export function buildReasoningEffortTimeline(lines: string[]): {
  effortByRequestId: Map<string, string>;
  defaultEffort: string | null;
  switchCount: number;
} {
  const effortByRequestId = new Map<string, string>();
  let currentEffort: string | null = null;
  let defaultEffort: string | null = null;
  let switchCount = 0;

  // Narrow an arbitrary parsed JSON value to something we can index into.
  const asRecord = (value: unknown): Record<string, unknown> | null =>
    value !== null && typeof value === 'object' ? (value as Record<string, unknown>) : null;

  // Property chain from a selectedModel object down to the reasoningEffort schema entry.
  const effortSchemaPath = ['metadata', 'configurationSchema', 'properties', 'reasoningEffort'];

  // Returns the schema's default effort for a model, or null when any step is missing.
  function extractEffortFromModel(model: unknown): string | null {
    let node = asRecord(model);
    for (const key of effortSchemaPath) {
      if (node === null) { return null; }
      node = asRecord(node[key]);
    }
    const value = node?.['default'];
    return typeof value === 'string' ? value : null;
  }

  for (const line of lines) {
    if (!line.trim()) { continue; }

    let parsed: unknown;
    try { parsed = JSON.parse(line); } catch { continue; }

    // JSON.parse can legally return null or a primitive; only objects carry a delta.
    const delta = asRecord(parsed);
    if (delta === null) { continue; }

    const kind = delta['kind'];
    if (typeof kind !== 'number') { continue; }

    const path = delta['k'];

    if (kind === 0) {
      // Initial state: extract model from inputState.selectedModel
      const inputState = asRecord(asRecord(delta['v'])?.['inputState']);
      const effort = extractEffortFromModel(inputState?.['selectedModel']);
      if (effort !== null) {
        currentEffort = effort;
        defaultEffort = effort;
      }
    } else if (kind === 1) {
      // Update to inputState.selectedModel
      if (Array.isArray(path) && path[0] === 'inputState' && path[1] === 'selectedModel') {
        const effort = extractEffortFromModel(delta['v']);
        // NOTE(review): a model without a reasoningEffort schema leaves the previous
        // effort active for later requests — confirm this is intended.
        if (effort !== null && effort !== currentEffort) {
          // The first effort ever seen is not a "switch".
          if (currentEffort !== null) { switchCount++; }
          currentEffort = effort;
        }
      }
    } else if (kind === 2) {
      // New request being added: k = ["requests", <index>]
      if (
        currentEffort !== null &&
        Array.isArray(path) &&
        path[0] === 'requests' &&
        typeof path[1] === 'number'
      ) {
        const requestId = asRecord(delta['v'])?.['requestId'];
        if (typeof requestId === 'string' && requestId) {
          effortByRequestId.set(requestId, currentEffort);
        }
      }
    }
  }

  return { effortByRequestId, defaultEffort, switchCount };
}
324+
246325
/**
247326
* Extract per-request actual token usage from raw JSONL lines using regex.
248327
* Handles cases where lines with result data fail JSON.parse due to bad escape characters.

vscode-extension/src/types.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,16 @@ category?: 'copilot' | 'non-copilot';
122122
}
123123

124124
// New interfaces for usage analysis
125+
/** Per-level request counts for thinking effort (reasoning effort) tracking. */
126+
export interface ThinkingEffortUsage {
127+
/** Number of requests submitted at each effort level, keyed by level name (e.g. "low", "medium", "high"). */
128+
byEffort: { [effort: string]: number };
129+
/** Number of times the effort level changed within this session. */
130+
switchCount: number;
131+
/** The effort level active at the start of the session, or null if not available. */
132+
defaultEffort: string | null;
133+
}
134+
125135
export interface SessionUsageAnalysis {
126136
toolCalls: ToolCallUsage;
127137
modeUsage: ModeUsage;
@@ -138,6 +148,7 @@ export interface SessionUsageAnalysis {
138148
unknownRequests: number;
139149
totalRequests: number;
140150
};
151+
thinkingEffort?: ThinkingEffortUsage;
141152
editScope?: EditScopeUsage;
142153
applyUsage?: ApplyButtonUsage;
143154
sessionDuration?: SessionDurationData;
@@ -289,6 +300,12 @@ export interface UsageAnalysisPeriod {
289300
sessionDuration: SessionDurationData;
290301
conversationPatterns: ConversationPatterns;
291302
agentTypes: AgentTypeUsage;
303+
/** Aggregated thinking effort (reasoning effort) usage across all sessions in this period. */
304+
thinkingEffortUsage?: {
305+
byEffort: { [effort: string]: number };
306+
sessionCount: number; // sessions with effort data
307+
switchCount: number; // total effort switches across all sessions
308+
};
292309
}
293310

294311
// Detailed session file information for diagnostics view
@@ -337,6 +354,8 @@ export interface ChatTurn {
337354
outputTokensEstimate: number;
338355
thinkingTokensEstimate: number;
339356
actualUsage?: ActualUsage;
357+
/** Thinking effort level active when this turn was submitted (e.g. "low", "medium", "high"). */
358+
thinkingEffort?: string;
340359
}
341360

342361
// Full session log data for the log viewer
@@ -353,6 +372,8 @@ export interface SessionLogData {
353372
lastInteraction: string | null;
354373
turns: ChatTurn[];
355374
usageAnalysis?: SessionUsageAnalysis;
375+
/** Session-level actual token count from LLM API (e.g. session.shutdown in CLI format). 0 when unavailable. */
376+
actualTokens?: number;
356377
}
357378

358379
// Local summary type for customization files (mirrors webview/shared/contextRefUtils.ts)

vscode-extension/src/usageAnalysis.ts

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import {
2929
extractPerRequestUsageFromRawLines,
3030
createEmptyContextRefs,
3131
extractSubAgentData,
32+
buildReasoningEffortTimeline,
3233
} from './tokenEstimation';
3334
import {
3435
getModeType,
@@ -239,6 +240,17 @@ export function mergeUsageAnalysis(period: UsageAnalysisPeriod, analysis: Sessio
239240
period.agentTypes.workspaceAgent += analysis.agentTypes.workspaceAgent;
240241
period.agentTypes.other += analysis.agentTypes.other;
241242
}
243+
244+
if (analysis.thinkingEffort) {
245+
if (!period.thinkingEffortUsage) {
246+
period.thinkingEffortUsage = { byEffort: {}, sessionCount: 0, switchCount: 0 };
247+
}
248+
period.thinkingEffortUsage.sessionCount++;
249+
period.thinkingEffortUsage.switchCount += analysis.thinkingEffort.switchCount;
250+
for (const [effort, count] of Object.entries(analysis.thinkingEffort.byEffort)) {
251+
period.thinkingEffortUsage.byEffort[effort] = (period.thinkingEffortUsage.byEffort[effort] || 0) + count;
252+
}
253+
}
242254
}
243255

244256
/**
@@ -1362,6 +1374,22 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
13621374
applyModelTierClassification(deps, uniqueModels, models, analysis);
13631375
}
13641376

1377+
// Extract thinking effort (reasoning effort) from delta lines
1378+
{
1379+
const { effortByRequestId, defaultEffort, switchCount: effortSwitchCount } = buildReasoningEffortTimeline(lines);
1380+
if (defaultEffort !== null || effortByRequestId.size > 0) {
1381+
const byEffort: { [effort: string]: number } = {};
1382+
for (const [, effort] of effortByRequestId) {
1383+
byEffort[effort] = (byEffort[effort] || 0) + 1;
1384+
}
1385+
// If we have a defaultEffort but no per-request data, record it as the session default
1386+
if (effortByRequestId.size === 0 && defaultEffort !== null) {
1387+
byEffort[defaultEffort] = requests.length;
1388+
}
1389+
analysis.thinkingEffort = { byEffort, switchCount: effortSwitchCount, defaultEffort };
1390+
}
1391+
}
1392+
13651393
// Derive conversation patterns from mode usage before returning
13661394
deriveConversationPatterns(analysis);
13671395

@@ -1370,11 +1398,36 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
13701398

13711399
// Non-delta JSONL (Copilot CLI format) - process line-by-line
13721400
let sessionMode = 'ask';
1401+
let cliDefaultModel = 'gpt-4o';
1402+
let cliDefaultEffort: string | null = null;
1403+
let cliRequestCount = 0;
1404+
const cliEffortByRequest: { [effort: string]: number } = {};
13731405
for (const line of lines) {
13741406
if (!line.trim()) { continue; }
13751407
try {
13761408
const event = JSON.parse(line);
13771409

1410+
// Copilot CLI session.start carries model + reasoningEffort
1411+
if (event.type === 'session.start' && event.data) {
1412+
if (typeof event.data.selectedModel === 'string') {
1413+
cliDefaultModel = event.data.selectedModel;
1414+
}
1415+
if (typeof event.data.reasoningEffort === 'string') {
1416+
cliDefaultEffort = event.data.reasoningEffort;
1417+
}
1418+
}
1419+
1420+
// Count user.message requests and accumulate effort counts
1421+
if (event.type === 'user.message') {
1422+
cliRequestCount++;
1423+
const effort = typeof event.data?.reasoningEffort === 'string'
1424+
? event.data.reasoningEffort
1425+
: cliDefaultEffort;
1426+
if (effort) {
1427+
cliEffortByRequest[effort] = (cliEffortByRequest[effort] || 0) + 1;
1428+
}
1429+
}
1430+
13781431
// Handle VS Code incremental format - detect mode from session header
13791432
if (event.kind === 0 && event.v?.inputState?.mode) {
13801433
sessionMode = getModeType(event.v.inputState.mode);
@@ -1517,6 +1570,15 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
15171570
// Skip malformed lines
15181571
}
15191572
}
1573+
1574+
// Store CLI thinking effort data if available
1575+
if (cliDefaultEffort !== null || Object.keys(cliEffortByRequest).length > 0) {
1576+
const byEffort = Object.keys(cliEffortByRequest).length > 0
1577+
? cliEffortByRequest
1578+
: (cliDefaultEffort !== null ? { [cliDefaultEffort]: cliRequestCount } : {});
1579+
analysis.thinkingEffort = { byEffort, switchCount: 0, defaultEffort: cliDefaultEffort };
1580+
}
1581+
15201582
// Calculate model switching for JSONL files before returning
15211583
await calculateModelSwitching(deps, sessionFile, analysis, fileContent);
15221584

@@ -1692,6 +1754,11 @@ export async function getModelUsageFromSession(deps: Pick<UsageAnalysisDeps, 'wa
16921754
sessionState = applyDelta(sessionState, event);
16931755
}
16941756

1757+
// Copilot CLI session.start carries the selected model
1758+
if (event.type === 'session.start' && typeof event.data?.selectedModel === 'string') {
1759+
defaultModel = event.data.selectedModel;
1760+
}
1761+
16951762
// Handle VS Code incremental format - extract model from session header (kind: 0)
16961763
// The schema has v.selectedModel.identifier or v.selectedModel.metadata.id
16971764
if (event.kind === 0) {

0 commit comments

Comments
 (0)