rajbos
diff --git a/‎.github/instructions/vscode-extension.instructions.md‎
Lines changed: 5 additions & 1 deletion b/‎.github/instructions/vscode-extension.instructions.md‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎vscode-extension/src/extension.ts‎
Lines changed: 34 additions & 7 deletions b/‎vscode-extension/src/extension.ts‎
Lines changed: 34 additions & 7 deletions
diff --git a/‎vscode-extension/src/tokenEstimation.ts‎
Lines changed: 79 additions & 0 deletions b/‎vscode-extension/src/tokenEstimation.ts‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎vscode-extension/src/types.ts‎
Lines changed: 21 additions & 0 deletions b/‎vscode-extension/src/types.ts‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎vscode-extension/src/usageAnalysis.ts‎
Lines changed: 67 additions & 0 deletions b/‎vscode-extension/src/usageAnalysis.ts‎
Lines changed: 67 additions & 0 deletions
@@ -48,7 +48,11 @@ The entire extension's logic is contained within the `CopilotTokenTracker` class
 - **Watch Mode**: For active development, use `npm run watch` from `vscode-extension/`. This will automatically recompile the extension on file changes.
 - **Testing/Debugging**: Press `F5` in VS Code to open the Extension Development Host. This will launch a new VS Code window with the extension running. `console.log` statements from `vscode-extension/src/extension.ts` will appear in the Developer Tools console of this new window (Help > Toggle Developer Tools).
 
-**Important build guidance:** After making changes to source code or related files (TypeScript, JavaScript, JSON, or other code files used by the extension), always run `npm run compile` from `vscode-extension/` to validate that the project still builds and lints cleanly before opening a pull request or releasing. You do not need to run the full compile step for documentation-only changes (Markdown files), but you should run it after any edits that touch source, configuration, or JSON data files.
+**Important build guidance:** After making changes to source code or related files (TypeScript, JavaScript, JSON, or other code files used by the extension), always run `npm ci` followed by `npm run compile` from `vscode-extension/` to validate that the project still builds and lints cleanly before opening a pull request or releasing. Also run the unit tests with `npm run test:node` to catch any regressions. You do not need to run the full compile step for documentation-only changes (Markdown files), but you should run it after any edits that touch source, configuration, or JSON data files.
+
+**Always use `npm ci` (not `npm install`) when validating a build** — `npm ci` installs from the lockfile exactly, mirroring what CI does, and will catch any dependency drift. Use `npm install` only when intentionally adding or updating packages.
+
+> ⚠️ **Common mistake**: The `edit` tool's old_str/new_str replacement can accidentally drop comment delimiters (e.g. `/**` opening a JSDoc block) when the match boundary falls exactly at that line. After editing `tokenEstimation.ts` or any file with JSDoc comments, always verify the file compiles before committing.
 
 ## Development Guidelines
 
 
@@ -1,4 +1,4 @@
-import * as vscode from 'vscode';
+import * as vscode from 'vscode';
 import * as fs from 'fs';
 import * as path from 'path';
 import * as os from 'os';
@@ -71,6 +71,7 @@ import {
   getTotalTokensFromModelUsage as _getTotalTokensFromModelUsage,
   reconstructJsonlStateAsync as _reconstructJsonlStateAsync,
   extractSubAgentData as _extractSubAgentData,
+  buildReasoningEffortTimeline as _buildReasoningEffortTimeline,
 } from './tokenEstimation';
 import { SessionDiscovery } from './sessionDiscovery';
 import { CacheManager } from './cacheManager';
@@ -167,7 +168,7 @@ type RepoPrStatsResult = {
 
 class CopilotTokenTracker implements vscode.Disposable {
 	// Cache version - increment this when making changes that require cache invalidation
-	private static readonly CACHE_VERSION = 36; // Add first-user-message fallback title for untitled Copilot CLI sessions
+	private static readonly CACHE_VERSION = 37; // Add thinking effort (reasoning effort) tracking
 	// Maximum length for displaying workspace IDs in diagnostics/customization matrix
 	private static readonly WORKSPACE_ID_DISPLAY_LENGTH = 8;
 
@@ -4034,6 +4035,9 @@ class CopilotTokenTracker implements vscode.Disposable {
 					// blocking the extension host event loop on large files.
 					const { sessionState } = await _reconstructJsonlStateAsync(lines);
 
+					// Build per-request effort map from delta lines
+					const { effortByRequestId } = _buildReasoningEffortTimeline(lines);
+
 					// Extract session-level info
 					let sessionMode: 'ask' | 'edit' | 'agent' | 'plan' | 'customAgent' = 'ask';
 					let currentModel: string | null = null;
@@ -4119,14 +4123,29 @@ class CopilotTokenTracker implements vscode.Disposable {
 						inputTokensEstimate: this.estimateTokensFromText(userMessage, requestModel),
 						outputTokensEstimate: this.estimateTokensFromText(responseText, requestModel),
 						thinkingTokensEstimate: this.estimateTokensFromText(thinkingText, requestModel),
-						actualUsage
+						actualUsage,
+						thinkingEffort: effortByRequestId.get(request.requestId)
 					};
 
 					turns.push(turn);
 				}
 			} else {
 			// Non-delta JSONL (Copilot CLI format)
 			let turnNumber = 0;
+			let cliSessionModel = 'gpt-4o';
+			let cliSessionEffort: string | undefined;
+
+			// Pre-scan for session.start to extract default model and effort
+			for (const line of lines) {
+				try {
+					const ev = JSON.parse(line);
+					if (ev.type === 'session.start' && ev.data) {
+						if (typeof ev.data.selectedModel === 'string') { cliSessionModel = ev.data.selectedModel; }
+						if (typeof ev.data.reasoningEffort === 'string') { cliSessionEffort = ev.data.reasoningEffort; }
+						break;
+					}
+				} catch { /* skip */ }
+			}
 
 			for (const line of lines) {
 				try {
@@ -4138,19 +4157,24 @@ class CopilotTokenTracker implements vscode.Disposable {
 						const contextRefs = this.createEmptyContextRefs();
 						const userMessage = event.data.content;
 						this.analyzeContextReferences(userMessage, contextRefs);
+						const turnModel = event.model || event.data?.model || cliSessionModel;
+						const turnEffort: string | undefined = typeof event.data?.reasoningEffort === 'string'
+							? event.data.reasoningEffort
+							: cliSessionEffort;
 						const turn: ChatTurn = {
 							turnNumber,
 							timestamp: event.timestamp ? new Date(event.timestamp).toISOString() : null,
 							mode: 'agent', // CLI is typically agent mode
 							userMessage,
 							assistantResponse: '',
-							model: event.model || 'gpt-4o',
+							model: turnModel,
 							toolCalls: [],
 							contextReferences: contextRefs,
 							mcpTools: [],
-							inputTokensEstimate: this.estimateTokensFromText(userMessage, event.model || 'gpt-4o'),
+							inputTokensEstimate: this.estimateTokensFromText(userMessage, turnModel),
 							outputTokensEstimate: 0,
-							thinkingTokensEstimate: 0
+							thinkingTokensEstimate: 0,
+							thinkingEffort: turnEffort
 						};
 						turns.push(turn);
 					}
@@ -4295,6 +4319,8 @@ class CopilotTokenTracker implements vscode.Disposable {
 			this.warn(`Error loading usage analysis for ${sessionFile}: ${usageError}`);
 		}
 
+		const sessionCache = this.getCachedSessionData(sessionFile);
+
 		return {
 			file: details.file,
 			title: details.title || null,
@@ -4307,7 +4333,8 @@ class CopilotTokenTracker implements vscode.Disposable {
 			firstInteraction: details.firstInteraction,
 			lastInteraction: details.lastInteraction,
 			turns,
-			usageAnalysis
+			usageAnalysis,
+			actualTokens: sessionCache?.actualTokens || 0
 		};
 	}
 
 
@@ -243,6 +243,85 @@ export async function reconstructJsonlStateAsync(lines: string[], yieldInterval
 	return { sessionState, isDeltaBased };
 }
 
+/**
+ * Build a map from requestId → reasoning effort level by scanning delta-based JSONL lines.
+ *
+ * The effort level is taken from `metadata.configurationSchema.properties.reasoningEffort.default`
+ * on the active selectedModel at the time each request is added to the session.
+ *
+ * Lines that are blank, fail `JSON.parse`, or lack a numeric `kind` are skipped.
+ * Requests appended while no effort is known yet are omitted from the map.
+ *
+ * @param lines Raw JSONL lines of a session file, one JSON delta per line.
+ * @returns
+ *   - `effortByRequestId`: effort level that was current when each request was appended.
+ *   - `defaultEffort`: effort found in the initial state (`kind: 0`), or null if none was found.
+ *   - `switchCount`: number of `kind: 1` selectedModel updates that changed an already-known
+ *     effort to a different value.
+ */
+export function buildReasoningEffortTimeline(lines: string[]): {
+  effortByRequestId: Map<string, string>;
+  defaultEffort: string | null;
+  switchCount: number;
+} {
+  const effortByRequestId = new Map<string, string>();
+  let currentEffort: string | null = null;
+  let defaultEffort: string | null = null;
+  let switchCount = 0;
+
+  function extractEffortFromModel(model: unknown): string | null { // returns null as soon as any step of the nested path is missing or not an object
+    if (!model || typeof model !== 'object') { return null; }
+    const m = model as Record<string, unknown>;
+    const metadata = m['metadata'];
+    if (!metadata || typeof metadata !== 'object') { return null; }
+    const meta = metadata as Record<string, unknown>;
+    const schema = meta['configurationSchema'];
+    if (!schema || typeof schema !== 'object') { return null; }
+    const s = schema as Record<string, unknown>;
+    const props = s['properties'];
+    if (!props || typeof props !== 'object') { return null; }
+    const p = props as Record<string, unknown>;
+    const re = p['reasoningEffort'];
+    if (!re || typeof re !== 'object') { return null; }
+    const r = re as Record<string, unknown>;
+    return typeof r['default'] === 'string' ? r['default'] : null; // only a string default counts as an effort level
+  }
+
+  for (const line of lines) {
+    if (!line.trim()) { continue; }
+    let delta: any;
+    try { delta = JSON.parse(line); } catch { continue; } // unparseable lines are ignored rather than aborting the scan
+    if (typeof delta.kind !== 'number') { continue; }
+
+    if (delta.kind === 0) {
+      // Initial state: extract model from inputState.selectedModel; this is the only place defaultEffort is set
+      const model = delta.v?.inputState?.selectedModel;
+      const effort = extractEffortFromModel(model);
+      if (effort !== null) {
+        currentEffort = effort;
+        defaultEffort = effort;
+      }
+    } else if (delta.kind === 1) {
+      const k = delta.k;
+      // Update to inputState.selectedModel — two-element path; other key paths are ignored here
+      if (Array.isArray(k) && k[0] === 'inputState' && k[1] === 'selectedModel') {
+        const effort = extractEffortFromModel(delta.v);
+        if (effort !== null && effort !== currentEffort) {
+          if (currentEffort !== null) { switchCount++; } // the first effort ever seen is not counted as a switch
+          currentEffort = effort;
+        }
+      }
+    } else if (delta.kind === 2) {
+      const k = delta.k;
+      // New request being added: k = ["requests", <index>]; skipped entirely while no effort is known
+      if (Array.isArray(k) && k[0] === 'requests' && typeof k[1] === 'number' && currentEffort !== null) {
+        const req = delta.v;
+        if (req && typeof req === 'object') {
+          const r = req as Record<string, unknown>;
+          const requestId = typeof r['requestId'] === 'string' ? r['requestId'] : null;
+          if (requestId) {
+            effortByRequestId.set(requestId, currentEffort);
+          }
+        }
+      }
+    }
+  }
+
+  return { effortByRequestId, defaultEffort, switchCount };
+}
+
 /**
  * Extract per-request actual token usage from raw JSONL lines using regex.
  * Handles cases where lines with result data fail JSON.parse due to bad escape characters.
 
@@ -122,6 +122,16 @@ category?: 'copilot' | 'non-copilot';
 }
 
 // New interfaces for usage analysis
+/** Per-level request counts for thinking effort (reasoning effort) tracking within a single session. */
+export interface ThinkingEffortUsage {
+  /** Number of requests submitted at each effort level, keyed by level name (e.g. "low", "medium", "high"). When only a session default is known, all of the session's requests are counted under that level. */
+  byEffort: { [effort: string]: number };
+  /** Number of times the effort level changed within this session. The Copilot CLI analysis path always reports 0. */
+  switchCount: number;
+  /** The effort level active at the start of the session, or null if not available. */
+  defaultEffort: string | null;
+}
+
 export interface SessionUsageAnalysis {
   toolCalls: ToolCallUsage;
   modeUsage: ModeUsage;
@@ -138,6 +148,7 @@ export interface SessionUsageAnalysis {
     unknownRequests: number;
     totalRequests: number;
   };
+  thinkingEffort?: ThinkingEffortUsage;
   editScope?: EditScopeUsage;
   applyUsage?: ApplyButtonUsage;
   sessionDuration?: SessionDurationData;
@@ -289,6 +300,12 @@ export interface UsageAnalysisPeriod {
   sessionDuration: SessionDurationData;
   conversationPatterns: ConversationPatterns;
   agentTypes: AgentTypeUsage;
+  /** Aggregated thinking effort (reasoning effort) usage across all sessions in this period. */
+  thinkingEffortUsage?: {
+    byEffort: { [effort: string]: number };
+    sessionCount: number; // sessions with effort data
+    switchCount: number;  // total effort switches across all sessions
+  };
 }
 
 // Detailed session file information for diagnostics view
@@ -337,6 +354,8 @@ export interface ChatTurn {
   outputTokensEstimate: number;
   thinkingTokensEstimate: number;
   actualUsage?: ActualUsage;
+  /** Thinking effort level active when this turn was submitted (e.g. "low", "medium", "high"). */
+  thinkingEffort?: string;
 }
 
 // Full session log data for the log viewer
@@ -353,6 +372,8 @@ export interface SessionLogData {
   lastInteraction: string | null;
   turns: ChatTurn[];
   usageAnalysis?: SessionUsageAnalysis;
+  /** Session-level actual token count from LLM API (e.g. session.shutdown in CLI format). 0 when unavailable. */
+  actualTokens?: number;
 }
 
 // Local summary type for customization files (mirrors webview/shared/contextRefUtils.ts)
 
@@ -29,6 +29,7 @@ import {
 	extractPerRequestUsageFromRawLines,
 	createEmptyContextRefs,
 	extractSubAgentData,
+	buildReasoningEffortTimeline,
 } from './tokenEstimation';
 import {
 	getModeType,
@@ -239,6 +240,17 @@ export function mergeUsageAnalysis(period: UsageAnalysisPeriod, analysis: Sessio
 		period.agentTypes.workspaceAgent += analysis.agentTypes.workspaceAgent;
 		period.agentTypes.other += analysis.agentTypes.other;
 	}
+
+	if (analysis.thinkingEffort) {
+		if (!period.thinkingEffortUsage) {
+			period.thinkingEffortUsage = { byEffort: {}, sessionCount: 0, switchCount: 0 };
+		}
+		period.thinkingEffortUsage.sessionCount++;
+		period.thinkingEffortUsage.switchCount += analysis.thinkingEffort.switchCount;
+		for (const [effort, count] of Object.entries(analysis.thinkingEffort.byEffort)) {
+			period.thinkingEffortUsage.byEffort[effort] = (period.thinkingEffortUsage.byEffort[effort] || 0) + count;
+		}
+	}
 }
 
 /**
@@ -1362,6 +1374,22 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 					applyModelTierClassification(deps, uniqueModels, models, analysis);
 				}
 
+				// Extract thinking effort (reasoning effort) from delta lines
+				{
+					const { effortByRequestId, defaultEffort, switchCount: effortSwitchCount } = buildReasoningEffortTimeline(lines);
+					if (defaultEffort !== null || effortByRequestId.size > 0) {
+						const byEffort: { [effort: string]: number } = {};
+						for (const [, effort] of effortByRequestId) {
+							byEffort[effort] = (byEffort[effort] || 0) + 1;
+						}
+						// If we have a defaultEffort but no per-request data, attribute every request in the session to that default level
+						if (effortByRequestId.size === 0 && defaultEffort !== null) {
+							byEffort[defaultEffort] = requests.length;
+						}
+						analysis.thinkingEffort = { byEffort, switchCount: effortSwitchCount, defaultEffort };
+					}
+				}
+
 				// Derive conversation patterns from mode usage before returning
 				deriveConversationPatterns(analysis);
 
@@ -1370,11 +1398,36 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 
 			// Non-delta JSONL (Copilot CLI format) - process line-by-line
 			let sessionMode = 'ask';
+			let cliDefaultModel = 'gpt-4o';
+			let cliDefaultEffort: string | null = null;
+			let cliRequestCount = 0;
+			const cliEffortByRequest: { [effort: string]: number } = {};
 			for (const line of lines) {
 				if (!line.trim()) { continue; }
 				try {
 					const event = JSON.parse(line);
 
+					// Copilot CLI session.start carries model + reasoningEffort
+					if (event.type === 'session.start' && event.data) {
+						if (typeof event.data.selectedModel === 'string') {
+							cliDefaultModel = event.data.selectedModel;
+						}
+						if (typeof event.data.reasoningEffort === 'string') {
+							cliDefaultEffort = event.data.reasoningEffort;
+						}
+					}
+
+					// Count user.message requests and accumulate effort counts
+					if (event.type === 'user.message') {
+						cliRequestCount++;
+						const effort = typeof event.data?.reasoningEffort === 'string'
+							? event.data.reasoningEffort
+							: cliDefaultEffort;
+						if (effort) {
+							cliEffortByRequest[effort] = (cliEffortByRequest[effort] || 0) + 1;
+						}
+					}
+
 					// Handle VS Code incremental format - detect mode from session header
 					if (event.kind === 0 && event.v?.inputState?.mode) {
 						sessionMode = getModeType(event.v.inputState.mode);
@@ -1517,6 +1570,15 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 					// Skip malformed lines
 				}
 			}
+
+			// Store CLI thinking effort data if available
+			if (cliDefaultEffort !== null || Object.keys(cliEffortByRequest).length > 0) {
+				const byEffort = Object.keys(cliEffortByRequest).length > 0
+					? cliEffortByRequest
+					: (cliDefaultEffort !== null ? { [cliDefaultEffort]: cliRequestCount } : {});
+				analysis.thinkingEffort = { byEffort, switchCount: 0, defaultEffort: cliDefaultEffort };
+			}
+
 			// Calculate model switching for JSONL files before returning
 			await calculateModelSwitching(deps, sessionFile, analysis, fileContent);
 
@@ -1692,6 +1754,11 @@ export async function getModelUsageFromSession(deps: Pick<UsageAnalysisDeps, 'wa
 						sessionState = applyDelta(sessionState, event);
 					}
 
+					// Copilot CLI session.start carries the selected model
+					if (event.type === 'session.start' && typeof event.data?.selectedModel === 'string') {
+						defaultModel = event.data.selectedModel;
+					}
+
 					// Handle VS Code incremental format - extract model from session header (kind: 0)
 					// The schema has v.selectedModel.identifier or v.selectedModel.metadata.id
 					if (event.kind === 0) {