fix: eliminate repeated JSONL delta reconstruction that starves extension host (#565)

tianzheng-zhou · rajbos · web-flow · commit 781a92155fcd · 2026-04-10T07:23:26.000Z
Root cause: the extension host's single-threaded event loop was blocked by
repeated synchronous split + JSON.parse + applyDelta loops over the same large
delta-based JSONL files in multiple analysis helpers, which tripped VS Code's
unresponsive-extension-host watchdog and caused a crash-restart loop.

Three fixes:

1. usageAnalysis.ts: the delta-based JSONL early-return branch in
   analyzeSessionUsage now computes model switching inline from the
   already-reconstructed sessionState instead of calling
   calculateModelSwitching (which re-read the file and called
   getModelUsageFromSession for yet another re-read). The non-delta
   JSONL and regular JSON paths now pass preloadedContent through to
   calculateModelSwitching and trackEnhancedMetrics to avoid re-reads.

2. extension.ts: removed the hidden pre-warm of calculateUsageAnalysisStats
   that ran even when the analysis panel was not open. This triggered
   workspace customization scans and JSONL processing on every 5-minute
   timer tick, amplifying the event-loop starvation on startup.

3. extension.ts: replaced hand-rolled synchronous applyDelta loops in the
   session details and log viewer paths with reconstructJsonlStateAsync,
   a new helper in tokenEstimation.ts that yields to the event loop every
   500 lines to prevent blocking.

Co-authored-by: Rob Bos <rajbos@users.noreply.github.com>
diff --git a/vscode-extension/src/extension.ts b/vscode-extension/src/extension.ts
@@ -67,6 +67,7 @@ import {
   calculateEstimatedCost as _calculateEstimatedCost,
   createEmptyContextRefs as _createEmptyContextRefs,
   getTotalTokensFromModelUsage as _getTotalTokensFromModelUsage,
+  reconstructJsonlStateAsync as _reconstructJsonlStateAsync,
 } from './tokenEstimation';
 import { SessionDiscovery } from './sessionDiscovery';
 import { CacheManager } from './cacheManager';
@@ -1074,8 +1075,10 @@ class CopilotTokenTracker implements vscode.Disposable {
 					this.analysisPanel.webview.html = this.getUsageAnalysisHtml(this.analysisPanel.webview, analysisStats);
 				}
 			} else {
-				// Pre-populate the cache even when panel isn't open, so first open is fast
-				await this.calculateUsageAnalysisStats(false);
+				// Skip pre-warming usage analysis when the panel isn't open.
+				// calculateUsageAnalysisStats triggers workspace customization scans
+				// and JSONL reconstruction which can starve the extension host event loop
+				// on startup, amplifying the crash-loop risk.
 			}
 
 			// If the maturity panel is open, update its content.
@@ -2927,16 +2930,9 @@ class CopilotTokenTracker implements vscode.Disposable {
 				}
 
 				if (isDeltaBased) {
-					// Delta-based format: reconstruct full state first, then extract details
-					let sessionState: any = {};
-					for (const line of lines) {
-						try {
-							const delta = JSON.parse(line);
-							sessionState = this.applyDelta(sessionState, delta);
-						} catch {
-							// Skip invalid lines
-						}
-					}
+					// Delta-based format: reconstruct full state asynchronously to avoid
+					// blocking the extension host event loop on large files.
+					const { sessionState } = await _reconstructJsonlStateAsync(lines);
 
 					// Extract session metadata from reconstructed state
 					if (sessionState.creationDate) {
@@ -3443,16 +3439,9 @@ class CopilotTokenTracker implements vscode.Disposable {
 				}
 
 				if (isDeltaBased) {
-					// Delta-based format: reconstruct full state first, then extract turns
-					let sessionState: any = {};
-					for (const line of lines) {
-						try {
-							const delta = JSON.parse(line);
-							sessionState = this.applyDelta(sessionState, delta);
-						} catch {
-							// Skip invalid lines
-						}
-					}
+					// Delta-based format: reconstruct full state asynchronously to avoid
+					// blocking the extension host event loop on large files.
+					const { sessionState } = await _reconstructJsonlStateAsync(lines);
 
 					// Extract session-level info
 					let sessionMode: 'ask' | 'edit' | 'agent' | 'plan' | 'customAgent' = 'ask';
diff --git a/vscode-extension/src/tokenEstimation.ts b/vscode-extension/src/tokenEstimation.ts
@@ -150,6 +150,33 @@ export function estimateTokensFromJsonlSession(fileContent: string): { tokens: n
 	return { tokens: totalTokens + totalThinkingTokens, thinkingTokens: totalThinkingTokens, actualTokens: finalActualTokens };
 }
 
+/**
+ * Reconstruct session state by applying every parsed line with a numeric `kind` via applyDelta;
+ * blank/unparsable lines are skipped. After the first delta line, yields to the event loop every
+ * `yieldInterval` lines (non-finite or < 1 values fall back to 500 so yielding is never disabled).
+ */
+export async function reconstructJsonlStateAsync(lines: string[], yieldInterval = 500): Promise<{ sessionState: any; isDeltaBased: boolean }> {
+	const interval = Number.isFinite(yieldInterval) && yieldInterval >= 1 ? Math.floor(yieldInterval) : 500;
+	let sessionState: any = {};
+	let isDeltaBased = false;
+	for (let i = 0; i < lines.length; i++) {
+		// Check at the top of the iteration so the blank-line `continue` below cannot skip a yield.
+		if (isDeltaBased && i > 0 && i % interval === 0) {
+			await new Promise<void>(resolve => setTimeout(resolve, 0));
+		}
+		const line = lines[i];
+		if (!line.trim()) { continue; }
+		try {
+			const delta = JSON.parse(line);
+			if (typeof delta.kind === 'number') {
+				isDeltaBased = true;
+				sessionState = applyDelta(sessionState, delta);
+			}
+		} catch { /* Skip invalid lines (bad JSON, or JSON that parsed to null/a primitive) */ }
+	}
+	return { sessionState, isDeltaBased };
+}
+
 /**
  * Extract per-request actual token usage from raw JSONL lines using regex.
  * Handles cases where lines with result data fail JSON.parse due to bad escape characters.
diff --git a/vscode-extension/src/usageAnalysis.ts b/vscode-extension/src/usageAnalysis.ts
@@ -556,11 +556,11 @@ function applyModelTierClassification(
  * Calculate model switching statistics for a session file.
  * This method updates the analysis.modelSwitching field in place.
  */
-export async function calculateModelSwitching(deps: Pick<UsageAnalysisDeps, 'warn' | 'modelPricing' | 'openCode' | 'continue_' | 'tokenEstimators'>, sessionFile: string, analysis: SessionUsageAnalysis): Promise<void> {
+export async function calculateModelSwitching(deps: Pick<UsageAnalysisDeps, 'warn' | 'modelPricing' | 'openCode' | 'continue_' | 'tokenEstimators'>, sessionFile: string, analysis: SessionUsageAnalysis, preloadedContent?: string): Promise<void> {
 	try {
 		// Use non-cached method to avoid circular dependency
 		// (getSessionFileDataCached -> analyzeSessionUsage -> getModelUsageFromSessionCached -> getSessionFileDataCached)
-		const modelUsage = await getModelUsageFromSession(deps, sessionFile);
+		const modelUsage = await getModelUsageFromSession(deps, sessionFile, preloadedContent);
 		const modelCount = modelUsage ? Object.keys(modelUsage).length : 0;
 
 		// Skip if modelUsage is undefined or empty (not a valid session file)
@@ -593,7 +593,7 @@ export async function calculateModelSwitching(deps: Pick<UsageAnalysisDeps, 'war
 		analysis.modelSwitching.hasMixedTiers = standardModels.length > 0 && premiumModels.length > 0;
 
 		// Count requests per tier and model switches by examining request sequence
-		const fileContent = await fs.promises.readFile(sessionFile, 'utf8');
+		const fileContent = preloadedContent ?? await fs.promises.readFile(sessionFile, 'utf8');
 		// Check if this is a UUID-only file (new Copilot CLI format)
 		if (isUuidPointerFile(fileContent)) {
 			return;
@@ -719,9 +719,9 @@ export async function calculateModelSwitching(deps: Pick<UsageAnalysisDeps, 'war
  * - Conversation patterns (multi-turn sessions)
  * - Agent type usage
  */
-export async function trackEnhancedMetrics(deps: Pick<UsageAnalysisDeps, 'warn'>, sessionFile: string, analysis: SessionUsageAnalysis): Promise<void> {
+export async function trackEnhancedMetrics(deps: Pick<UsageAnalysisDeps, 'warn'>, sessionFile: string, analysis: SessionUsageAnalysis, preloadedContent?: string): Promise<void> {
 	try {
-		const fileContent = await fs.promises.readFile(sessionFile, 'utf8');
+		const fileContent = preloadedContent ?? await fs.promises.readFile(sessionFile, 'utf8');
 
 		// Check if this is a UUID-only file (new Copilot CLI format)
 		if (isUuidPointerFile(fileContent)) {
@@ -1280,8 +1280,42 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 					}
 				}
 
-				// Calculate model switching for delta-based JSONL files
-				await calculateModelSwitching(deps, sessionFile, analysis);
+				// Compute model switching inline from the already-reconstructed state
+				// to avoid re-reading and re-parsing the file in calculateModelSwitching.
+				{
+					// Derive the session-level default model from reconstructed state,
+					// mirroring the selectedModel extraction used in the line-by-line path.
+					const sessionDefaultModel = (
+						sessionState.selectedModel?.identifier ||
+						sessionState.selectedModel?.metadata?.id ||
+						sessionState.inputState?.selectedModel?.metadata?.id ||
+						'gpt-4o'
+					).replace(/^copilot\//, '');
+
+					const models: string[] = [];
+					for (const req of requests) {
+						if (!req || !req.requestId) { continue; }
+						let reqModel = sessionDefaultModel;
+						if (req.modelId) {
+							reqModel = req.modelId.replace(/^copilot\//, '');
+						} else if (req.result?.metadata?.modelId) {
+							reqModel = req.result.metadata.modelId.replace(/^copilot\//, '');
+						} else if (req.result?.details) {
+							reqModel = getModelFromRequest(req, deps.modelPricing);
+						}
+						models.push(reqModel);
+					}
+					const uniqueModels = [...new Set(models)];
+					analysis.modelSwitching.uniqueModels = uniqueModels;
+					analysis.modelSwitching.modelCount = uniqueModels.length;
+					analysis.modelSwitching.totalRequests = models.length;
+					let switchCount = 0;
+					for (let mi = 1; mi < models.length; mi++) {
+						if (models[mi] !== models[mi - 1]) { switchCount++; }
+					}
+					analysis.modelSwitching.switchCount = switchCount;
+					applyModelTierClassification(deps, uniqueModels, models, analysis);
+				}
 
 				// Derive conversation patterns from mode usage before returning
 				deriveConversationPatterns(analysis);
@@ -1439,7 +1473,7 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 				}
 			}
 			// Calculate model switching for JSONL files before returning
-			await calculateModelSwitching(deps, sessionFile, analysis);
+			await calculateModelSwitching(deps, sessionFile, analysis, fileContent);
 
 			// Derive conversation patterns from mode usage before returning
 			deriveConversationPatterns(analysis);
@@ -1531,16 +1565,16 @@ export async function analyzeSessionUsage(deps: UsageAnalysisDeps, sessionFile:
 				}
 			}
 		}
+
+		// Calculate model switching statistics from session (pass preloaded content to avoid re-reading)
+		await calculateModelSwitching(deps, sessionFile, analysis, fileContent);
+
+		// Track new metrics: edit scope, apply usage, session duration, conversation patterns, agent types
+		await trackEnhancedMetrics(deps, sessionFile, analysis, fileContent);
 	} catch (error) {
 		deps.warn(`Error analyzing session usage from ${sessionFile}: ${error}`);
 	}
 
-	// Calculate model switching statistics from session
-	await calculateModelSwitching(deps, sessionFile, analysis);
-
-	// Track new metrics: edit scope, apply usage, session duration, conversation patterns, agent types
-	await trackEnhancedMetrics(deps, sessionFile, analysis);
-
 	return analysis;
 }