Skip to content

Commit 5c10ad3

Browse files
authored
fix: timeout handling for JSON-RPC requests in NativePythonFinder (#1154)
Fixes #1140 - Discovery stuck indefinitely Fixes #1081 - VS Code stuck on "Discovering Python Interpreters" on fresh Windows install Fixes #956 - Perpetually "Refreshing virtual environments"
1 parent 9be0201 commit 5c10ad3

File tree

1 file changed

+235
-20
lines changed

1 file changed

+235
-20
lines changed

src/managers/common/nativePythonFinder.ts

Lines changed: 235 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
import { ChildProcess } from 'child_process';
12
import * as fs from 'fs-extra';
23
import * as path from 'path';
34
import { PassThrough } from 'stream';
4-
import { Disposable, ExtensionContext, LogOutputChannel, Uri } from 'vscode';
5+
import { CancellationTokenSource, Disposable, ExtensionContext, LogOutputChannel, Uri } from 'vscode';
56
import * as rpc from 'vscode-jsonrpc/node';
67
import { PythonProjectApi } from '../../api';
78
import { spawnProcess } from '../../common/childProcess.apis';
@@ -14,6 +15,15 @@ import { createRunningWorkerPool, WorkerPool } from '../../common/utils/workerPo
1415
import { getConfiguration, getWorkspaceFolders } from '../../common/workspace.apis';
1516
import { noop } from './utils';
1617

18+
// Timeout constants for JSON-RPC requests (in milliseconds)
19+
const CONFIGURE_TIMEOUT_MS = 30_000; // 30 seconds for configuration
20+
const REFRESH_TIMEOUT_MS = 120_000; // 2 minutes for full refresh
21+
const RESOLVE_TIMEOUT_MS = 30_000; // 30 seconds for single resolve
22+
23+
// Restart/recovery constants
24+
const MAX_RESTART_ATTEMPTS = 3;
25+
const RESTART_BACKOFF_BASE_MS = 1_000; // 1 second base, exponential: 1s, 2s, 4s
26+
1727
export async function getNativePythonToolsPath(): Promise<string> {
1828
const envsExt = getExtension(ENVS_EXTENSION_ID);
1929
if (envsExt) {
@@ -104,11 +114,48 @@ interface RefreshOptions {
104114
searchPaths?: string[];
105115
}
106116

117+
/**
118+
* Wraps a JSON-RPC sendRequest call with a timeout.
119+
* @param connection The JSON-RPC connection
120+
* @param method The RPC method name
121+
* @param params The parameters to send
122+
* @param timeoutMs Timeout in milliseconds
123+
* @returns The result of the request
124+
* @throws Error if the request times out
125+
*/
126+
async function sendRequestWithTimeout<T>(
127+
connection: rpc.MessageConnection,
128+
method: string,
129+
params: unknown,
130+
timeoutMs: number,
131+
): Promise<T> {
132+
const cts = new CancellationTokenSource();
133+
const timeoutPromise = new Promise<never>((_, reject) => {
134+
const timer = setTimeout(() => {
135+
cts.cancel();
136+
reject(new Error(`Request '${method}' timed out after ${timeoutMs}ms`));
137+
}, timeoutMs);
138+
// Clear timeout if the CancellationTokenSource is disposed
139+
cts.token.onCancellationRequested(() => clearTimeout(timer));
140+
});
141+
142+
try {
143+
return await Promise.race([connection.sendRequest<T>(method, params, cts.token), timeoutPromise]);
144+
} finally {
145+
cts.dispose();
146+
}
147+
}
148+
107149
class NativePythonFinderImpl implements NativePythonFinder {
108-
private readonly connection: rpc.MessageConnection;
150+
private connection: rpc.MessageConnection;
109151
private readonly pool: WorkerPool<NativePythonEnvironmentKind | Uri[] | undefined, NativeInfo[]>;
110152
private cache: Map<string, NativeInfo[]> = new Map();
111-
private readonly startDisposables: Disposable[] = [];
153+
private startDisposables: Disposable[] = [];
154+
private proc: ChildProcess | undefined;
155+
private processExited: boolean = false;
156+
private startFailed: boolean = false;
157+
private restartAttempts: number = 0;
158+
private isRestarting: boolean = false;
112159

113160
constructor(
114161
private readonly outputChannel: LogOutputChannel,
@@ -125,13 +172,127 @@ class NativePythonFinderImpl implements NativePythonFinder {
125172
}
126173

127174
public async resolve(executable: string): Promise<NativeEnvInfo> {
128-
await this.configure();
129-
const environment = await this.connection.sendRequest<NativeEnvInfo>('resolve', {
130-
executable,
131-
});
175+
await this.ensureProcessRunning();
176+
try {
177+
await this.configure();
178+
const environment = await sendRequestWithTimeout<NativeEnvInfo>(
179+
this.connection,
180+
'resolve',
181+
{ executable },
182+
RESOLVE_TIMEOUT_MS,
183+
);
184+
185+
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
186+
// Reset restart attempts on successful request
187+
this.restartAttempts = 0;
188+
return environment;
189+
} catch (ex) {
190+
// On timeout, kill the hung process so next request triggers restart
191+
if (ex instanceof Error && ex.message.includes('timed out')) {
192+
this.outputChannel.warn('[pet] Resolve request timed out, killing hung process for restart');
193+
this.killProcess();
194+
this.processExited = true;
195+
}
196+
throw ex;
197+
}
198+
}
199+
200+
/**
201+
* Ensures the PET process is running. If it has exited or failed, attempts to restart
202+
* with exponential backoff up to MAX_RESTART_ATTEMPTS times.
203+
* @throws Error if the process cannot be started after all retry attempts
204+
*/
205+
private async ensureProcessRunning(): Promise<void> {
206+
// Process is running fine
207+
if (!this.startFailed && !this.processExited) {
208+
return;
209+
}
210+
211+
// Already in the process of restarting (prevent recursive restarts)
212+
if (this.isRestarting) {
213+
throw new Error('Python Environment Tools (PET) is currently restarting. Please try again.');
214+
}
215+
216+
// Check if we've exceeded max restart attempts
217+
if (this.restartAttempts >= MAX_RESTART_ATTEMPTS) {
218+
throw new Error(
219+
`Python Environment Tools (PET) failed after ${MAX_RESTART_ATTEMPTS} restart attempts. ` +
220+
'Please reload the window or check the output channel for details. ' +
221+
'To debug, run "Python Environments: Run Python Environment Tool (PET) in Terminal" from the Command Palette.',
222+
);
223+
}
224+
225+
// Attempt restart with exponential backoff
226+
await this.restart();
227+
}
228+
229+
/**
230+
* Kills the current PET process (if running) and starts a fresh one.
231+
* Implements exponential backoff between restart attempts.
232+
*/
233+
private async restart(): Promise<void> {
234+
this.isRestarting = true;
235+
this.restartAttempts++;
236+
237+
const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, this.restartAttempts - 1);
238+
this.outputChannel.warn(
239+
`[pet] Restarting Python Environment Tools (attempt ${this.restartAttempts}/${MAX_RESTART_ATTEMPTS}, ` +
240+
`waiting ${backoffMs}ms)`,
241+
);
132242

133-
this.outputChannel.info(`Resolved Python Environment ${environment.executable}`);
134-
return environment;
243+
try {
244+
// Kill existing process if still running
245+
this.killProcess();
246+
247+
// Dispose existing connection and streams
248+
this.startDisposables.forEach((d) => d.dispose());
249+
this.startDisposables = [];
250+
251+
// Wait with exponential backoff before restarting
252+
await new Promise((resolve) => setTimeout(resolve, backoffMs));
253+
254+
// Reset state flags
255+
this.processExited = false;
256+
this.startFailed = false;
257+
this.lastConfiguration = undefined; // Force reconfiguration
258+
259+
// Start fresh
260+
this.connection = this.start();
261+
262+
this.outputChannel.info('[pet] Python Environment Tools restarted successfully');
263+
264+
// Reset restart attempts on successful start (process didn't immediately fail)
265+
// We'll reset this only after a successful request completes
266+
} catch (ex) {
267+
this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', ex);
268+
this.outputChannel.error(
269+
'[pet] To debug, run "Python Environments: Run Python Environment Tool (PET) in Terminal" from the Command Palette.',
270+
);
271+
throw ex;
272+
} finally {
273+
this.isRestarting = false;
274+
}
275+
}
276+
277+
/**
278+
* Attempts to kill the PET process. Used during restart and timeout recovery.
279+
*/
280+
private killProcess(): void {
281+
if (this.proc && this.proc.exitCode === null) {
282+
try {
283+
this.outputChannel.info('[pet] Killing hung/crashed PET process');
284+
this.proc.kill('SIGTERM');
285+
// Give it a moment to terminate gracefully, then force kill
286+
setTimeout(() => {
287+
if (this.proc && this.proc.exitCode === null) {
288+
this.proc.kill('SIGKILL');
289+
}
290+
}, 500);
291+
} catch (ex) {
292+
this.outputChannel.error('[pet] Error killing process:', ex);
293+
}
294+
}
295+
this.proc = undefined;
135296
}
136297

137298
public async refresh(hardRefresh: boolean, options?: NativePythonEnvironmentKind | Uri[]): Promise<NativeInfo[]> {
@@ -228,20 +389,43 @@ class NativePythonFinderImpl implements NativePythonFinder {
228389
// we have got the exit event.
229390
const readable = new PassThrough();
230391
const writable = new PassThrough();
392+
231393
try {
232-
const proc = spawnProcess(this.toolPath, ['server'], { env: process.env, stdio: 'pipe' });
233-
proc.stdout.pipe(readable, { end: false });
234-
proc.stderr.on('data', (data) => this.outputChannel.error(`[pet] ${data.toString()}`));
235-
writable.pipe(proc.stdin, { end: false });
394+
this.proc = spawnProcess(this.toolPath, ['server'], { env: process.env, stdio: 'pipe' });
395+
396+
if (!this.proc.stdout || !this.proc.stderr || !this.proc.stdin) {
397+
throw new Error('Failed to create stdio streams for PET process');
398+
}
399+
400+
this.proc.stdout.pipe(readable, { end: false });
401+
this.proc.stderr.on('data', (data) => this.outputChannel.error(`[pet] ${data.toString()}`));
402+
writable.pipe(this.proc.stdin, { end: false });
403+
404+
// Handle process exit - mark as exited so pending requests fail fast
405+
this.proc.on('exit', (code, signal) => {
406+
this.processExited = true;
407+
if (code !== 0) {
408+
this.outputChannel.error(
409+
`[pet] Python Environment Tools exited unexpectedly with code ${code}, signal ${signal}`,
410+
);
411+
}
412+
});
413+
414+
// Handle process errors (e.g., ENOENT if executable not found)
415+
this.proc.on('error', (err) => {
416+
this.processExited = true;
417+
this.outputChannel.error('[pet] Process error:', err);
418+
});
236419

420+
const proc = this.proc;
237421
this.startDisposables.push({
238422
dispose: () => {
239423
try {
240424
if (proc.exitCode === null) {
241425
// Attempt graceful shutdown by closing stdin before killing
242426
// This gives the process a chance to clean up
243427
this.outputChannel.debug('[pet] Shutting down Python Locator server');
244-
proc.stdin.end();
428+
proc.stdin?.end();
245429
// Give process a moment to exit gracefully, then force kill
246430
setTimeout(() => {
247431
if (proc.exitCode === null) {
@@ -255,7 +439,14 @@ class NativePythonFinderImpl implements NativePythonFinder {
255439
},
256440
});
257441
} catch (ex) {
442+
// Mark start as failed so all subsequent requests fail immediately
443+
this.startFailed = true;
258444
this.outputChannel.error(`[pet] Error starting Python Finder ${this.toolPath} server`, ex);
445+
this.outputChannel.error(
446+
'[pet] To debug, run "Python Environments: Run Python Environment Tool (PET) in Terminal" from the Command Palette.',
447+
);
448+
// Don't continue - throw so caller knows spawn failed
449+
throw ex;
259450
}
260451
const connection = rpc.createMessageConnection(
261452
new rpc.StreamMessageReader(readable),
@@ -300,6 +491,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
300491
}
301492

302493
private async doRefresh(options?: NativePythonEnvironmentKind | Uri[]): Promise<NativeInfo[]> {
494+
await this.ensureProcessRunning();
303495
const disposables: Disposable[] = [];
304496
const unresolved: Promise<void>[] = [];
305497
const nativeInfo: NativeInfo[] = [];
@@ -311,10 +503,12 @@ class NativePythonFinderImpl implements NativePythonFinder {
311503
this.outputChannel.info(`Discovered env: ${data.executable || data.prefix}`);
312504
if (data.executable && (!data.version || !data.prefix)) {
313505
unresolved.push(
314-
this.connection
315-
.sendRequest<NativeEnvInfo>('resolve', {
316-
executable: data.executable,
317-
})
506+
sendRequestWithTimeout<NativeEnvInfo>(
507+
this.connection,
508+
'resolve',
509+
{ executable: data.executable },
510+
RESOLVE_TIMEOUT_MS,
511+
)
318512
.then((environment: NativeEnvInfo) => {
319513
this.outputChannel.info(
320514
`Resolved environment during PET refresh: ${environment.executable}`,
@@ -334,9 +528,23 @@ class NativePythonFinderImpl implements NativePythonFinder {
334528
nativeInfo.push(data);
335529
}),
336530
);
337-
await this.connection.sendRequest<{ duration: number }>('refresh', refreshOptions);
531+
await sendRequestWithTimeout<{ duration: number }>(
532+
this.connection,
533+
'refresh',
534+
refreshOptions,
535+
REFRESH_TIMEOUT_MS,
536+
);
338537
await Promise.all(unresolved);
538+
539+
// Reset restart attempts on successful refresh
540+
this.restartAttempts = 0;
339541
} catch (ex) {
542+
// On timeout, kill the hung process so next request triggers restart
543+
if (ex instanceof Error && ex.message.includes('timed out')) {
544+
this.outputChannel.warn('[pet] Request timed out, killing hung process for restart');
545+
this.killProcess();
546+
this.processExited = true;
547+
}
340548
this.outputChannel.error('[pet] Error refreshing', ex);
341549
throw ex;
342550
} finally {
@@ -371,9 +579,16 @@ class NativePythonFinderImpl implements NativePythonFinder {
371579
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
372580
try {
373581
this.lastConfiguration = options;
374-
await this.connection.sendRequest('configure', options);
582+
await sendRequestWithTimeout(this.connection, 'configure', options, CONFIGURE_TIMEOUT_MS);
375583
} catch (ex) {
584+
// On timeout, kill the hung process so next request triggers restart
585+
if (ex instanceof Error && ex.message.includes('timed out')) {
586+
this.outputChannel.warn('[pet] Configure request timed out, killing hung process for restart');
587+
this.killProcess();
588+
this.processExited = true;
589+
}
376590
this.outputChannel.error('[pet] configure: Configuration error', ex);
591+
throw ex;
377592
}
378593
}
379594

0 commit comments

Comments
 (0)