1+ import { ChildProcess } from 'child_process' ;
12import * as fs from 'fs-extra' ;
23import * as path from 'path' ;
34import { PassThrough } from 'stream' ;
4- import { Disposable , ExtensionContext , LogOutputChannel , Uri } from 'vscode' ;
5+ import { CancellationTokenSource , Disposable , ExtensionContext , LogOutputChannel , Uri } from 'vscode' ;
56import * as rpc from 'vscode-jsonrpc/node' ;
67import { PythonProjectApi } from '../../api' ;
78import { spawnProcess } from '../../common/childProcess.apis' ;
@@ -14,6 +15,15 @@ import { createRunningWorkerPool, WorkerPool } from '../../common/utils/workerPo
1415import { getConfiguration , getWorkspaceFolders } from '../../common/workspace.apis' ;
1516import { noop } from './utils' ;
1617
// Upper bounds, in milliseconds, on how long each kind of JSON-RPC request may take.
const CONFIGURE_TIMEOUT_MS = 30 * 1_000; // configuration: 30 seconds
const REFRESH_TIMEOUT_MS = 2 * 60 * 1_000; // full refresh: 2 minutes
const RESOLVE_TIMEOUT_MS = 30 * 1_000; // single resolve: 30 seconds

// Restart/recovery tuning: maximum number of restarts and the base delay that is
// doubled on every further attempt (1s, 2s, 4s).
const MAX_RESTART_ATTEMPTS = 3;
const RESTART_BACKOFF_BASE_MS = 1_000;
26+
1727export async function getNativePythonToolsPath ( ) : Promise < string > {
1828 const envsExt = getExtension ( ENVS_EXTENSION_ID ) ;
1929 if ( envsExt ) {
@@ -104,11 +114,48 @@ interface RefreshOptions {
104114 searchPaths ?: string [ ] ;
105115}
106116
/**
 * Sends a JSON-RPC request and rejects if no response arrives within `timeoutMs`.
 *
 * When the timeout fires, the cancellation token handed to `sendRequest` is
 * cancelled and the returned promise rejects. Callers in this file recognise a
 * timeout by looking for the phrase "timed out" in the error message, so that
 * wording must be kept.
 *
 * @param connection The JSON-RPC connection
 * @param method The RPC method name
 * @param params The parameters to send
 * @param timeoutMs Timeout in milliseconds
 * @returns The result of the request
 * @throws Error if the request times out, or whatever error `sendRequest` rejects with
 */
async function sendRequestWithTimeout<T>(
    connection: rpc.MessageConnection,
    method: string,
    params: unknown,
    timeoutMs: number,
): Promise<T> {
    const cts = new CancellationTokenSource();
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeoutPromise = new Promise<never>((_, reject) => {
        timer = setTimeout(() => {
            cts.cancel();
            reject(new Error(`Request '${method}' timed out after ${timeoutMs}ms`));
        }, timeoutMs);
    });

    try {
        return await Promise.race([connection.sendRequest<T>(method, params, cts.token), timeoutPromise]);
    } finally {
        // Always stop the timer: disposing the token source does not cancel it, so
        // without this every completed request would leave a live timer behind
        // until `timeoutMs` elapsed.
        clearTimeout(timer);
        cts.dispose();
    }
}
148+
107149class NativePythonFinderImpl implements NativePythonFinder {
108- private readonly connection : rpc . MessageConnection ;
150+ private connection : rpc . MessageConnection ;
109151 private readonly pool : WorkerPool < NativePythonEnvironmentKind | Uri [ ] | undefined , NativeInfo [ ] > ;
110152 private cache : Map < string , NativeInfo [ ] > = new Map ( ) ;
111- private readonly startDisposables : Disposable [ ] = [ ] ;
153+ private startDisposables : Disposable [ ] = [ ] ;
154+ private proc : ChildProcess | undefined ;
155+ private processExited : boolean = false ;
156+ private startFailed : boolean = false ;
157+ private restartAttempts : number = 0 ;
158+ private isRestarting : boolean = false ;
112159
113160 constructor (
114161 private readonly outputChannel : LogOutputChannel ,
@@ -125,13 +172,123 @@ class NativePythonFinderImpl implements NativePythonFinder {
125172 }
126173
/**
 * Resolves a single Python executable into full environment information.
 *
 * Makes sure the PET process is up, pushes the current configuration, then
 * issues a `resolve` request bounded by RESOLVE_TIMEOUT_MS.
 *
 * @param executable Path of the Python executable to resolve
 * @returns The environment information returned by PET
 * @throws Any error raised while ensuring the process, configuring or resolving;
 *         a timeout additionally kills the process so the next call restarts it
 */
public async resolve(executable: string): Promise<NativeEnvInfo> {
    await this.ensureProcessRunning();
    try {
        await this.configure();
        const request = { executable };
        const resolved = await sendRequestWithTimeout<NativeEnvInfo>(
            this.connection,
            'resolve',
            request,
            RESOLVE_TIMEOUT_MS,
        );

        this.outputChannel.info(`Resolved Python Environment ${resolved.executable}`);
        // A request went through end to end, so the restart budget starts over.
        this.restartAttempts = 0;
        return resolved;
    } catch (error) {
        const timedOut = error instanceof Error && error.message.includes('timed out');
        if (timedOut) {
            // A hung process is torn down and flagged so the next request restarts it.
            this.outputChannel.warn('[pet] Resolve request timed out, killing hung process for restart');
            this.killProcess();
            this.processExited = true;
        }
        throw error;
    }
}
199+
/**
 * Guarantees there is a usable PET process before a request is sent.
 *
 * Returns immediately when neither the start-failed nor the process-exited flag
 * is set. Otherwise a restart is attempted, unless one is already in progress or
 * the restart budget (MAX_RESTART_ATTEMPTS) has been used up.
 *
 * @throws Error if a restart is already running, if the restart budget is
 *         exhausted, or if the restart itself fails
 */
private async ensureProcessRunning(): Promise<void> {
    const needsRestart = this.startFailed || this.processExited;
    if (!needsRestart) {
        return;
    }

    // Guard against re-entering restart while one is still under way.
    if (this.isRestarting) {
        throw new Error('Python Environment Tools (PET) is currently restarting. Please try again.');
    }

    // Give up once the allowed number of restarts has been spent.
    const budgetExhausted = this.restartAttempts >= MAX_RESTART_ATTEMPTS;
    if (budgetExhausted) {
        const message =
            `Python Environment Tools (PET) failed after ${MAX_RESTART_ATTEMPTS} restart attempts. ` +
            'Please reload the window or check the output channel for details.';
        throw new Error(message);
    }

    // restart() applies the exponential backoff itself.
    await this.restart();
}
227+
/**
 * Tears down the current PET process and connection and starts a new one.
 *
 * Each call counts as one restart attempt and waits
 * RESTART_BACKOFF_BASE_MS * 2^(attempt - 1) before starting the new process.
 * The attempt counter is not reset here; it is reset by callers once a request
 * succeeds.
 *
 * @throws Whatever error the teardown or start raises, after logging it
 */
private async restart(): Promise<void> {
    this.isRestarting = true;
    this.restartAttempts += 1;

    const exponent = this.restartAttempts - 1;
    const backoffMs = RESTART_BACKOFF_BASE_MS * Math.pow(2, exponent);
    const pause = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

    this.outputChannel.warn(
        `[pet] Restarting Python Environment Tools (attempt ${this.restartAttempts}/${MAX_RESTART_ATTEMPTS}, ` +
            `waiting ${backoffMs}ms)`,
    );

    try {
        // Stop the old process if it is still alive.
        this.killProcess();

        // Release the previous connection and its streams.
        this.startDisposables.forEach((disposable) => disposable.dispose());
        this.startDisposables = [];

        // Back off before spawning again.
        await pause(backoffMs);

        // Clear failure markers; dropping lastConfiguration forces a fresh configure.
        this.processExited = false;
        this.startFailed = false;
        this.lastConfiguration = undefined;

        this.connection = this.start();

        this.outputChannel.info('[pet] Python Environment Tools restarted successfully');
    } catch (error) {
        this.outputChannel.error('[pet] Failed to restart Python Environment Tools:', error);
        throw error;
    } finally {
        this.isRestarting = false;
    }
}
272+
/**
 * Attempts to kill the PET process. Used during restart and timeout recovery.
 *
 * Sends SIGTERM first and, if the process has still not terminated 500 ms later,
 * follows up with SIGKILL. `this.proc` is cleared immediately, so the follow-up
 * works on a captured reference to the process being killed rather than on
 * `this.proc` (which is already `undefined`, or may by then point at a newly
 * started process).
 *
 * Errors from sending a signal are logged and never thrown.
 */
private killProcess(): void {
    const proc = this.proc;
    this.proc = undefined;

    // Nothing to do when there is no process or it has already terminated.
    // A process ended by a signal reports exitCode === null, so signalCode is checked too.
    if (!proc || proc.exitCode !== null || proc.signalCode !== null) {
        return;
    }

    try {
        this.outputChannel.info('[pet] Killing hung/crashed PET process');
        proc.kill('SIGTERM');

        // Give it a moment to terminate gracefully, then force kill.
        const forceKillTimer = setTimeout(() => {
            if (proc.exitCode === null && proc.signalCode === null) {
                try {
                    proc.kill('SIGKILL');
                } catch (ex) {
                    this.outputChannel.error('[pet] Error killing process:', ex);
                }
            }
        }, 500);
        // No need to keep the fallback timer around once the process is gone.
        proc.once('exit', () => clearTimeout(forceKillTimer));
    } catch (ex) {
        this.outputChannel.error('[pet] Error killing process:', ex);
    }
}
136293
137294 public async refresh ( hardRefresh : boolean , options ?: NativePythonEnvironmentKind | Uri [ ] ) : Promise < NativeInfo [ ] > {
@@ -228,12 +385,35 @@ class NativePythonFinderImpl implements NativePythonFinder {
228385 // we have got the exit event.
229386 const readable = new PassThrough ( ) ;
230387 const writable = new PassThrough ( ) ;
388+
231389 try {
232- const proc = spawnProcess ( this . toolPath , [ 'server' ] , { env : process . env , stdio : 'pipe' } ) ;
233- proc . stdout . pipe ( readable , { end : false } ) ;
234- proc . stderr . on ( 'data' , ( data ) => this . outputChannel . error ( `[pet] ${ data . toString ( ) } ` ) ) ;
235- writable . pipe ( proc . stdin , { end : false } ) ;
390+ this . proc = spawnProcess ( this . toolPath , [ 'server' ] , { env : process . env , stdio : 'pipe' } ) ;
391+
392+ if ( ! this . proc . stdout || ! this . proc . stderr || ! this . proc . stdin ) {
393+ throw new Error ( 'Failed to create stdio streams for PET process' ) ;
394+ }
236395
396+ this . proc . stdout . pipe ( readable , { end : false } ) ;
397+ this . proc . stderr . on ( 'data' , ( data ) => this . outputChannel . error ( `[pet] ${ data . toString ( ) } ` ) ) ;
398+ writable . pipe ( this . proc . stdin , { end : false } ) ;
399+
400+ // Handle process exit - mark as exited so pending requests fail fast
401+ this . proc . on ( 'exit' , ( code , signal ) => {
402+ this . processExited = true ;
403+ if ( code !== 0 ) {
404+ this . outputChannel . error (
405+ `[pet] Python Environment Tools exited unexpectedly with code ${ code } , signal ${ signal } ` ,
406+ ) ;
407+ }
408+ } ) ;
409+
410+ // Handle process errors (e.g., ENOENT if executable not found)
411+ this . proc . on ( 'error' , ( err ) => {
412+ this . processExited = true ;
413+ this . outputChannel . error ( '[pet] Process error:' , err ) ;
414+ } ) ;
415+
416+ const proc = this . proc ;
237417 this . startDisposables . push ( {
238418 dispose : ( ) => {
239419 try {
@@ -255,7 +435,11 @@ class NativePythonFinderImpl implements NativePythonFinder {
255435 } ,
256436 } ) ;
257437 } catch ( ex ) {
438+ // Mark start as failed so all subsequent requests fail immediately
439+ this . startFailed = true ;
258440 this . outputChannel . error ( `[pet] Error starting Python Finder ${ this . toolPath } server` , ex ) ;
441+ // Don't continue - throw so caller knows spawn failed
442+ throw ex ;
259443 }
260444 const connection = rpc . createMessageConnection (
261445 new rpc . StreamMessageReader ( readable ) ,
@@ -300,6 +484,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
300484 }
301485
302486 private async doRefresh ( options ?: NativePythonEnvironmentKind | Uri [ ] ) : Promise < NativeInfo [ ] > {
487+ await this . ensureProcessRunning ( ) ;
303488 const disposables : Disposable [ ] = [ ] ;
304489 const unresolved : Promise < void > [ ] = [ ] ;
305490 const nativeInfo : NativeInfo [ ] = [ ] ;
@@ -311,10 +496,12 @@ class NativePythonFinderImpl implements NativePythonFinder {
311496 this . outputChannel . info ( `Discovered env: ${ data . executable || data . prefix } ` ) ;
312497 if ( data . executable && ( ! data . version || ! data . prefix ) ) {
313498 unresolved . push (
314- this . connection
315- . sendRequest < NativeEnvInfo > ( 'resolve' , {
316- executable : data . executable ,
317- } )
499+ sendRequestWithTimeout < NativeEnvInfo > (
500+ this . connection ,
501+ 'resolve' ,
502+ { executable : data . executable } ,
503+ RESOLVE_TIMEOUT_MS ,
504+ )
318505 . then ( ( environment : NativeEnvInfo ) => {
319506 this . outputChannel . info (
320507 `Resolved environment during PET refresh: ${ environment . executable } ` ,
@@ -334,9 +521,23 @@ class NativePythonFinderImpl implements NativePythonFinder {
334521 nativeInfo . push ( data ) ;
335522 } ) ,
336523 ) ;
337- await this . connection . sendRequest < { duration : number } > ( 'refresh' , refreshOptions ) ;
524+ await sendRequestWithTimeout < { duration : number } > (
525+ this . connection ,
526+ 'refresh' ,
527+ refreshOptions ,
528+ REFRESH_TIMEOUT_MS ,
529+ ) ;
338530 await Promise . all ( unresolved ) ;
531+
532+ // Reset restart attempts on successful refresh
533+ this . restartAttempts = 0 ;
339534 } catch ( ex ) {
535+ // On timeout, kill the hung process so next request triggers restart
536+ if ( ex instanceof Error && ex . message . includes ( 'timed out' ) ) {
537+ this . outputChannel . warn ( '[pet] Request timed out, killing hung process for restart' ) ;
538+ this . killProcess ( ) ;
539+ this . processExited = true ;
540+ }
340541 this . outputChannel . error ( '[pet] Error refreshing' , ex ) ;
341542 throw ex ;
342543 } finally {
@@ -371,9 +572,16 @@ class NativePythonFinderImpl implements NativePythonFinder {
371572 this . outputChannel . info ( '[pet] configure: Sending configuration update:' , JSON . stringify ( options ) ) ;
372573 try {
373574 this . lastConfiguration = options ;
374- await this . connection . sendRequest ( 'configure' , options ) ;
575+ await sendRequestWithTimeout ( this . connection , 'configure' , options , CONFIGURE_TIMEOUT_MS ) ;
375576 } catch ( ex ) {
577+ // On timeout, kill the hung process so next request triggers restart
578+ if ( ex instanceof Error && ex . message . includes ( 'timed out' ) ) {
579+ this . outputChannel . warn ( '[pet] Configure request timed out, killing hung process for restart' ) ;
580+ this . killProcess ( ) ;
581+ this . processExited = true ;
582+ }
376583 this . outputChannel . error ( '[pet] configure: Configuration error' , ex ) ;
584+ throw ex ;
377585 }
378586 }
379587
0 commit comments