Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 64 additions & 13 deletions src/managers/common/nativePythonFinder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ const RESOLVE_TIMEOUT_MS = 30_000; // 30 seconds for single resolve
// Restart/recovery constants
const MAX_RESTART_ATTEMPTS = 3;
const RESTART_BACKOFF_BASE_MS = 1_000; // 1 second base, exponential: 1s, 2s, 4s
const MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL = 2; // Kill on the 2nd consecutive timeout

/**
* Computes the configure timeout with exponential backoff.
* @param retryCount Number of consecutive configure timeouts so far
* @returns Timeout in milliseconds: 30s, 60s, 120s, ... capped at REFRESH_TIMEOUT_MS
*/
export function getConfigureTimeoutMs(retryCount: number): number {
return Math.min(CONFIGURE_TIMEOUT_MS * Math.pow(2, retryCount), REFRESH_TIMEOUT_MS);
}

export async function getNativePythonToolsPath(): Promise<string> {
const envsExt = getExtension(ENVS_EXTENSION_ID);
Expand Down Expand Up @@ -119,14 +129,27 @@ interface RefreshOptions {
searchPaths?: string[];
}

/**
* Error thrown when a JSON-RPC request times out.
*/
export class RpcTimeoutError extends Error {
constructor(
public readonly method: string,
timeoutMs: number,
) {
super(`Request '${method}' timed out after ${timeoutMs}ms`);
Comment thread
karthiknadig marked this conversation as resolved.
this.name = this.constructor.name;
}
}

/**
* Wraps a JSON-RPC sendRequest call with a timeout.
* @param connection The JSON-RPC connection
* @param method The RPC method name
* @param params The parameters to send
* @param timeoutMs Timeout in milliseconds
* @returns The result of the request
* @throws Error if the request times out
* @throws RpcTimeoutError if the request times out
*/
async function sendRequestWithTimeout<T>(
connection: rpc.MessageConnection,
Expand All @@ -138,7 +161,7 @@ async function sendRequestWithTimeout<T>(
const timeoutPromise = new Promise<never>((_, reject) => {
const timer = setTimeout(() => {
cts.cancel();
reject(new Error(`Request '${method}' timed out after ${timeoutMs}ms`));
reject(new RpcTimeoutError(method, timeoutMs));
}, timeoutMs);
// Clear timeout if the CancellationTokenSource is disposed
cts.token.onCancellationRequested(() => clearTimeout(timer));
Expand All @@ -161,6 +184,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
private startFailed: boolean = false;
private restartAttempts: number = 0;
private isRestarting: boolean = false;
private configureTimeoutCount: number = 0;

constructor(
private readonly outputChannel: LogOutputChannel,
Expand Down Expand Up @@ -192,8 +216,9 @@ class NativePythonFinderImpl implements NativePythonFinder {
this.restartAttempts = 0;
return environment;
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
// On resolve timeout (not configure — configure handles its own timeout),
// kill the hung process so next request triggers restart
if (ex instanceof RpcTimeoutError && ex.method !== 'configure') {
this.outputChannel.warn('[pet] Resolve request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
Expand Down Expand Up @@ -260,6 +285,7 @@ class NativePythonFinderImpl implements NativePythonFinder {
this.processExited = false;
this.startFailed = false;
this.lastConfiguration = undefined; // Force reconfiguration
this.configureTimeoutCount = 0;

// Start fresh
this.connection = this.start();
Expand Down Expand Up @@ -544,8 +570,9 @@ class NativePythonFinderImpl implements NativePythonFinder {
// Reset restart attempts on successful refresh
this.restartAttempts = 0;
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
// On refresh timeout (not configure — configure handles its own timeout),
// kill the hung process so next request triggers restart
if (ex instanceof RpcTimeoutError && ex.method !== 'configure') {
this.outputChannel.warn('[pet] Request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
Expand Down Expand Up @@ -583,17 +610,41 @@ class NativePythonFinderImpl implements NativePythonFinder {
return;
}
this.outputChannel.info('[pet] configure: Sending configuration update:', JSON.stringify(options));
// Exponential backoff: 30s, 60s on retry. Capped at REFRESH_TIMEOUT_MS.
const timeoutMs = getConfigureTimeoutMs(this.configureTimeoutCount);
if (this.configureTimeoutCount > 0) {
this.outputChannel.info(
`[pet] configure: Using extended timeout of ${timeoutMs}ms (retry ${this.configureTimeoutCount})`,
);
}
try {
await sendRequestWithTimeout(this.connection, 'configure', options, timeoutMs);
// Only cache after success so failed/timed-out calls will retry
this.lastConfiguration = options;
await sendRequestWithTimeout(this.connection, 'configure', options, CONFIGURE_TIMEOUT_MS);
this.configureTimeoutCount = 0;
} catch (ex) {
// On timeout, kill the hung process so next request triggers restart
if (ex instanceof Error && ex.message.includes('timed out')) {
this.outputChannel.warn('[pet] Configure request timed out, killing hung process for restart');
this.killProcess();
this.processExited = true;
// Clear cached config so the next call retries instead of short-circuiting via configurationEquals
this.lastConfiguration = undefined;
if (ex instanceof RpcTimeoutError) {
this.configureTimeoutCount++;
if (this.configureTimeoutCount >= MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL) {
// Repeated configure timeouts suggest PET is truly hung — kill and restart
this.outputChannel.error(
`[pet] Configure timed out ${this.configureTimeoutCount} consecutive times, killing hung process for restart`,
);
this.killProcess();
this.processExited = true;
this.configureTimeoutCount = 0;
} else {
// First timeout — PET may still be working. Let it continue and retry next call.
this.outputChannel.warn(
`[pet] Configure request timed out (attempt ${this.configureTimeoutCount}/${MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL}), ` +
'will retry on next request without killing process',
);
}
Comment thread
karthiknadig marked this conversation as resolved.
} else {
Comment thread
karthiknadig marked this conversation as resolved.
this.outputChannel.error('[pet] configure: Configuration error', ex);
}
this.outputChannel.error('[pet] configure: Configuration error', ex);
throw ex;
}
}
Comment thread
karthiknadig marked this conversation as resolved.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import assert from 'node:assert';
import { RpcTimeoutError, getConfigureTimeoutMs } from '../../../managers/common/nativePythonFinder';

suite('RpcTimeoutError', () => {
test('has correct name property', () => {
const error = new RpcTimeoutError('configure', 30000);
assert.strictEqual(error.name, 'RpcTimeoutError');
});

test('has correct method property', () => {
const error = new RpcTimeoutError('configure', 30000);
assert.strictEqual(error.method, 'configure');
});

test('message includes method name and timeout', () => {
const error = new RpcTimeoutError('resolve', 5000);
assert.strictEqual(error.message, "Request 'resolve' timed out after 5000ms");
});

test('is instanceof Error', () => {
const error = new RpcTimeoutError('configure', 30000);
assert.ok(error instanceof Error);
assert.ok(error instanceof RpcTimeoutError);
});
});

suite('getConfigureTimeoutMs', () => {
test('returns base timeout (30s) on first attempt', () => {
assert.strictEqual(getConfigureTimeoutMs(0), 30_000);
});

test('doubles timeout on first retry (60s)', () => {
assert.strictEqual(getConfigureTimeoutMs(1), 60_000);
});

test('doubles again on second retry (120s)', () => {
assert.strictEqual(getConfigureTimeoutMs(2), 120_000);
});

test('caps at REFRESH_TIMEOUT_MS (120s) for higher retries', () => {
// 30_000 * 2^3 = 240_000, but capped at 120_000
assert.strictEqual(getConfigureTimeoutMs(3), 120_000);
assert.strictEqual(getConfigureTimeoutMs(10), 120_000);
});
});
Comment thread
karthiknadig marked this conversation as resolved.
Loading