Skip to content

Commit f2a697b

Browse files
authored
feat: add telemetry for PET initialization duration and improve timeout handling (#1390)
30s timeout → retry → 30s timeout allows for faster feedback and quicker success
1 parent 3308a2b commit f2a697b

File tree

4 files changed

+83
-12
lines changed

4 files changed

+83
-12
lines changed

src/common/telemetry/constants.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ export enum EventNames {
7474
* - reason: string ('tool_not_found')
7575
*/
7676
MANAGER_REGISTRATION_SKIPPED = 'MANAGER_REGISTRATION.SKIPPED',
77+
/**
78+
* Telemetry event for PET (Python Environment Tools) initialization timing.
79+
* Tracks how long it takes to create and start the native Python finder.
80+
* Properties:
81+
* - result: 'success' | 'error' | 'timeout'
82+
* - errorType: string (classified error category, on failure only)
83+
*/
84+
PET_INIT_DURATION = 'PET.INIT_DURATION',
7785
}
7886

7987
// Map all events to their properties
@@ -302,4 +310,16 @@ export interface IEventNamePropertyMapping {
302310
managerName: string;
303311
reason: 'tool_not_found';
304312
};
313+
314+
/* __GDPR__
315+
"pet.init_duration": {
316+
"result": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
317+
"errorType": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "owner": "eleanorjboyd" },
318+
"<duration>": { "classification": "SystemMetaData", "purpose": "FeatureInsight", "isMeasurement": true, "owner": "eleanorjboyd" }
319+
}
320+
*/
321+
[EventNames.PET_INIT_DURATION]: {
322+
result: 'success' | 'error' | 'timeout';
323+
errorType?: string;
324+
};
305325
}

src/extension.ts

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,20 @@ export async function activate(context: ExtensionContext): Promise<PythonEnviron
545545
context.subscriptions.push({ dispose: clearHangWatchdog });
546546
try {
547547
// This is the finder that is used by all the built in environment managers
548-
const nativeFinder: NativePythonFinder = await createNativePythonFinder(outputChannel, api, context);
548+
const petStart = new StopWatch();
549+
let nativeFinder: NativePythonFinder;
550+
try {
551+
nativeFinder = await createNativePythonFinder(outputChannel, api, context);
552+
sendTelemetryEvent(EventNames.PET_INIT_DURATION, petStart.elapsedTime, { result: 'success' });
553+
} catch (petError) {
554+
sendTelemetryEvent(
555+
EventNames.PET_INIT_DURATION,
556+
petStart.elapsedTime,
557+
{ result: 'error', errorType: classifyError(petError) },
558+
petError instanceof Error ? petError : undefined,
559+
);
560+
throw petError;
561+
}
549562
context.subscriptions.push(nativeFinder);
550563
const sysMgr = new SysPythonManager(nativeFinder, api, outputChannel);
551564
sysPythonManager.resolve(sysMgr);

src/managers/common/nativePythonFinder.ts

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,23 @@ import { noop } from './utils';
1717

1818
// Timeout constants for JSON-RPC requests (in milliseconds)
1919
const CONFIGURE_TIMEOUT_MS = 30_000; // 30 seconds for configuration
20-
const REFRESH_TIMEOUT_MS = 120_000; // 2 minutes for full refresh
20+
const MAX_CONFIGURE_TIMEOUT_MS = 60_000; // Max configure timeout after retries (60s)
21+
const REFRESH_TIMEOUT_MS = 30_000; // 30 seconds for full refresh (with 1 retry = 60s max)
2122
const RESOLVE_TIMEOUT_MS = 30_000; // 30 seconds for single resolve
2223

2324
// Restart/recovery constants
2425
const MAX_RESTART_ATTEMPTS = 3;
2526
const RESTART_BACKOFF_BASE_MS = 1_000; // 1 second base, exponential: 1s, 2s, 4s
2627
const MAX_CONFIGURE_TIMEOUTS_BEFORE_KILL = 2; // Kill on the 2nd consecutive timeout
28+
const MAX_REFRESH_RETRIES = 1; // Retry refresh once after timeout
2729

2830
/**
2931
* Computes the configure timeout with exponential backoff.
3032
* @param retryCount Number of consecutive configure timeouts so far
31-
* @returns Timeout in milliseconds: 30s, 60s, 120s, ... capped at REFRESH_TIMEOUT_MS
33+
* @returns Timeout in milliseconds: 30s, 60s, capped at MAX_CONFIGURE_TIMEOUT_MS (60s)
3234
*/
3335
export function getConfigureTimeoutMs(retryCount: number): number {
34-
return Math.min(CONFIGURE_TIMEOUT_MS * Math.pow(2, retryCount), REFRESH_TIMEOUT_MS);
36+
return Math.min(CONFIGURE_TIMEOUT_MS * Math.pow(2, retryCount), MAX_CONFIGURE_TIMEOUT_MS);
3537
}
3638

3739
/**
@@ -563,6 +565,41 @@ class NativePythonFinderImpl implements NativePythonFinder {
563565
}
564566

565567
private async doRefresh(options?: NativePythonEnvironmentKind | Uri[]): Promise<NativeInfo[]> {
568+
let lastError: unknown;
569+
570+
for (let attempt = 0; attempt <= MAX_REFRESH_RETRIES; attempt++) {
571+
try {
572+
return await this.doRefreshAttempt(options, attempt);
573+
} catch (ex) {
574+
lastError = ex;
575+
576+
// Only retry on timeout errors
577+
if (ex instanceof RpcTimeoutError && ex.method !== 'configure') {
578+
if (attempt < MAX_REFRESH_RETRIES) {
579+
this.outputChannel.warn(
580+
`[pet] Refresh timed out (attempt ${attempt + 1}/${MAX_REFRESH_RETRIES + 1}), restarting and retrying...`,
581+
);
582+
// Kill and restart for retry
583+
this.killProcess();
584+
this.processExited = true;
585+
continue;
586+
}
587+
// Final attempt failed
588+
this.outputChannel.error(`[pet] Refresh failed after ${MAX_REFRESH_RETRIES + 1} attempts`);
589+
}
590+
// Non-timeout errors or final timeout - rethrow
591+
throw ex;
592+
}
593+
}
594+
595+
// Should not reach here, but TypeScript needs this
596+
throw lastError;
597+
}
598+
599+
private async doRefreshAttempt(
600+
options: NativePythonEnvironmentKind | Uri[] | undefined,
601+
attempt: number,
602+
): Promise<NativeInfo[]> {
566603
await this.ensureProcessRunning();
567604
const disposables: Disposable[] = [];
568605
const unresolved: Promise<void>[] = [];
@@ -610,6 +647,9 @@ class NativePythonFinderImpl implements NativePythonFinder {
610647

611648
// Reset restart attempts on successful refresh
612649
this.restartAttempts = 0;
650+
if (attempt > 0) {
651+
this.outputChannel.info(`[pet] Refresh succeeded on retry attempt ${attempt + 1}`);
652+
}
613653
} catch (ex) {
614654
// On refresh timeout (not configure — configure handles its own timeout),
615655
// kill the hung process so next request triggers restart

src/test/managers/common/nativePythonFinder.configureTimeout.unit.test.ts

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,12 @@ suite('getConfigureTimeoutMs', () => {
3737
assert.strictEqual(getConfigureTimeoutMs(1), 60_000);
3838
});
3939

40-
test('doubles again on second retry (120s)', () => {
41-
assert.strictEqual(getConfigureTimeoutMs(2), 120_000);
42-
});
43-
44-
test('caps at REFRESH_TIMEOUT_MS (120s) for higher retries', () => {
45-
// 30_000 * 2^3 = 240_000, but capped at 120_000
46-
assert.strictEqual(getConfigureTimeoutMs(3), 120_000);
47-
assert.strictEqual(getConfigureTimeoutMs(10), 120_000);
40+
test('caps at MAX_CONFIGURE_TIMEOUT_MS (60s) for higher retries', () => {
41+
// 30_000 * 2^2 = 120_000, but capped at 60_000
42+
assert.strictEqual(getConfigureTimeoutMs(2), 60_000);
43+
// 30_000 * 2^3 = 240_000, but capped at 60_000
44+
assert.strictEqual(getConfigureTimeoutMs(3), 60_000);
45+
assert.strictEqual(getConfigureTimeoutMs(10), 60_000);
4846
});
4947
});
5048

0 commit comments

Comments
 (0)