Skip to content

Commit 4e9c24e

Browse files
Increase VM SSH readiness timeout and diagnostics
1 parent ee98526 commit 4e9c24e

3 files changed

Lines changed: 39 additions & 10 deletions

File tree

docs/exec-plans/active/EP-001-portable-coder-foundation-and-multi-provider-mvp.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ Initial user target providers/tools:
4444
- [x] (2026-02-20) Fix Windows VM start cloud-init port selection to fall back when the default range is exhausted
4545
- [x] (2026-02-20) Add Windows helper script to patch legacy `start-vm.ps1` cloud-init port fallback on existing local clones
4646
- [x] (2026-02-20) Make Windows smoke SSH probe tolerate transient native-command connection errors during VM boot
47+
- [x] (2026-02-20) Increase VM SSH readiness timeouts and diagnostics for slow first-boot cloud images
4748
- [ ] (2026-02-18) Document setup/runbook and close out EP-001
4849

4950
## Context and Orientation
@@ -123,6 +124,7 @@ Acceptance criteria for EP-001:
123124
- 2026-02-20: PowerShell `Start-Process -ArgumentList` rejected empty elements, so `ssh-keygen -N` required explicit empty-string handling.
124125
- 2026-02-20: Some Windows hosts had no free ports in `38080-38120`; cloud-init server startup now needs fallback port allocation.
125126
- 2026-02-20: On some PowerShell environments, transient `ssh` connection failures surfaced as exceptions; smoke probing must retry instead of aborting early.
127+
- 2026-02-20: First-boot SSH readiness on some hosts can exceed 120 seconds, so timeout windows need to be configurable and longer by default.
126128

127129
## Decision Log
128130
- 2026-02-18: Adopt harness-first planning model before implementation.

scripts/pcoder.cjs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,7 @@ function runInLinuxPortableVm(options) {
10111011
sshPort,
10121012
sshUser,
10131013
sshKeyPath,
1014-
timeoutSeconds: 120
1014+
timeoutSeconds: resolveVmSshTimeoutSeconds(mergedEnv)
10151015
});
10161016

10171017
const remoteRoot = mergedEnv.PCODER_VM_PROJECTS_ROOT || '/home/portable/projects';
@@ -1201,6 +1201,18 @@ function waitForVmSshReady(options) {
12011201
fail(`Timed out waiting for VM SSH readiness after ${timeoutSeconds}s.`);
12021202
}
12031203

1204+
/**
 * Resolve how long to wait for VM SSH readiness, in seconds.
 *
 * Reads `PCODER_VM_SSH_TIMEOUT_SECONDS` from the given environment map.
 * When the variable is unset or empty, the default of 300 seconds is used.
 * Otherwise the value must be a whole decimal number between 10 and 3600
 * (inclusive); anything else is reported through `fail(...)`.
 *
 * @param {Object} env Environment map to read the override from.
 * @returns {number} Timeout in seconds.
 */
function resolveVmSshTimeoutSeconds(env) {
  const raw = env.PCODER_VM_SSH_TIMEOUT_SECONDS;
  if (!raw) {
    return 300;
  }
  // Validate the whole string up front: Number.parseInt would silently accept
  // partially numeric input such as "300s" or "12.9", which contradicts the
  // "must be an integer" contract stated in the error message below.
  const text = String(raw).trim();
  const parsed = /^\+?\d+$/.test(text) ? Number(text) : Number.NaN;
  if (!Number.isInteger(parsed) || parsed < 10 || parsed > 3600) {
    // NOTE(review): fail() is defined elsewhere in this file and presumably
    // aborts the process — confirm it does not return.
    fail('PCODER_VM_SSH_TIMEOUT_SECONDS must be an integer between 10 and 3600.');
  }
  return parsed;
}
1215+
12041216
function resolveVmToolRunner(tool, adapter, env) {
12051217
const overrideKey = `PCODER_VM_${tool.toUpperCase()}_CMD`;
12061218
if (env[overrideKey]) {

scripts/runtime/windows/smoke-check.ps1

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
param(
2-
[switch]$SkipToolChecks
2+
[switch]$SkipToolChecks,
3+
[int]$SshReadyTimeoutSeconds = 300
34
)
45

56
$ErrorActionPreference = 'Stop'
@@ -53,6 +54,7 @@ function Invoke-Ssh {
5354
param(
5455
[string]$SshExe,
5556
[string]$SshPort,
57+
[string]$SshUser,
5658
[string]$Script
5759
)
5860

@@ -63,7 +65,7 @@ function Invoke-Ssh {
6365
'-o', 'StrictHostKeyChecking=no',
6466
'-o', 'UserKnownHostsFile=NUL',
6567
'-o', 'ConnectTimeout=5',
66-
'portable@127.0.0.1',
68+
"$SshUser@127.0.0.1",
6769
'bash', '-lc', $Script
6870
)
6971

@@ -185,25 +187,38 @@ try {
185187
throw 'No SSH client found.'
186188
}
187189
Add-Check -Name 'ssh:client' -Ok $true -Detail $sshExe
190+
$sshUser = if ($env:PCODER_VM_USER) { $env:PCODER_VM_USER } else { 'portable' }
191+
Add-Check -Name 'ssh:user' -Ok $true -Detail $sshUser
188192

189193
$sshReady = $false
190-
for ($attempt = 1; $attempt -le 60; $attempt++) {
191-
$probe = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -Script 'echo vm-ready'
194+
$pollIntervalSeconds = 2
195+
$maxAttempts = [Math]::Max([Math]::Ceiling($SshReadyTimeoutSeconds / $pollIntervalSeconds), 1)
196+
$lastProbeOutput = ''
197+
for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) {
198+
$probe = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -SshUser $sshUser -Script 'echo vm-ready'
199+
if ($probe.output) {
200+
$lastProbeOutput = $probe.output.Trim()
201+
}
192202
if ($probe.status -eq 0 -and $probe.output -match 'vm-ready') {
193203
$sshReady = $true
194204
Add-Check -Name 'ssh:ready' -Ok $true -Detail "attempt=$attempt"
195205
break
196206
}
197-
Start-Sleep -Seconds 2
207+
Start-Sleep -Seconds $pollIntervalSeconds
198208
}
199209
if (-not $sshReady) {
200-
Add-Check -Name 'ssh:ready' -Ok $false -Detail 'timeout'
201-
throw 'Timed out waiting for VM SSH readiness.'
210+
Add-Check -Name 'ssh:ready' -Ok $false -Detail "timeout (${SshReadyTimeoutSeconds}s)"
211+
$lastDetail = $lastProbeOutput
212+
if (-not $lastDetail) {
213+
$lastDetail = '(no ssh output)'
214+
}
215+
$lastDetail = $lastDetail -replace '\r?\n', ' '
216+
throw "Timed out waiting for VM SSH readiness after ${SshReadyTimeoutSeconds}s. Last output: $lastDetail"
202217
}
203218

204219
if (-not $SkipToolChecks) {
205220
foreach ($tool in @('codex', 'claude')) {
206-
$hasTool = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -Script "command -v $tool >/dev/null 2>&1"
221+
$hasTool = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -SshUser $sshUser -Script "command -v $tool >/dev/null 2>&1"
207222
$toolFound = $hasTool.status -eq 0
208223
$toolDetail = 'missing'
209224
if ($toolFound) {
@@ -212,7 +227,7 @@ try {
212227
Add-Check -Name "guest:$tool:command" -Ok $toolFound -Detail $toolDetail
213228

214229
if ($toolFound) {
215-
$version = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -Script "$tool --version"
230+
$version = Invoke-Ssh -SshExe $sshExe -SshPort $sshPort -SshUser $sshUser -Script "$tool --version"
216231
$versionOk = $version.status -eq 0
217232
$versionDetail = 'ok'
218233
if (-not $versionOk) {

0 commit comments

Comments
 (0)