Skip to content

Commit 648542c

Browse files
Add TCG override path for Windows VM smoke recovery
1 parent 2c35d2e commit 648542c

5 files changed

Lines changed: 54 additions & 1 deletion

File tree

docs/RUNBOOKS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ Backend spec:
5353
- Start VM implementation: `scripts/runtime/windows/start-vm.ps1`
5454
- Stop VM: `scripts/runtime/windows/stop-vm.cmd`
5555
- Smoke checklist: `scripts/runtime/windows/smoke-check.cmd`
56+
- Smoke checklist (force TCG): `scripts/runtime/windows/smoke-check-tcg.cmd`
5657

5758
## Linux-Portable Run Flow
5859
1. `pcoder` starts/ensures VM via `start-vm.cmd`.
@@ -61,6 +62,11 @@ Backend spec:
6162
4. Tool command runs over SSH in guest project directory.
6263
5. Project is copied back to host after run (unless `--no-sync-back`).
6364

65+
Acceleration override (Windows troubleshooting):
66+
- `PCODER_VM_ACCEL_MODE=auto` (default): try WHPX, then fall back to TCG if launch fails.
67+
- `PCODER_VM_ACCEL_MODE=whpx`: force WHPX only.
68+
- `PCODER_VM_ACCEL_MODE=tcg`: force software virtualization when WHPX boot/SSH is unreliable.
69+
6470
## Auth Operations
6571
- Show auth modes and portable auth paths: `pcoder auth status`
6672
- Login with OAuth in current default mode: `pcoder auth login codex`, `pcoder auth login claude`

docs/exec-plans/active/EP-001-portable-coder-foundation-and-multi-provider-mvp.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ Initial user target providers/tools:
4848
- [x] (2026-02-20) Fix Windows smoke SSH probe command invocation so readiness check validates `echo vm-ready` output correctly
4949
- [x] (2026-02-20) Update Windows smoke SSH probe to pass remote commands directly (avoids PowerShell stdin encoding edge cases)
5050
- [x] (2026-02-20) Normalize Windows PowerShell native `ssh` stderr handling so host-key warnings do not abort readiness probes
51+
- [x] (2026-02-20) Add Windows VM acceleration override (`PCODER_VM_ACCEL_MODE`) to force TCG when WHPX guests fail to reach SSH readiness
52+
- [x] (2026-02-20) Add `smoke-check-tcg.cmd` helper for one-command Windows smoke validation in forced software mode
5153
- [ ] (2026-02-18) Document setup/runbook and close out EP-001
5254

5355
## Context and Orientation
@@ -131,6 +133,7 @@ Acceptance criteria for EP-001:
131133
- 2026-02-20: Passing `ssh ... bash -lc <script>` as split args can drop expected output semantics; piping script content to `bash -s` is safer for deterministic probing.
132134
- 2026-02-20: Piping probe scripts into native `ssh` from Windows PowerShell can hit stdin encoding edge cases; direct remote command args are more reliable.
133135
- 2026-02-20: In Windows PowerShell, native stderr can surface as terminating `ErrorRecord` when `$ErrorActionPreference='Stop'`, so probe wrappers must normalize stderr explicitly.
136+
- 2026-02-20: Some hosts can launch QEMU with WHPX but still fail guest SSH readiness; explicit TCG override is needed for deterministic recovery.
134137

135138
## Decision Log
136139
- 2026-02-18: Adopt harness-first planning model before implementation.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
@echo off
rem Wrapper around smoke-check.cmd that forces PCODER_VM_ACCEL_MODE=tcg.
rem All command-line arguments are forwarded unchanged, and the wrapped
rem script's exit code is returned to the caller.
2+
setlocal EnableExtensions
rem setlocal keeps the variables set below local to this script.
3+
4+
set "PCODER_VM_ACCEL_MODE=tcg"
5+
set "SCRIPT_DIR=%~dp0"
rem %~dp0 is this script's drive and directory (with a trailing backslash),
rem so smoke-check.cmd is resolved next to this file, not from the current directory.
6+
7+
call "%SCRIPT_DIR%smoke-check.cmd" %*
8+
exit /b %errorlevel%
9+

scripts/runtime/windows/smoke-check.ps1

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,11 @@ try {
232232
$lastDetail = '(no ssh output)'
233233
}
234234
$lastDetail = $lastDetail -replace '\r?\n', ' '
235-
throw "Timed out waiting for VM SSH readiness after ${SshReadyTimeoutSeconds}s. Last output: $lastDetail"
235+
# Build an optional troubleshooting hint that is appended to the timeout error.
# The hint is added only when $mode equals 'accelerated-whpx'.
# NOTE(review): $mode is assigned outside this hunk; presumably it holds the
# mode string recorded by start-vm.ps1 - confirm.
$hint = ''
236+
if ($mode -eq 'accelerated-whpx') {
237+
# The backtick escapes `$` so the message shows the literal text $env:PCODER_VM_ACCEL_MODE='tcg'.
$hint = " Hint: retry with `$env:PCODER_VM_ACCEL_MODE='tcg' to force software virtualization."
238+
}
239+
throw "Timed out waiting for VM SSH readiness after ${SshReadyTimeoutSeconds}s. Last output: $lastDetail$hint"
236240
}
237241

238242
if (-not $SkipToolChecks) {

scripts/runtime/windows/start-vm.ps1

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,11 @@ if (Test-Path $vmPid) {
191191

192192
Stop-CloudInitServer
193193

194+
# Resolve the requested acceleration mode from the PCODER_VM_ACCEL_MODE
# environment variable, lower-cased so the comparison below is case-insensitive.
# When the variable is unset or empty the mode defaults to 'auto'.
$requestedAccelMode = if ($env:PCODER_VM_ACCEL_MODE) { "$($env:PCODER_VM_ACCEL_MODE)".ToLowerInvariant() } else { 'auto' }
195+
# Reject anything other than the three supported values before any launch attempt.
# The error message shows the lower-cased value, not the raw input.
if ($requestedAccelMode -ne 'auto' -and $requestedAccelMode -ne 'whpx' -and $requestedAccelMode -ne 'tcg') {
196+
throw "Invalid PCODER_VM_ACCEL_MODE '$requestedAccelMode'. Expected one of: auto, whpx, tcg."
197+
}
198+
194199
$requestedPortRaw = $env:PCODER_VM_SSH_PORT
195200
$sshPort = 0
196201
if ($requestedPortRaw) {
@@ -220,6 +225,32 @@ $baseArgs = @(
220225
'-smbios', "type=1,serial=ds=nocloud-net;s=http://10.0.2.2:$cloudInitPort/"
221226
)
222227

228+
# Forced WHPX mode: make a single launch attempt with '-accel whpx'.
# On success the PID, mode and SSH port are written to their state files and
# the script exits 0. On failure the script throws; this branch never tries TCG.
# NOTE(review): Start-QemuAttempt is defined outside this hunk; presumably it
# returns a process object on success and a falsy value on failure - confirm.
if ($requestedAccelMode -eq 'whpx') {
229+
$forcedWhpx = Start-QemuAttempt -Mode 'accelerated-whpx' -AccelerationArgs @('-accel', 'whpx') -BaseArgs $baseArgs -QemuBinary $qemuExe -LogPath $vmLog
230+
if ($forcedWhpx) {
231+
$forcedWhpx.Id | Out-File -Encoding ascii -FilePath $vmPid
232+
'accelerated-whpx' | Out-File -Encoding ascii -FilePath $vmMode
233+
$sshPort | Out-File -Encoding ascii -FilePath $sshPortFile
234+
Write-Host "VM started in forced accelerated mode (whpx). PID: $($forcedWhpx.Id). SSH port: $sshPort"
235+
exit 0
236+
}
237+
# Launch failed: stop the cloud-init server before surfacing the error.
Stop-CloudInitServer
238+
throw "Failed to start VM in forced whpx mode. Check log: $vmLog"
239+
}
240+
241+
# Forced TCG mode: make a single launch attempt with '-accel tcg', skipping WHPX.
# On success the PID, mode and SSH port are written to their state files and
# the script exits 0. On failure the script throws.
# NOTE(review): Start-QemuAttempt is defined outside this hunk; presumably it
# returns a process object on success and a falsy value on failure - confirm.
if ($requestedAccelMode -eq 'tcg') {
242+
$forcedTcg = Start-QemuAttempt -Mode 'portable-forced-tcg' -AccelerationArgs @('-accel', 'tcg') -BaseArgs $baseArgs -QemuBinary $qemuExe -LogPath $vmLog
243+
if ($forcedTcg) {
244+
$forcedTcg.Id | Out-File -Encoding ascii -FilePath $vmPid
245+
# NOTE(review): the attempt above is labelled 'portable-forced-tcg' but the mode
# file records 'portable-fallback-tcg'. This may be deliberate so readers of the
# mode file see an already-known value - confirm, or align the two labels.
'portable-fallback-tcg' | Out-File -Encoding ascii -FilePath $vmMode
246+
$sshPort | Out-File -Encoding ascii -FilePath $sshPortFile
247+
Write-Host "VM started in forced portable mode (tcg). PID: $($forcedTcg.Id). SSH port: $sshPort"
248+
exit 0
249+
}
250+
# Launch failed: stop the cloud-init server before surfacing the error.
Stop-CloudInitServer
251+
throw "Failed to start VM in forced tcg mode. Check log: $vmLog"
252+
}
253+
223254
$accelerated = Start-QemuAttempt -Mode 'accelerated-whpx' -AccelerationArgs @('-accel', 'whpx') -BaseArgs $baseArgs -QemuBinary $qemuExe -LogPath $vmLog
224255
if ($accelerated) {
225256
$accelerated.Id | Out-File -Encoding ascii -FilePath $vmPid

0 commit comments

Comments
 (0)