Skip to content

Commit 9681eca

Browse files
authored
fix: avoid PTY wait deadlock on wait errors (#165)
## Summary - Fix the PTY wait deadlock by ensuring the child-monitor thread always sets the shared completion slot, including wait failures. - Store wait failures as `Arc<std::io::Error>` in the shared `OnceLock` (`Result<ExitStatus, Arc<std::io::Error>>`) so error state is shareable without synthetic fallback status. - Change `ChildHandle::wait()` to return `anyhow::Result<ExitStatus>` and propagate wait failures through callers. - Update downstream helpers/callers (`pty_terminal_test`, milestone tests, and `vite_task_bin` e2e harness) to consume the new `anyhow::Result` return type. - Keep PTY integration tests strict at `#[timeout(5000)]` and run them without a Windows serialization guard. ## Stable Reproduction (before fix) To deterministically reproduce the real deadlock path (without mocking `child.wait()` return values), I used a temporary Windows-only repro patch that swaps the internal `WinChild` process handle with a non-process handle (`CreateEventW` event handle). This makes `child.wait()` execute the real Windows wait/get-exit-code path and return `Err`. With the old code path (`if let Ok(status) = child.wait() { ... }`), `OnceLock` was never set on error, and `ChildHandle::wait()` blocked forever. Repro command: ```bash cargo xtest --builder cargo-xwin --target x86_64-pc-windows-msvc -p pty_terminal --test terminal -- read_to_end_returns_exit_status_nonzero ``` Observed behavior under repro: - timed out at ~15s with `#[timeout(15000)]` - timed out again at ~60s with `#[timeout(60000)]` That demonstrates timeout-independent deadlock behavior. ## Why this fixes it - Background monitor thread now always sets the shared wait result (`Ok` or `Err`). - `ChildHandle::wait()` cannot block forever on an unset `OnceLock`. - Wait failures are preserved and surfaced as real errors (no synthetic `ExitStatus::with_exit_code(1)`). ## Validation - `cargo clippy -p pty_terminal --all-targets --all-features -- -D warnings` - `cargo test -p pty_terminal -p pty_terminal_test` - `cargo test -p vite_task_bin --test e2e_snapshots --no-run` - `cargo xtest --builder cargo-xwin --target x86_64-pc-windows-msvc -p pty_terminal --test terminal` - `cargo xtest --builder cargo-xwin --target aarch64-pc-windows-msvc -p pty_terminal --test terminal` - `cargo xtest --builder cargo-xwin --target x86_64-pc-windows-msvc -p pty_terminal --test terminal` repeated 40 times (all green, without serial guard)
1 parent 379efcd commit 9681eca

File tree

5 files changed

+29
-20
lines changed

5 files changed

+29
-20
lines changed

crates/pty_terminal/src/terminal.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use portable_pty::{ChildKiller, ExitStatus, MasterPty};
1010

1111
use crate::geo::ScreenSize;
1212

13+
type ChildWaitResult = Result<ExitStatus, Arc<std::io::Error>>;
14+
1315
/// The read half of a PTY connection. Implements [`Read`].
1416
///
1517
/// Reading feeds data through an internal vt100 parser (shared with [`PtyWriter`]),
@@ -32,7 +34,7 @@ pub struct PtyWriter {
3234
/// A cloneable handle to a child process spawned in a PTY.
3335
pub struct ChildHandle {
3436
child_killer: Box<dyn ChildKiller + Send + Sync>,
35-
exit_status: Arc<OnceLock<ExitStatus>>,
37+
exit_status: Arc<OnceLock<ChildWaitResult>>,
3638
}
3739

3840
impl Clone for ChildHandle {
@@ -221,9 +223,15 @@ impl PtyWriter {
221223

222224
impl ChildHandle {
223225
/// Blocks until the child process has exited and returns its exit status.
224-
#[must_use]
225-
pub fn wait(&self) -> ExitStatus {
226-
self.exit_status.wait().clone()
226+
///
227+
/// # Errors
228+
///
229+
/// Returns an error if waiting for the child process exit status fails.
230+
pub fn wait(&self) -> anyhow::Result<ExitStatus> {
231+
match self.exit_status.wait() {
232+
Ok(status) => Ok(status.clone()),
233+
Err(error) => Err(anyhow::Error::new(Arc::clone(error))),
234+
}
227235
}
228236

229237
/// Kills the child process.
@@ -263,17 +271,14 @@ impl Terminal {
263271
let child_killer = child.clone_killer();
264272
drop(pty_pair.slave); // Critical: drop slave so EOF is signaled when child exits
265273
let master = pty_pair.master;
266-
let exit_status: Arc<OnceLock<ExitStatus>> = Arc::new(OnceLock::new());
274+
let exit_status: Arc<OnceLock<ChildWaitResult>> = Arc::new(OnceLock::new());
267275

268276
// Background thread: wait for child to exit, set exit status, then close writer to trigger EOF
269277
thread::spawn({
270278
let writer = Arc::clone(&writer);
271279
let exit_status = Arc::clone(&exit_status);
272280
move || {
273-
// Wait for child and set exit status
274-
if let Ok(status) = child.wait() {
275-
let _ = exit_status.set(status);
276-
}
281+
let _ = exit_status.set(child.wait().map_err(Arc::new));
277282
// Close writer to signal EOF to the reader
278283
*writer.lock().unwrap() = None;
279284
}

crates/pty_terminal/tests/terminal.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ fn is_terminal() {
2020
Terminal::spawn(ScreenSize { rows: 80, cols: 80 }, cmd).unwrap();
2121
let mut discard = Vec::new();
2222
pty_reader.read_to_end(&mut discard).unwrap();
23-
let _ = child_handle.wait();
23+
let _ = child_handle.wait().unwrap();
2424
let output = pty_reader.screen_contents();
2525
assert_eq!(output.trim(), "true true true");
2626
}
@@ -47,7 +47,7 @@ fn write_basic_echo() {
4747

4848
let mut discard = Vec::new();
4949
pty_reader.read_to_end(&mut discard).unwrap();
50-
let _ = child_handle.wait();
50+
let _ = child_handle.wait().unwrap();
5151

5252
let output = pty_reader.screen_contents();
5353
// PTY echoes the input, so we see "hello world\nhello world"
@@ -98,7 +98,7 @@ fn write_multiple_lines() {
9898

9999
let mut discard = Vec::new();
100100
pty_reader.read_to_end(&mut discard).unwrap();
101-
let _ = child_handle.wait();
101+
let _ = child_handle.wait().unwrap();
102102

103103
let output = pty_reader.screen_contents();
104104
// PTY echoes input, then child prints "Echo: {line}\n" for each
@@ -119,7 +119,7 @@ fn write_after_exit() {
119119
// Read all output - this blocks until child exits and EOF is reached
120120
let mut discard = Vec::new();
121121
pty_reader.read_to_end(&mut discard).unwrap();
122-
let _ = child_handle.wait();
122+
let _ = child_handle.wait().unwrap();
123123

124124
// Writer shutdown is done by a background thread after child wait returns.
125125
// Poll briefly for the writer state to flip to closed before asserting write failure.
@@ -165,7 +165,7 @@ fn write_interactive_prompt() {
165165

166166
let mut discard = Vec::new();
167167
pty_reader.read_to_end(&mut discard).unwrap();
168-
let _ = child_handle.wait();
168+
let _ = child_handle.wait().unwrap();
169169

170170
let output = pty_reader.screen_contents();
171171
assert_eq!(output.trim(), "Name: Alice\nHello, Alice");
@@ -346,7 +346,7 @@ fn read_to_end_returns_exit_status_success() {
346346
Terminal::spawn(ScreenSize { rows: 80, cols: 80 }, cmd).unwrap();
347347
let mut discard = Vec::new();
348348
pty_reader.read_to_end(&mut discard).unwrap();
349-
let status = child_handle.wait();
349+
let status = child_handle.wait().unwrap();
350350
assert!(status.success());
351351
assert_eq!(status.exit_code(), 0);
352352
}
@@ -362,7 +362,7 @@ fn read_to_end_returns_exit_status_nonzero() {
362362
Terminal::spawn(ScreenSize { rows: 80, cols: 80 }, cmd).unwrap();
363363
let mut discard = Vec::new();
364364
pty_reader.read_to_end(&mut discard).unwrap();
365-
let status = child_handle.wait();
365+
let status = child_handle.wait().unwrap();
366366
assert!(!status.success());
367367
assert_eq!(status.exit_code(), 42);
368368
}

crates/pty_terminal_test/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,14 @@ impl Reader {
9191

9292
/// Reads all remaining PTY output until the child exits, then returns the exit status.
9393
///
94+
/// # Errors
95+
///
96+
/// Returns an error if waiting for the child process exit status fails.
97+
///
9498
/// # Panics
9599
///
96100
/// Panics if reading from the PTY fails.
97-
pub fn wait_for_exit(&mut self) -> ExitStatus {
101+
pub fn wait_for_exit(&mut self) -> anyhow::Result<ExitStatus> {
98102
let mut discard = Vec::new();
99103
self.pty.read_to_end(&mut discard).expect("PTY read_to_end failed");
100104
self.child_handle.wait()

crates/pty_terminal_test/tests/milestone.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ fn milestone_raw_mode_keystrokes() {
6767
// Write 'q' to quit and wait for the child to exit
6868
writer.write_all(b"q").unwrap();
6969
writer.flush().unwrap();
70-
let status = reader.wait_for_exit();
70+
let status = reader.wait_for_exit().unwrap();
7171
assert!(status.success());
7272
}
7373

@@ -122,6 +122,6 @@ fn milestone_does_not_pollute_screen() {
122122

123123
writer.write_all(b"q").unwrap();
124124
writer.flush().unwrap();
125-
let status = reader.wait_for_exit();
125+
let status = reader.wait_for_exit().unwrap();
126126
assert!(status.success());
127127
}

crates/vite_task_bin/tests/e2e_snapshots/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ fn run_case_inner(tmpdir: &AbsolutePath, fixture_path: &std::path::Path, fixture
391391
}
392392
}
393393

394-
let status = terminal.reader.wait_for_exit();
394+
let status = terminal.reader.wait_for_exit().unwrap();
395395
let screen = terminal.reader.screen_contents();
396396

397397
{

0 commit comments

Comments
 (0)