diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 000000000..28f2963e9
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,3 @@
+{
+ "$schema": "https://json.schemastore.org/claude-code-settings.json"
+}
\ No newline at end of file
diff --git a/.claude/skills/pty-tui-testing/SKILL.md b/.claude/skills/pty-tui-testing/SKILL.md
new file mode 100644
index 000000000..6ec80feb6
--- /dev/null
+++ b/.claude/skills/pty-tui-testing/SKILL.md
@@ -0,0 +1,345 @@
+---
+name: pty-tui-testing
+description: Use this skill to build features that require testing the TUI with the PTY-based integration test harness.
+---
+
+
+*CRITICAL* Add the following steps to your Todo list using TodoWrite:
+
+From this point on, ignore any existing tests until you have a working example validated through a new PTY-based integration test.
+1. Identify the TUI behavior to test (startup, input handling, agent responses, screen updates, etc.)
+2. Write a new Rust integration test in `codex-rs/tui-integration-tests/tests/`
+ - Use `TuiSession::spawn()` or `TuiSession::spawn_with_config()` to launch codex in a PTY
+ - Configure terminal dimensions (rows, cols) appropriate for the test
+ - Configure `SessionConfig` with mock agent behavior if needed
+3. Follow these steps in a loop until the test passes:
+ - Add debug logging using `DEBUG_TUI_PTY=1` environment variable
+ - Run the specific test: `cargo test test_name -- --nocapture`
+ - Examine the PTY polling behavior, screen contents, and timing
+ - Update the test expectations or fix the TUI code
+If you get stuck: did you add DEBUG_TUI_PTY=1 logging?
+4. Review snapshots if using `insta::assert_snapshot!()` and accept with `cargo insta review`
+5. Run all TUI integration tests to ensure nothing broke: `cargo test -p tui-integration-tests`
+
+
+# PTY-Based TUI Integration Testing
+
+To test the Codex terminal user interface, write Rust integration tests using the `tui-integration-tests` harness. This framework spawns the real `codex` binary in a pseudo-terminal (PTY) and validates terminal output through screen content assertions.
+
+## Core Workflow
+
+**Test Structure:**
+
+All tests follow this pattern:
+1. Spawn a TUI session in a PTY with configured dimensions
+2. Wait for expected screen content to appear
+3. Send keyboard input to simulate user interactions
+4. Poll and validate screen state changes
+5. Optionally capture snapshots for regression testing
+
+**TUI Session Lifecycle:**
+
+```rust
+use tui_integration_tests::{TuiSession, SessionConfig, Key};
+use std::time::Duration;
+
+const TIMEOUT: Duration = Duration::from_secs(5);
+
+#[test]
+fn test_tui_behavior() {
+ // Spawn codex in a 24x80 terminal with default config
+ let mut session = TuiSession::spawn(24, 80)
+ .expect("Failed to spawn codex");
+
+ // Wait for welcome message to appear
+ session.wait_for_text("To get started", TIMEOUT)
+ .expect("Welcome message did not appear");
+
+ // Simulate user typing
+ session.send_str("Hello").unwrap();
+
+ // Submit with Enter key
+ session.send_key(Key::Enter).unwrap();
+
+ // Wait for agent response
+ session.wait_for_text("Test message", TIMEOUT)
+ .expect("Agent response did not appear");
+
+ // Assert final screen state
+ let contents = session.screen_contents();
+ assert!(contents.contains("expected text"));
+}
+```
+
+**Session Configuration:**
+
+Use `SessionConfig` to control test environment:
+
+```rust
+use tui_integration_tests::{TuiSession, SessionConfig, ApprovalPolicy};
+
+let config = SessionConfig::new()
+ .with_mock_response("Custom agent response")
+ .with_approval_policy(ApprovalPolicy::Never)
+ .with_agent_env("MOCK_AGENT_DELAY_MS", "100");
+
+let mut session = TuiSession::spawn_with_config(40, 120, config)
+ .expect("Failed to spawn codex");
+```
+
+## Key Testing Patterns
+
+**Pattern 1: Startup and Initialization**
+
+Test that the TUI displays correct welcome screens and skips onboarding appropriately:
+
+```rust
+#[test]
+fn test_startup_shows_welcome() {
+ let mut session = TuiSession::spawn_with_config(
+ 24, 80,
+ SessionConfig::default()
+ .without_approval_policy()
+ .without_sandbox(),
+ ).expect("Failed to spawn codex");
+
+ session.wait_for_text("Welcome", TIMEOUT)
+ .expect("Welcome did not appear");
+
+ let contents = session.screen_contents();
+ assert!(contents.contains("Welcome to Codex"));
+ assert!(contents.contains("/tmp/"));
+}
+```
+
+**Pattern 2: Input Handling and Screen Updates**
+
+Test keyboard input, character echo, and text editing:
+
+```rust
+#[test]
+fn test_typing_and_backspace() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("›", TIMEOUT).unwrap();
+
+ // Type text
+ session.send_str("Hello World").unwrap();
+ session.wait_for_text("Hello World", TIMEOUT).unwrap();
+
+ // Backspace to remove "World"
+ for _ in 0..5 {
+ session.send_key(Key::Backspace).unwrap();
+ }
+ std::thread::sleep(Duration::from_millis(100));
+
+ // Verify deletion
+ let contents = session.screen_contents();
+ assert!(contents.contains("Hello"));
+ assert!(!contents.contains("World"));
+}
+```
+
+**Pattern 3: Agent Interaction and Streaming**
+
+Test agent responses with custom mock behavior:
+
+```rust
+#[test]
+fn test_agent_response_streaming() {
+ let config = SessionConfig::new()
+ .with_mock_response("Response line 1\nResponse line 2");
+
+ let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap();
+ session.wait_for_text("›", TIMEOUT).unwrap();
+
+ session.send_str("test prompt").unwrap();
+ session.send_key(Key::Enter).unwrap();
+
+ // Wait for both lines to stream in
+ session.wait_for_text("Response line 1", TIMEOUT).unwrap();
+ session.wait_for_text("Response line 2", TIMEOUT).unwrap();
+}
+```
+
+**Pattern 4: Cancellation and Control Flow**
+
+Test Escape key cancellation and Ctrl-C behavior:
+
+```rust
+#[test]
+fn test_cancel_streaming_with_escape() {
+ let config = SessionConfig::new()
+ .with_stream_until_cancel();
+
+ let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap();
+ session.wait_for_text("›", TIMEOUT).unwrap();
+
+ session.send_str("test").unwrap();
+ session.send_key(Key::Enter).unwrap();
+
+ // Wait for streaming to start
+ session.wait_for_text("streaming", TIMEOUT).unwrap();
+
+ // Cancel with Escape
+ session.send_key(Key::Escape).unwrap();
+
+ // Verify cancellation message appears
+ session.wait_for_text("Cancelled", TIMEOUT).unwrap();
+}
+```
+
+**Pattern 5: Snapshot Testing**
+
+Capture and validate complete screen state:
+
+```rust
+use insta::assert_snapshot;
+
+#[test]
+fn test_screen_layout() {
+ let mut session = TuiSession::spawn(40, 120).unwrap();
+ session.wait_for_text("›", TIMEOUT).unwrap();
+
+ session.send_str("test prompt").unwrap();
+ session.send_key(Key::Enter).unwrap();
+ session.wait_for_text("Test message", TIMEOUT).unwrap();
+
+ // Capture full screen state for regression testing
+ assert_snapshot!("prompt_submitted", session.screen_contents());
+}
+```
+
+Review snapshots with `cargo insta review` after first run.
+
+**Normalizing Dynamic Content in Snapshots**
+
+When tests include dynamic content (temp paths, timestamps, random prompts), normalize before snapshotting to prevent spurious failures:
+
+```rust
+/// Normalize dynamic content in screen output for snapshot testing
+fn normalize_for_snapshot(contents: String) -> String {
+ let mut normalized = contents;
+
+ // Replace /tmp/.tmpXXXXXX with placeholder
+ if let Some(start) = normalized.find("/tmp/.tmp") {
+ if let Some(end) = normalized[start..].find(char::is_whitespace) {
+ normalized.replace_range(start..start + end, "[TMP_DIR]");
+ }
+ }
+
+ // Replace dynamic prompt text on lines starting with ›
+ let lines: Vec = normalized
+ .lines()
+ .map(|line| {
+ if line.trim_start().starts_with("›") && !line.contains("for shortcuts") {
+ "› [DEFAULT_PROMPT]".to_string()
+ } else {
+ line.to_string()
+ }
+ })
+ .collect();
+
+ lines.join("\n")
+}
+
+#[test]
+fn test_with_normalized_snapshot() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("Welcome", TIMEOUT).unwrap();
+
+ // Normalize before asserting to handle dynamic temp paths
+ assert_snapshot!(
+ "welcome_screen",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+```
+
+**Common Dynamic Content to Normalize:**
+
+- Temp directory paths: `/tmp/.tmpXXXXXX` → `[TMP_DIR]`
+- Random default prompts: `› Improve documentation...` → `› [DEFAULT_PROMPT]`
+- Timestamps: `2025-01-15 10:30:45` → `[TIMESTAMP]`
+- Session IDs, PIDs, or other ephemeral identifiers
+
+This pattern ensures snapshots focus on UI structure and static content rather than runtime-specific values. See `@/codex-rs/tui-integration-tests/tests/startup.rs` for reference implementation.
+
+## Configuration Options
+
+**SessionConfig Methods:**
+
+| Method | Purpose |
+|--------|---------|
+| `with_mock_response(text)` | Set custom agent response instead of defaults |
+| `with_stream_until_cancel()` | Make agent stream continuously until Escape pressed |
+| `with_agent_env(key, val)` | Pass environment variables to mock agent |
+| `with_approval_policy(policy)` | Control approval prompts (Untrusted, OnFailure, OnRequest, Never) |
+| `without_approval_policy()` | Remove approval policy to test trust screens |
+| `with_sandbox(sandbox)` | Set sandbox level (ReadOnly, WorkspaceWrite, DangerFullAccess) |
+| `without_sandbox()` | Remove sandbox to test trust screens |
+
+## TuiSession API
+
+**Spawning:**
+
+- `TuiSession::spawn(rows, cols)` - Launch with defaults in temp directory
+- `TuiSession::spawn_with_config(rows, cols, config)` - Launch with custom config
+
+**Input:**
+
+- `send_str(text)` - Simulate typing a string
+- `send_key(key)` - Send a keyboard event (Enter, Escape, Backspace, Arrow keys, Ctrl+key)
+
+**Polling and Waiting:**
+
+- `wait_for_text(needle, timeout)` - Poll until text appears on screen
+- `wait_for(predicate, timeout)` - Poll until custom condition matches
+- `poll()` - Manually read available output and update screen state
+- `screen_contents()` - Get current terminal screen as string
+
+**Available Keys:**
+
+- `Key::Enter`, `Key::Escape`, `Key::Backspace`
+- `Key::Up`, `Key::Down`, `Key::Left`, `Key::Right`
+- `Key::Ctrl('c')`, `Key::Ctrl('d')`, etc.
+
+## Debugging
+
+**Enable Debug Logging:**
+
+```bash
+DEBUG_TUI_PTY=1 cargo test test_name -- --nocapture
+```
+
+This shows:
+- Each `poll()` call and duration
+- Read results (bytes read, WouldBlock, EOF)
+- `wait_for()` loop iterations and elapsed time
+- Screen contents preview at each iteration
+
+**Common Issues:**
+
+1. **Test times out waiting for text**
+ - Add `DEBUG_TUI_PTY=1` to see polling behavior
+ - Check if text appears but with different formatting/spacing
+ - Verify mock agent is configured correctly
+ - Increase timeout for slower operations
+
+2. **Snapshot differences**
+ - Run `cargo insta review` to inspect changes
+ - Check for timing-dependent content (e.g., timestamps)
+ - Verify terminal dimensions match snapshot expectations
+
+3. **PTY blocking issues**
+ - Poll returns immediately even when no data (non-blocking mode)
+ - Use `wait_for()` which polls in a loop with 50ms sleep
+ - Don't rely on `poll()` alone for synchronization
+
+4. **Control sequence artifacts**
+ - PTY harness intercepts cursor position queries automatically
+ - If seeing escape sequences in output, may need additional interception
+ - Check `intercept_control_sequences()` in lib.rs
+
+## Testing Philosophy
+
+These are black-box integration tests that exercise the full executable stack (CLI → TUI → Core → ACP). Each test runs in isolation with deterministic mock agent responses, validating external behavior through screen content assertions.
diff --git a/.claude/skills/pty-tui-testing/SKILL.md.bak b/.claude/skills/pty-tui-testing/SKILL.md.bak
new file mode 100644
index 000000000..b9afc03d4
--- /dev/null
+++ b/.claude/skills/pty-tui-testing/SKILL.md.bak
@@ -0,0 +1,244 @@
+---
+name: pty-tui-testing
+description: Use this skill to build features that require testing the TUI with the PTY-based integration test harness.
+---
+
+
+*CRITICAL* Add the following steps to your Todo list using TodoWrite:
+
+From this point on, ignore any existing tests until you have a working example validated through a new test file.
+1. Ensure the codex binary is built with `cargo build` from the top directory of the cargo workspace.
+2. Write and run a PTY-based integration test that lets you interact with the TUI.
+3. Follow these steps in a loop until the feature works correctly:
+ - Add eprintln! debug logs to the TUI code. You *MUST* do this on every loop.
+ - Run the test with `cargo test --test -- --nocapture` to see debug output.
+ - Observe the terminal screen contents and timing in test output.
+ - Update the test to exercise the next scenario.
+If you get stuck: did you add debug logs? Are you checking the actual screen contents?
+4. Clean up debug logs when the feature is working.
+5. Update snapshots with `cargo insta review` if using snapshot testing.
+6. Make sure all integration tests pass with `cargo test` in the tui-integration-tests directory.
+
+
+# PTY-Based TUI Integration Testing
+
+To test terminal user interfaces, write Rust integration tests using the `tui-integration-tests` crate at `@/codex-rs/tui-integration-tests`. Your testing should drive the real binary in a pseudo-terminal to be as close to 'real' as possible.
+
+## Test Harness Overview
+
+The `TuiSession` API provides:
+- `spawn(rows, cols)` - Launch codex in a PTY with default config (temp directory, mock agent)
+- `spawn_with_config(rows, cols, config)` - Launch with custom configuration (like flags for the executable)
+- `send_str(text)` - Type text into the terminal
+- `send_key(key)` - Send keyboard events (Enter, Escape, Ctrl-C, Up/Down arrows)
+- `wait_for_text(needle, timeout)` - Poll until text appears on screen
+- `wait_for(predicate, timeout)` - Poll until custom condition matches
+- `screen_contents()` - Get current terminal screen as string
+
+All tests automatically run in isolated temporary directories under `/tmp/` with a sample `hello.py` file.
+
+## Basic Test Example
+
+Create a test file in `@/codex-rs/tui-integration-tests/tests/`:
+
+```rust
+use std::time::Duration;
+use tui_integration_tests::{SessionConfig, TuiSession, Key};
+
+const TIMEOUT: Duration = Duration::from_secs(5);
+
+#[test]
+fn test_user_can_type_prompt() {
+ // Spawn with default config (24x80 terminal, OnFailure approval policy)
+ let mut session = TuiSession::spawn(24, 80)
+ .expect("Failed to spawn codex");
+
+ // Wait for the prompt indicator to appear
+ session
+ .wait_for_text("›", TIMEOUT)
+ .expect("Prompt did not appear");
+
+ // Type a message
+ session.send_str("help me write a function").unwrap();
+
+ // Send Enter key
+ session.send_key(Key::Enter).unwrap();
+
+ // Wait for mock agent response
+ session
+ .wait_for_text("I can help", TIMEOUT)
+ .expect("Response did not appear");
+}
+```
+
+## Custom Configuration
+
+Use `SessionConfig` to customize the test environment:
+
+```rust
+let config = SessionConfig::default()
+ .with_mock_response("Custom mock agent response")
+ .with_stream_until_cancel() // Stream until Escape is pressed
+ .without_approval_policy(); // Show trust screen for testing
+
+let mut session = TuiSession::spawn_with_config(40, 120, config)
+ .expect("Failed to spawn");
+```
+
+## Keyboard Input
+
+Use the `Key` enum for special keys:
+
+```rust
+session.send_key(Key::Enter).unwrap();
+session.send_key(Key::Escape).unwrap();
+session.send_key(Key::Ctrl('c')).unwrap();
+session.send_key(Key::Up).unwrap();
+session.send_key(Key::Down).unwrap();
+session.send_key(Key::Backspace).unwrap();
+```
+
+## Polling and Waiting
+
+The polling mechanism reads from PTY in a loop:
+
+```rust
+// Wait for specific text (polls every 50ms)
+session.wait_for_text("Welcome", Duration::from_secs(5))?;
+
+// Wait for custom condition
+session.wait_for(
+ |screen| screen.contains("Ready") && screen.lines().count() > 5,
+ Duration::from_secs(10)
+)?;
+
+// Get current screen state
+let contents = session.screen_contents();
+assert!(contents.contains("expected text"));
+```
+
+## Snapshot Testing
+
+Use `insta` for regression testing of terminal output:
+
+```rust
+use insta::assert_snapshot;
+
+#[test]
+fn test_welcome_screen() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("Welcome", TIMEOUT).unwrap();
+
+ // Snapshot the screen contents
+ assert_snapshot!("welcome_screen", session.screen_contents());
+}
+```
+
+Review and update snapshots:
+```bash
+cargo insta review
+```
+
+## Mock Agent Control
+
+Configure mock agent behavior via `SessionConfig`:
+
+```rust
+let config = SessionConfig::default()
+ .with_mock_response("I'll help with that")
+ .with_agent_env("MOCK_AGENT_DELAY_MS", "100")
+ .with_agent_env("MOCK_AGENT_STREAM_UNTIL_CANCEL", "1");
+```
+
+Common mock agent environment variables:
+- `MOCK_AGENT_RESPONSE` - Custom response text
+- `MOCK_AGENT_DELAY_MS` - Simulate streaming delay
+- `MOCK_AGENT_STREAM_UNTIL_CANCEL` - Stream until Escape pressed
+
+See `@/codex-rs/mock-acp-agent/docs.md` for full list.
+
+## Running Tests
+
+```bash
+# Run all integration tests
+cd codex-rs/tui-integration-tests
+cargo test
+
+# Run specific test with output
+cargo test --test startup test_startup_shows_welcome -- --nocapture
+
+# Run with debug logging
+cargo test --test startup -- --nocapture 2>&1 | grep DEBUG
+```
+
+## Debugging Tips
+
+1. **Add debug output to TUI code:**
+ ```rust
+ eprintln!("[DEBUG] Current state: {:?}", self.state);
+ ```
+
+2. **Check screen contents in tests:**
+ ```rust
+ eprintln!("Screen: {}", session.screen_contents());
+ ```
+
+3. **Use longer timeouts when debugging:**
+ ```rust
+ const DEBUG_TIMEOUT: Duration = Duration::from_secs(30);
+ ```
+
+4. **Verify terminal dimensions:**
+ ```rust
+ let contents = session.screen_contents();
+ eprintln!("Lines: {}", contents.lines().count());
+ ```
+
+## Test Isolation
+
+- Each test runs in a unique `/tmp/` directory
+- Temp directory contains a `hello.py` file with `print('Hello, World!')`
+- Temp directory is automatically cleaned up when `TuiSession` is dropped
+- Tests are completely isolated from user's home directory and each other
+
+## Architecture Reminder
+
+```
+Test Code (Rust)
+ ↓
+TuiSession (portable_pty)
+ ↓
+PTY Master ←→ PTY Slave
+ ↓ ↓
+VT100 Parser codex binary (--model mock-acp-agent)
+ ↓ ↓
+Screen State ACP JSON-RPC over stdin/stdout
+ ↓
+ mock_acp_agent (env var configured)
+```
+
+## Common Pitfalls
+
+- **Not waiting for text before assertions:** Always use `wait_for_text()` before checking screen contents
+- **Timing issues:** PTY operations are asynchronous; use polling with timeouts
+- **Screen dimensions:** Ensure test terminal size matches expected layout (default 24x80)
+- **NO_COLOR=1:** Color codes are disabled by default for test determinism
+- **Forgot to build:** Tests run the real binary, so run `cargo build` first
+
+## Anti-Patterns
+
+DO NOT:
+- ❌ Skip waiting and immediately check screen contents
+- ❌ Use `thread::sleep()` instead of `wait_for_text()`
+- ❌ Test with hardcoded absolute paths
+- ❌ Ignore the screen contents in error messages
+- ❌ Add test-only code to production TUI components
+
+DO:
+- ✅ Always poll/wait before assertions
+- ✅ Use relative timeouts based on operation complexity
+- ✅ Check actual terminal output, not internal state
+- ✅ Add `eprintln!` debug logs to understand timing
+- ✅ Use snapshot testing for complex screen layouts
+
+If tests are flaky, did you wait long enough? Did you check what's actually on the screen?
diff --git a/.gitignore b/.gitignore
index 70d6dbcbf..7479be233 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,7 +29,10 @@ result
# cli tools
CLAUDE.md
-.claude/
+# .claude/
+.claude/settings.local.json
+.claude/agents/
+.claude/commands/
AGENTS.override.md
# caches
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 51f73ea53..f606f80a9 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -1582,7 +1582,7 @@ dependencies = [
"unicode-segmentation",
"unicode-width 0.2.1",
"url",
- "vt100",
+ "vt100 0.16.2",
]
[[package]]
@@ -1620,7 +1620,7 @@ name = "codex-utils-pty"
version = "0.0.0"
dependencies = [
"anyhow",
- "portable-pty",
+ "portable-pty 0.9.0",
"tokio",
]
@@ -3396,6 +3396,15 @@ dependencies = [
"libc",
]
+[[package]]
+name = "ioctl-rs"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f7970510895cee30b3e9128319f2cefd4bde883a39f38baa279567ba3a7eb97d"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "ipnet"
version = "2.11.0"
@@ -3915,6 +3924,31 @@ dependencies = [
"smallvec",
]
+[[package]]
+name = "nix"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4"
+dependencies = [
+ "autocfg",
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+ "memoffset 0.6.5",
+ "pin-utils",
+]
+
+[[package]]
+name = "nix"
+version = "0.27.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
+dependencies = [
+ "bitflags 2.10.0",
+ "cfg-if",
+ "libc",
+]
+
[[package]]
name = "nix"
version = "0.28.0"
@@ -4607,6 +4641,27 @@ dependencies = [
"portable-atomic",
]
+[[package]]
+name = "portable-pty"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "806ee80c2a03dbe1a9fb9534f8d19e4c0546b790cde8fd1fea9d6390644cb0be"
+dependencies = [
+ "anyhow",
+ "bitflags 1.3.2",
+ "downcast-rs",
+ "filedescriptor",
+ "lazy_static",
+ "libc",
+ "log",
+ "nix 0.25.1",
+ "serial",
+ "shared_library",
+ "shell-words",
+ "winapi",
+ "winreg",
+]
+
[[package]]
name = "portable-pty"
version = "0.9.0"
@@ -5762,6 +5817,48 @@ dependencies = [
"syn 2.0.104",
]
+[[package]]
+name = "serial"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1237a96570fc377c13baa1b88c7589ab66edced652e43ffb17088f003db3e86"
+dependencies = [
+ "serial-core",
+ "serial-unix",
+ "serial-windows",
+]
+
+[[package]]
+name = "serial-core"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f46209b345401737ae2125fe5b19a77acce90cd53e1658cda928e4fe9a64581"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "serial-unix"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f03fbca4c9d866e24a459cbca71283f545a37f8e3e002ad8c70593871453cab7"
+dependencies = [
+ "ioctl-rs",
+ "libc",
+ "serial-core",
+ "termios",
+]
+
+[[package]]
+name = "serial-windows"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "15c6d3b776267a75d31bbdfd5d36c0ca051251caafc285827052bc53bcdc8162"
+dependencies = [
+ "libc",
+ "serial-core",
+]
+
[[package]]
name = "serial2"
version = "0.2.31"
@@ -6264,6 +6361,15 @@ dependencies = [
"windows-sys 0.59.0",
]
+[[package]]
+name = "termios"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5d9cf598a6d7ce700a4e6a9199da127e6819a61e64b68609683cc9a01b5683a"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "termtree"
version = "0.5.1"
@@ -6911,6 +7017,19 @@ dependencies = [
"termcolor",
]
+[[package]]
+name = "tui-integration-tests"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "insta",
+ "libc",
+ "nix 0.27.1",
+ "portable-pty 0.8.1",
+ "tempfile",
+ "vt100 0.15.2",
+]
+
[[package]]
name = "typenum"
version = "1.18.0"
@@ -7070,6 +7189,18 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+[[package]]
+name = "vt100"
+version = "0.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de"
+dependencies = [
+ "itoa",
+ "log",
+ "unicode-width 0.1.14",
+ "vte 0.11.1",
+]
+
[[package]]
name = "vt100"
version = "0.16.2"
@@ -7078,7 +7209,18 @@ checksum = "054ff75fb8fa83e609e685106df4faeffdf3a735d3c74ebce97ec557d5d36fd9"
dependencies = [
"itoa",
"unicode-width 0.2.1",
- "vte",
+ "vte 0.15.0",
+]
+
+[[package]]
+name = "vte"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197"
+dependencies = [
+ "arrayvec",
+ "utf8parse",
+ "vte_generate_state_changes",
]
[[package]]
@@ -7091,6 +7233,16 @@ dependencies = [
"memchr",
]
+[[package]]
+name = "vte_generate_state_changes"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e369bee1b05d510a7b4ed645f5faa90619e05437111783ea5848f28d97d3c2e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+]
+
[[package]]
name = "wait-timeout"
version = "0.2.1"
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
index 6b9da6df9..c66b3549f 100644
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -43,6 +43,7 @@ members = [
"utils/tokenizer",
"acp",
"mock-acp-agent",
+ "tui-integration-tests",
]
resolver = "2"
diff --git a/codex-rs/acp/docs.md b/codex-rs/acp/docs.md
index e12bc854c..9be560dc8 100644
--- a/codex-rs/acp/docs.md
+++ b/codex-rs/acp/docs.md
@@ -100,5 +100,6 @@ Each `AcpModelClient::stream()` call spawns a fresh agent process:
- Thin slice integration tests in `@/codex-rs/acp/tests/thin_slice.rs` verify end-to-end streaming with mock agent
- Unit tests in `agent.rs` use shell commands to test stderr capture, buffer overflow, and line truncation
- Integration tests in `@/codex-rs/acp/tests/integration.rs` test with actual mock-acp-agent binary
+- TUI black-box tests in `@/codex-rs/tui-integration-tests` exercise full application flow including ACP protocol
Created and maintained by Nori.
diff --git a/codex-rs/docs.md b/codex-rs/docs.md
index 8ec4a3e97..49f4d67af 100644
--- a/codex-rs/docs.md
+++ b/codex-rs/docs.md
@@ -28,6 +28,7 @@ The workspace is organized into crate categories:
| Patch System | `apply-patch` | Structured file modification |
| MCP | `mcp-types`, `rmcp-client` | Model Context Protocol support |
| ACP | `acp`, `mock-acp-agent` | Agent Context Protocol support |
+| Testing | `tui-integration-tests` | PTY-based black-box TUI testing |
| Utilities | `utils/*`, `async-utils`, `ansi-escape`, `feedback` | Helper libraries |
Key architectural patterns:
diff --git a/codex-rs/mock-acp-agent/docs.md b/codex-rs/mock-acp-agent/docs.md
index a444017c3..1fc851216 100644
--- a/codex-rs/mock-acp-agent/docs.md
+++ b/codex-rs/mock-acp-agent/docs.md
@@ -11,6 +11,7 @@ Path: @/codex-rs/mock-acp-agent
### How it fits into the larger codebase
- Used by integration tests in `@/codex-rs/acp/tests/integration.rs` to test ACP protocol flow
+- Used by TUI black-box tests in `@/codex-rs/tui-integration-tests` as the `--model mock-acp-agent` backend
- Enables end-to-end testing of `AgentProcess` without requiring real AI providers
- Produces diagnostic stderr output that tests use to verify stderr capture functionality
- Not shipped in production; exists solely for development and CI testing
@@ -51,6 +52,8 @@ Path: @/codex-rs/mock-acp-agent
| `MOCK_AGENT_REQUEST_FILE` | Reads file path via client during prompt |
| `MOCK_AGENT_STREAM_UNTIL_CANCEL` | Continuously streams until cancel notification |
| `MOCK_AGENT_STDERR_COUNT` | Emits N lines of `MOCK_AGENT_STDERR_LINE:{i}` to stderr during prompt |
+| `MOCK_AGENT_RESPONSE` | Custom response text instead of default "Test message 1/2" (added for TUI testing) |
+| `MOCK_AGENT_DELAY_MS` | Millisecond delay before completing stream to simulate realistic streaming (added for TUI testing) |
**Stderr Output for Testing:**
diff --git a/codex-rs/mock-acp-agent/src/main.rs b/codex-rs/mock-acp-agent/src/main.rs
index d919123da..9d44a6729 100644
--- a/codex-rs/mock-acp-agent/src/main.rs
+++ b/codex-rs/mock-acp-agent/src/main.rs
@@ -164,11 +164,24 @@ impl acp::Agent for MockAgent {
}
}
- self.send_text_chunk(session_id.clone(), "Test message 1")
- .await?;
+ // Support custom response text for TUI testing
+ if let Ok(response) = std::env::var("MOCK_AGENT_RESPONSE") {
+ self.send_text_chunk(session_id.clone(), &response).await?;
+ } else {
+ // Default behavior
+ self.send_text_chunk(session_id.clone(), "Test message 1")
+ .await?;
+
+ self.send_text_chunk(session_id.clone(), "Test message 2")
+ .await?;
+ }
- self.send_text_chunk(session_id.clone(), "Test message 2")
- .await?;
+ // Support configurable delay for simulating realistic streaming
+ if let Ok(delay_str) = std::env::var("MOCK_AGENT_DELAY_MS")
+ && let Ok(delay) = delay_str.parse::()
+ {
+ sleep(Duration::from_millis(delay)).await;
+ }
if let Ok(file_path) = std::env::var("MOCK_AGENT_REQUEST_FILE") {
eprintln!("Mock agent: requesting file read: {}", file_path);
diff --git a/codex-rs/tui-integration-tests/Cargo.toml b/codex-rs/tui-integration-tests/Cargo.toml
new file mode 100644
index 000000000..879f7f661
--- /dev/null
+++ b/codex-rs/tui-integration-tests/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "tui-integration-tests"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+portable-pty = "0.8"
+vt100 = "0.15"
+insta = "1"
+anyhow = "1"
+tempfile = "3"
+
+[target.'cfg(unix)'.dependencies]
+nix = { version = "0.27", features = ["fs"] }
+libc = "0.2"
+
+[dev-dependencies]
+tempfile = "3"
diff --git a/codex-rs/tui-integration-tests/docs.md b/codex-rs/tui-integration-tests/docs.md
new file mode 100644
index 000000000..292d2f0f3
--- /dev/null
+++ b/codex-rs/tui-integration-tests/docs.md
@@ -0,0 +1,247 @@
+# Noridoc: TUI Integration Tests
+
+Path: @/codex-rs/tui-integration-tests
+
+### Overview
+
+- Black-box integration testing framework for the Codex TUI using PTY (pseudo-terminal) emulation
+- Spawns the real `codex` binary in a simulated terminal and exercises full application stack
+- Uses VT100 parser to capture and validate terminal screen output via snapshot testing
+- Provides programmatic keyboard input simulation and screen state polling
+
+### How it fits into the larger codebase
+
+- Tests the complete integration between `@/codex-rs/cli`, `@/codex-rs/tui`, `@/codex-rs/core`, and `@/codex-rs/acp`
+- Complements unit tests in `@/codex-rs/tui/src/chatwidget.rs` by testing full application behavior
+- Uses `@/codex-rs/mock-acp-agent` as the ACP backend for deterministic test scenarios
+- Validates CLI argument parsing, TUI event loop, ACP protocol communication, and terminal rendering
+- Part of the workspace at `@/codex-rs/Cargo.toml:46`
+
+### Core Implementation
+
+**Test Harness:** `TuiSession` in `@/codex-rs/tui-integration-tests/src/lib.rs`
+
+The main API provides:
+- `spawn(rows, cols)` - Launch codex binary with mock-acp-agent in PTY with automatic temp directory
+- `spawn_with_config(rows, cols, config)` - Launch with custom configuration and automatic temp directory
+- `send_str(text)` - Simulate typing text
+- `send_key(key)` - Send keyboard events (Enter, Escape, Ctrl-C, etc.)
+- `wait_for_text(needle, timeout)` - Poll screen until text appears
+- `wait_for(predicate, timeout)` - Poll screen until condition matches
+- `screen_contents()` - Get current terminal screen as string
+
+**Debugging Aids:**
+
+`TuiSession` implements `Drop` to print screen state when tests panic, making it easier to diagnose PTY timing issues:
+```rust
+impl Drop for TuiSession {
+ fn drop(&mut self) {
+ if std::thread::panicking() {
+ eprintln!("\n=== TUI Screen State at Panic ===");
+ eprintln!("{}", self.screen_contents());
+ eprintln!("=================================\n");
+ }
+ }
+}
+```
+
+The crate exports helper functions for consistent test patterns:
+- `TIMEOUT: Duration` - Standard 5-second timeout constant for use across all tests
+- `normalize_for_snapshot(contents: String) -> String` - Normalizes dynamic content for snapshot testing (see below)
+
+**Automatic Test Isolation:**
+
+All tests run in isolated temporary directories created in `/tmp/`:
+- Each `spawn()` or `spawn_with_config()` call creates a new temp directory
+- Directory contains a `hello.py` file with `print('Hello, World!')`
+- Temp directory is automatically cleaned up when `TuiSession` is dropped
+- Tests no longer run in user's home directory for better isolation
+
+**Architecture:**
+
+```
+Test Code
+ ↓
+TuiSession (portable_pty)
+ ↓
+PTY Master ←→ PTY Slave
+ ↓ ↓
+VT100 Parser codex binary (--model mock-acp-agent)
+ ↓ ↓
+Screen State ACP JSON-RPC over stdin/stdout
+ ↓
+ mock_acp_agent (env var configured)
+```
+
+**Key Input Handling:** `Key` enum in `@/codex-rs/tui-integration-tests/src/keys.rs`
+
+Converts high-level key events to ANSI escape sequences:
+- `Key::Enter` → `\r`
+- `Key::Escape` → `\x1b`
+- `Key::Up/Down/Left/Right` → `\x1b[A/B/D/C`
+- `Key::Backspace` → `\x7f`
+- `Key::Ctrl('c')` → Control character encoding
+
+**Session Configuration:** `SessionConfig` in `@/codex-rs/tui-integration-tests/src/lib.rs`
+
+Builder pattern for test environment setup:
+- `with_mock_response(text)` - Set `MOCK_AGENT_RESPONSE` env var
+- `with_stream_until_cancel()` - Set `MOCK_AGENT_STREAM_UNTIL_CANCEL=1`
+- `with_agent_env(key, value)` - Pass custom env vars to mock agent
+- `with_approval_policy(policy)` - Set approval policy (defaults to `OnFailure`)
+- `without_approval_policy()` - Remove approval policy to test trust screen
+- `cwd` field - Optional working directory (auto-created temp directory if None)
+
+**Approval Policy:** `ApprovalPolicy` enum controls when codex asks for command approval:
+- `Untrusted` - Only run trusted commands without approval
+- `OnFailure` - Ask for approval only when commands fail (default for tests)
+- `OnRequest` - Model decides when to ask for approval
+- `Never` - Never ask for approval
+
+By default, all spawned sessions use `ApprovalPolicy::OnFailure` which:
+- Skips the trust directory approval screen at startup
+- Allows tests to run without manual intervention
+- Sets both `--ask-for-approval on-failure` and `--sandbox workspace-write` flags
+
+### Things to Know
+
+**PTY Input Timing Pattern:**
+
+To avoid race conditions between sending input and the TUI processing it, tests add a 100ms delay after `send_str()` and `send_key()` operations when submitting prompts or navigating UI:
+
+```rust
+session.send_str("testing!!!").unwrap();
+std::thread::sleep(Duration::from_millis(100));
+session.send_key(Key::Enter).unwrap();
+std::thread::sleep(Duration::from_millis(100));
+```
+
+This delay allows the PTY subprocess time to process input and update the display before assertions check for results. The delay is added in test code (not in `TuiSession` methods) for flexibility—not all operations need delays.
+
+**Test Files Structure:**
+
+| File | Coverage |
+|------|----------|
+| `@/codex-rs/tui-integration-tests/tests/startup.rs` | TUI initialization, prompt display, trust screen skipping, snapshot testing for 4 startup scenarios, non-blocking PTY verification |
+| `@/codex-rs/tui-integration-tests/tests/prompt_flow.rs` | Prompt submission and agent responses |
+| `@/codex-rs/tui-integration-tests/tests/input_handling.rs` | Text editing, backspace, Ctrl-C clearing, arrow key navigation with snapshot testing |
+| `@/codex-rs/tui-integration-tests/tests/streaming.rs` | Prompt submission with timing delays, agent response streaming |
+
+**Snapshot Files:**
+
+| File | Test Coverage |
+|------|---------------|
+| `@/codex-rs/tui-integration-tests/tests/snapshots/startup__*.snap` | Various startup screen scenarios (welcome, dimensions, temp directory, trust screen) |
+| `@/codex-rs/tui-integration-tests/tests/snapshots/input_handling__*.snap` | Input handling scenarios (ctrl-c clear, typing/backspace, model changed) |
+| `@/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap` | Prompt submission and streaming response |
+
+**Snapshot Testing with Insta:**
+
+Tests use `insta::assert_snapshot!()` to capture terminal output for visual regression testing:
+```rust
+assert_snapshot!("startup_screen", normalize_for_snapshot(session.screen_contents()));
+```
+
+Snapshots stored in `@/codex-rs/tui-integration-tests/tests/snapshots/*.snap` for regression detection. Each snapshot captures the exact terminal output state at a specific test point.
+
+**Snapshot Normalization:**
+
+The `normalize_for_snapshot()` helper function exported from `@/codex-rs/tui-integration-tests/src/lib.rs` ensures stable snapshots across test runs by replacing dynamic content:
+
+Normalization rules:
+1. Temp directory paths (`/tmp/.tmpXXXXXX`) → `[TMP_DIR]` placeholder
+2. Random default prompts on lines starting with `› ` → `[DEFAULT_PROMPT]` placeholder
+ - Detects specific default prompt patterns: "Find and fix a bug", "Explain this codebase", "Write tests for", etc.
+ - Preserves user-entered prompts and UI text like "? for shortcuts"
+
+Implementation in `@/codex-rs/tui-integration-tests/src/lib.rs:456-488`:
+```rust
+pub fn normalize_for_snapshot(contents: String) -> String {
+ // Replace /tmp/.tmpXXXXXX with [TMP_DIR]
+ // Replace known default prompts with [DEFAULT_PROMPT]
+ // Preserves UI structure and user input
+}
+```
+
+This normalization allows snapshot assertions to focus on UI structure and static content rather than ephemeral runtime values. All tests import and use this function consistently: `use tui_integration_tests::{normalize_for_snapshot, ...};`
+
+**PTY Implementation Details:**
+
+- Uses `portable-pty` crate for cross-platform PTY support
+- PTY master is set to **non-blocking mode** using `fcntl(O_NONBLOCK)` on Unix systems
+- This prevents `read()` from blocking indefinitely when no data is available
+- Sets `TERM=xterm-256color` for terminal feature detection
+- NO_COLOR=1 by default for deterministic output parsing
+- Terminal size configurable (default 24x80, some tests use 40x120)
+
+**Polling Pattern:**
+
+`poll()` method performs non-blocking read from PTY master:
+- PTY file descriptor is set to non-blocking mode during session initialization
+- Reads up to 8KB buffer per poll
+- Intercepts and responds to terminal control sequences before parsing
+- Feeds processed data to VT100 parser incrementally
+- Returns immediately with `WouldBlock` error when no data is available
+- `wait_for()` loops with 50ms sleep between polls, checking timeout after each iteration
+- Timeout mechanism works correctly because `read()` never blocks indefinitely
+
+**Control Sequence Interception:**
+
+The `intercept_control_sequences()` method handles terminal queries that require responses:
+- Detects cursor position query (`ESC[6n`) in output stream from codex binary
+- Writes cursor position response (`ESC[1;1R`) back to PTY input
+- Removes control sequences from parser stream to avoid rendering artifacts
+- Enables crossterm terminal initialization without real terminal support
+
+**Mock Agent Integration:**
+
+Tests control mock agent behavior via environment variables:
+- `MOCK_AGENT_RESPONSE` - Custom response text instead of defaults
+- `MOCK_AGENT_DELAY_MS` - Simulate streaming delays
+- `MOCK_AGENT_STREAM_UNTIL_CANCEL` - Stream until Escape pressed
+
+See `@/codex-rs/mock-acp-agent/docs.md` for full list of env vars.
+
+**Binary Discovery:**
+
+`codex_binary_path()` locates the compiled binary:
+```
+test_exe: target/debug/deps/startup-abc123
+ ↓
+target/debug/deps (parent)
+ ↓
+target/debug (parent.parent)
+ ↓
+target/debug/codex (join "codex")
+```
+
+**Known Limitations:**
+
+- VT100 parser may not perfectly emulate all terminal behaviors
+- Terminal size changes after spawn not currently supported
+- Color codes disabled (NO_COLOR=1) for test determinism
+
+**Dependencies:**
+
+- `portable-pty = "0.8"` - PTY creation and management
+- `vt100 = "0.15"` - Terminal emulator/parser
+- `insta = "1"` - Snapshot testing framework
+- `anyhow = "1"` - Error handling
+- `tempfile = "3"` - Temporary directory creation for test isolation
+- `nix = "0.27"` (Unix only) - fcntl for non-blocking I/O setup
+- `libc = "0.2"` (Unix only) - Low-level fcntl operations
+
+**Debugging:**
+
+Set `DEBUG_TUI_PTY=1` environment variable to enable detailed logging of PTY operations:
+```bash
+DEBUG_TUI_PTY=1 cargo test test_name -- --nocapture
+```
+
+This shows:
+- Each `poll()` call and its duration
+- Read results (bytes read, WouldBlock, EOF)
+- `wait_for()` loop iterations and elapsed time
+- Screen contents preview at each iteration
+
+Created and maintained by Nori.
diff --git a/codex-rs/tui-integration-tests/src/keys.rs b/codex-rs/tui-integration-tests/src/keys.rs
new file mode 100644
index 000000000..d30ea96c3
--- /dev/null
+++ b/codex-rs/tui-integration-tests/src/keys.rs
@@ -0,0 +1,30 @@
+/// Key input types
+pub enum Key {
+ Enter,
+ Escape,
+ Up,
+ Down,
+ Left,
+ Right,
+ Backspace,
+ Tab,
+ Ctrl(char),
+ Char(char),
+}
+
+impl Key {
+ pub fn to_escape_sequence(&self) -> Vec {
+ match self {
+ Key::Enter => vec![b'\r'],
+ Key::Escape => vec![0x1b],
+ Key::Up => vec![0x1b, b'[', b'A'],
+ Key::Down => vec![0x1b, b'[', b'B'],
+ Key::Right => vec![0x1b, b'[', b'C'],
+ Key::Left => vec![0x1b, b'[', b'D'],
+ Key::Backspace => vec![0x7f],
+ Key::Tab => vec![b'\t'],
+ Key::Ctrl(c) => vec![(*c as u8) & 0x1f],
+ Key::Char(c) => c.to_string().into_bytes(),
+ }
+ }
+}
diff --git a/codex-rs/tui-integration-tests/src/lib.rs b/codex-rs/tui-integration-tests/src/lib.rs
new file mode 100644
index 000000000..e900e7e1b
--- /dev/null
+++ b/codex-rs/tui-integration-tests/src/lib.rs
@@ -0,0 +1,488 @@
+use anyhow::Result;
+use portable_pty::{native_pty_system, CommandBuilder, PtySize};
+use std::collections::HashMap;
+use std::io::{Read, Write};
+use std::time::{Duration, Instant};
+use vt100::Parser;
+
+#[cfg(unix)]
+/// Helper to set a file descriptor to non-blocking mode
+fn set_nonblocking(fd: std::os::unix::io::RawFd) -> Result<()> {
+ let flags = unsafe { libc::fcntl(fd, libc::F_GETFL) };
+ if flags < 0 {
+ return Err(std::io::Error::last_os_error().into());
+ }
+ let result = unsafe { libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK) };
+ if result < 0 {
+ return Err(std::io::Error::last_os_error().into());
+ }
+ Ok(())
+}
+
+pub use keys::Key;
+mod keys;
+
+/// PTY session for driving the codex TUI
+pub struct TuiSession {
+ _master: Box,
+ reader: Box,
+ writer: Box,
+ parser: Parser,
+ _temp_dir: Option,
+}
+
+impl Drop for TuiSession {
+ fn drop(&mut self) {
+ if std::thread::panicking() {
+ eprintln!("\n=== TUI Screen State at Panic ===");
+ eprintln!("{}", self.screen_contents());
+ eprintln!("=================================\n");
+ }
+ }
+}
+
+impl TuiSession {
+ /// Spawn codex with mock-acp-agent in a temporary directory
+ pub fn spawn(rows: u16, cols: u16) -> Result {
+ let temp_dir = tempfile::tempdir()?;
+ let hello_py = temp_dir.path().join("hello.py");
+ std::fs::write(&hello_py, "print('Hello, World!')")?;
+
+ let config = SessionConfig {
+ cwd: Some(temp_dir.path().to_path_buf()),
+ ..Default::default()
+ };
+
+ Self::spawn_with_config_and_tempdir(rows, cols, config, Some(temp_dir))
+ }
+
+ /// Spawn with custom configuration
+ /// Creates a temp directory with hello.py if no cwd is specified in config
+ pub fn spawn_with_config(rows: u16, cols: u16, mut config: SessionConfig) -> Result {
+ if config.cwd.is_none() {
+ let temp_dir = tempfile::tempdir()?;
+ let hello_py = temp_dir.path().join("hello.py");
+ std::fs::write(&hello_py, "print('Hello, World!')")?;
+ config.cwd = Some(temp_dir.path().to_path_buf());
+ Self::spawn_with_config_and_tempdir(rows, cols, config, Some(temp_dir))
+ } else {
+ Self::spawn_with_config_and_tempdir(rows, cols, config, None)
+ }
+ }
+
+ /// Internal method to spawn with optional temp directory
+ fn spawn_with_config_and_tempdir(
+ rows: u16,
+ cols: u16,
+ config: SessionConfig,
+ temp_dir: Option,
+ ) -> Result {
+ let pty_system = native_pty_system();
+ let pair = pty_system.openpty(PtySize {
+ rows,
+ cols,
+ pixel_width: 0,
+ pixel_height: 0,
+ })?;
+
+ let mut cmd = CommandBuilder::new(codex_binary_path());
+
+ // Set working directory if provided
+ if let Some(cwd) = &config.cwd {
+ cmd.cwd(cwd);
+ }
+
+ // Use mock-acp-agent model
+ cmd.arg("--model");
+ cmd.arg(&config.model);
+
+ // Set approval policy if specified (also sets sandbox to allow test execution)
+ if let Some(approval) = &config.approval_policy {
+ cmd.arg("--ask-for-approval");
+ cmd.arg(approval.as_str());
+ }
+ // Also set sandbox to workspace-write to allow file operations in tests
+ if let Some(sandbox) = &config.sandbox {
+ cmd.arg("--sandbox");
+ cmd.arg(sandbox.as_str());
+ }
+
+ // Set TERM to enable terminal features
+ cmd.env("TERM", "xterm-256color");
+
+ // Pass through mock agent env vars
+ for (key, value) in config.mock_agent_env {
+ cmd.env(&key, &value);
+ }
+
+ // Disable color codes for easier parsing
+ if config.no_color {
+ cmd.env("NO_COLOR", "1");
+ }
+
+ let _child = pair.slave.spawn_command(cmd)?;
+
+ // Set master PTY to non-blocking mode before cloning reader
+ // This ensures the cloned reader FD inherits the non-blocking flag
+ #[cfg(unix)]
+ {
+ if let Some(master_fd) = pair.master.as_raw_fd() {
+ set_nonblocking(master_fd)?;
+ }
+ }
+
+ let reader = pair.master.try_clone_reader()?;
+ let writer = pair.master.take_writer()?;
+
+ Ok(Self {
+ _master: pair.master,
+ reader,
+ writer,
+ parser: Parser::new(rows, cols, 0),
+ _temp_dir: temp_dir,
+ })
+ }
+
+ /// Read any available output and update screen state
+ ///
+ /// This method attempts to read available data without blocking.
+ /// It uses a simple approach of reading with a small buffer which works
+ /// well for our polling-based test framework.
+ pub fn poll(&mut self) -> Result<()> {
+ // Create a small buffer for reading
+ let mut buf = [0u8; 8192];
+
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] About to call read()...");
+ }
+ let read_start = Instant::now();
+
+ // The PTY reader is in non-blocking mode and will return immediately if no data is available
+ // We rely on the polling loop in wait_for() to handle timing
+ let read_result = self.reader.read(&mut buf);
+ let read_duration = read_start.elapsed();
+
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] read() returned after {:?}", read_duration);
+ }
+
+ match read_result {
+ Ok(0) => {
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] read() returned Ok(0) - EOF/process exited");
+ }
+ Ok(())
+ }
+ Ok(n) => {
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] read() returned Ok({}) - {} bytes read", n, n);
+ }
+ // Intercept and respond to control sequences before parsing
+ let processed = self.intercept_control_sequences(&buf[..n])?;
+ self.parser.process(&processed);
+ Ok(())
+ }
+ Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => {
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] read() returned WouldBlock - no data available");
+ }
+ Ok(())
+ }
+ Err(e) => {
+ if std::env::var("DEBUG_TUI_PTY").is_ok() {
+ eprintln!("[DEBUG poll] read() returned Err: {}", e);
+ }
+ Err(e.into())
+ }
+ }
+ }
+
+ /// Intercept control sequences and inject responses
+ ///
+ /// Detects cursor position queries (ESC[6n) and writes responses back to the PTY
+ /// Returns filtered data with control sequences removed
+ fn intercept_control_sequences(&mut self, data: &[u8]) -> Result> {
+ let mut result = Vec::with_capacity(data.len());
+ let mut i = 0;
+
+ while i < data.len() {
+ // Detect cursor position query: ESC[6n
+ if i + 3 < data.len()
+ && data[i] == 0x1b // ESC
+ && data[i+1] == b'['
+ && data[i+2] == b'6'
+ && data[i+3] == b'n'
+ {
+ // Write response back to PTY: ESC[1;1R (cursor at row 1, col 1)
+ self.writer.write_all(b"\x1b[1;1R")?;
+ self.writer.flush()?;
+ // Skip the control sequence - don't pass it to the parser
+ i += 4;
+ } else {
+ result.push(data[i]);
+ i += 1;
+ }
+ }
+ Ok(result)
+ }
+
+ /// Wait for predicate with timeout
+ pub fn wait_for(&mut self, pred: F, timeout: Duration) -> Result<(), String>
+ where
+ F: Fn(&str) -> bool,
+ {
+ let debug = std::env::var("DEBUG_TUI_PTY").is_ok();
+ if debug {
+ eprintln!(
+ "[DEBUG wait_for] Starting wait_for with timeout {:?}",
+ timeout
+ );
+ }
+ let start = Instant::now();
+ let mut iteration = 0;
+
+ loop {
+ iteration += 1;
+ let elapsed = start.elapsed();
+ if debug {
+ eprintln!(
+ "[DEBUG wait_for] Iteration {}, elapsed: {:?}",
+ iteration, elapsed
+ );
+ eprintln!("[DEBUG wait_for] Calling poll()...");
+ }
+
+ self.poll().map_err(|e| e.to_string())?;
+
+ if debug {
+ eprintln!("[DEBUG wait_for] poll() completed");
+ }
+
+ let contents = self.screen_contents();
+ if debug {
+ eprintln!(
+ "[DEBUG wait_for] Screen contents length: {} bytes",
+ contents.len()
+ );
+ eprintln!(
+ "[DEBUG wait_for] Screen contents preview: {:?}",
+ &contents.chars().take(100).collect::()
+ );
+ }
+
+ if pred(&contents) {
+ if debug {
+ eprintln!(
+ "[DEBUG wait_for] Predicate matched! Success after {:?}",
+ elapsed
+ );
+ }
+ return Ok(());
+ }
+
+ if debug {
+ eprintln!("[DEBUG wait_for] Predicate did not match");
+ }
+
+ if start.elapsed() > timeout {
+ if debug {
+ eprintln!(
+ "[DEBUG wait_for] TIMEOUT REACHED after {:?}",
+ start.elapsed()
+ );
+ }
+ return Err(format!(
+ "Timeout waiting for condition.\nScreen contents:\n{}",
+ contents
+ ));
+ }
+
+ if debug {
+ eprintln!("[DEBUG wait_for] Sleeping 50ms before next iteration");
+ }
+ std::thread::sleep(Duration::from_millis(50));
+ }
+ }
+
+ /// Wait for specific text to appear
+ pub fn wait_for_text(&mut self, needle: &str, timeout: Duration) -> Result<(), String> {
+ self.wait_for(|s| s.contains(needle), timeout)
+ }
+
+ /// Get current screen contents as string
+ pub fn screen_contents(&self) -> String {
+ self.parser.screen().contents()
+ }
+
+ /// Type a string
+ pub fn send_str(&mut self, s: &str) -> std::io::Result<()> {
+ self.writer.write_all(s.as_bytes())?;
+ self.writer.flush()
+ }
+
+ /// Send a key event
+ pub fn send_key(&mut self, key: Key) -> std::io::Result<()> {
+ self.writer.write_all(&key.to_escape_sequence())?;
+ self.writer.flush()
+ }
+}
+
+/// Sandbox policy for codex session
+#[derive(Debug, Clone, Copy)]
+pub enum Sandbox {
+ // [possible values: read-only, workspace-write, danger-full-access]
+ ReadOnly,
+ WorkspaceWrite,
+ DangerFullAccess,
+}
+
+impl Sandbox {
+ fn as_str(&self) -> &'static str {
+ match self {
+ Sandbox::ReadOnly => "read-only",
+ Sandbox::WorkspaceWrite => "workspace-write",
+ Sandbox::DangerFullAccess => "danger-full-access",
+ }
+ }
+}
+
+/// Approval policy for codex session
+#[derive(Debug, Clone, Copy)]
+pub enum ApprovalPolicy {
+ /// Only run trusted commands without approval
+ Untrusted,
+ /// Run all commands, ask for approval on failure
+ OnFailure,
+ /// Model decides when to ask
+ OnRequest,
+ /// Never ask for approval
+ Never,
+}
+
+impl ApprovalPolicy {
+ fn as_str(&self) -> &'static str {
+ match self {
+ ApprovalPolicy::Untrusted => "untrusted",
+ ApprovalPolicy::OnFailure => "on-failure",
+ ApprovalPolicy::OnRequest => "on-request",
+ ApprovalPolicy::Never => "never",
+ }
+ }
+}
+
+/// Configuration for spawning a test session
+pub struct SessionConfig {
+ pub model: String,
+ pub mock_agent_env: HashMap,
+ pub no_color: bool,
+ pub approval_policy: Option,
+ pub sandbox: Option,
+ pub cwd: Option,
+}
+
+impl Default for SessionConfig {
+ fn default() -> Self {
+ Self::new()
+ }
+}
+
+impl SessionConfig {
+ pub fn new() -> Self {
+ Self {
+ model: "mock-acp-agent".to_string(),
+ mock_agent_env: HashMap::new(),
+ no_color: true,
+ approval_policy: Some(ApprovalPolicy::OnFailure),
+ // [possible values: read-only, workspace-write, danger-full-access]
+ sandbox: Some(Sandbox::WorkspaceWrite),
+ cwd: None,
+ }
+ }
+
+ pub fn with_mock_response(mut self, response: impl Into) -> Self {
+ self.mock_agent_env
+ .insert("MOCK_AGENT_RESPONSE".to_string(), response.into());
+ self
+ }
+
+ pub fn with_stream_until_cancel(mut self) -> Self {
+ self.mock_agent_env.insert(
+ "MOCK_AGENT_STREAM_UNTIL_CANCEL".to_string(),
+ "1".to_string(),
+ );
+ self
+ }
+
+ pub fn with_agent_env(mut self, key: impl Into, value: impl Into) -> Self {
+ self.mock_agent_env.insert(key.into(), value.into());
+ self
+ }
+
+ pub fn with_approval_policy(mut self, policy: ApprovalPolicy) -> Self {
+ self.approval_policy = Some(policy);
+ self
+ }
+
+ pub fn without_approval_policy(mut self) -> Self {
+ self.approval_policy = None;
+ self
+ }
+
+ pub fn with_sandbox(mut self, sandbox: Sandbox) -> Self {
+ self.sandbox = Some(sandbox);
+ self
+ }
+
+ pub fn without_sandbox(mut self) -> Self {
+ self.sandbox = None;
+ self
+ }
+}
+
+/// Get path to codex binary
+fn codex_binary_path() -> String {
+ let test_exe = std::env::current_exe().expect("Failed to get current exe path");
+ test_exe
+ .parent() // deps
+ .and_then(|p| p.parent()) // debug or release
+ .expect("Failed to get target directory")
+ .join("codex")
+ .to_string_lossy()
+ .into_owned()
+}
+
+pub const TIMEOUT: Duration = Duration::from_secs(5);
+
+/// Normalize dynamic content in screen output for snapshot testing
+pub fn normalize_for_snapshot(contents: String) -> String {
+ let mut normalized = contents;
+
+ // Replace /tmp/.tmpXXXXXX with placeholder
+ if let Some(start) = normalized.find("/tmp/.tmp") {
+ if let Some(end) = normalized[start..].find(char::is_whitespace) {
+ normalized.replace_range(start..start + end, "[TMP_DIR]");
+ }
+ }
+
+ // Replace dynamic prompt text on lines starting with ›
+ let lines: Vec = normalized
+ .lines()
+ .map(|line| {
+ if line.trim_start().starts_with("› ")
+ && (line.trim_start().starts_with("› Find and fix a bug")
+ || line.trim_start().starts_with("› Explain this codebase")
+ || line.trim_start().starts_with("› Write tests for")
+ || line.trim_start().starts_with("› Improve documentation")
+ || line.trim_start().starts_with("› Summarize recent commits")
+ || line.trim_start().starts_with("› Implement {feature}")
+ || line.contains("@filename"))
+ {
+ "› [DEFAULT_PROMPT]".to_string()
+ } else {
+ line.to_string()
+ }
+ })
+ .collect();
+
+ lines.join("\n")
+}
diff --git a/codex-rs/tui-integration-tests/tests/input_handling.rs b/codex-rs/tui-integration-tests/tests/input_handling.rs
new file mode 100644
index 000000000..a005a7979
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/input_handling.rs
@@ -0,0 +1,69 @@
+use insta::assert_snapshot;
+use std::time::Duration;
+use tui_integration_tests::{normalize_for_snapshot, Key, TuiSession, TIMEOUT};
+
+#[test]
+fn test_ctrl_c_clears_input() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ // Type some text
+ session.send_str("draft message").unwrap();
+ session.wait_for_text("draft message", TIMEOUT).unwrap();
+
+ // Ctrl-C should clear
+ session.send_key(Key::Ctrl('c')).unwrap();
+
+ // Verify cleared
+ session
+ .wait_for(|s| !s.contains("draft message"), TIMEOUT)
+ .expect("Input was not cleared");
+
+ assert_snapshot!(
+ "ctrl_c_clears",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_backspace() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ session.send_str("Hello").unwrap();
+ session.wait_for_text("Hello", TIMEOUT).unwrap();
+
+ // Backspace twice
+ session.send_key(Key::Backspace).unwrap();
+ session.send_key(Key::Backspace).unwrap();
+
+ // Should have "Hel" remaining
+ session.wait_for_text("Hel", TIMEOUT).unwrap();
+ session.wait_for(|s| !s.contains("Hello"), TIMEOUT).unwrap();
+
+ assert_snapshot!(
+ "typing_and_backspace",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_arrows() {
+ let mut session = TuiSession::spawn(40, 80).unwrap();
+ session.wait_for_text("›", TIMEOUT).unwrap();
+
+ session.send_str("/model").unwrap();
+ session.wait_for_text("/model", TIMEOUT).unwrap();
+
+ session.send_key(Key::Enter).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.send_key(Key::Down).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.send_key(Key::Down).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+
+ assert_snapshot!(
+ "model_changed",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
diff --git a/codex-rs/tui-integration-tests/tests/prompt_flow.rs b/codex-rs/tui-integration-tests/tests/prompt_flow.rs
new file mode 100644
index 000000000..b6bf64212
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/prompt_flow.rs
@@ -0,0 +1,70 @@
+use insta::assert_snapshot;
+use std::time::Duration;
+use tui_integration_tests::{Key, SessionConfig, TuiSession};
+
+const TIMEOUT: Duration = Duration::from_secs(10);
+
+#[test]
+fn test_submit_prompt_default_response() {
+ let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex");
+
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ // Type prompt
+ session.send_str("Hello").unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.wait_for_text("Hello", TIMEOUT).unwrap();
+
+ // Submit
+ session.send_key(Key::Enter).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+
+ // Wait for default mock responses
+ session
+ .wait_for_text("Test message 1", TIMEOUT)
+ .expect("Did not receive mock response");
+ session
+ .wait_for_text("Test message 2", TIMEOUT)
+ .expect("Did not receive second mock response");
+
+ assert_snapshot!("prompt_submitted", session.screen_contents());
+}
+
+#[test]
+fn test_submit_prompt_custom_response() {
+ let config = SessionConfig::new()
+ .with_mock_response("This is a custom test response from the mock agent.");
+
+ let mut session = TuiSession::spawn_with_config(24, 80, config).expect("Failed to spawn codex");
+
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ session.send_str("test prompt").unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.send_key(Key::Enter).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+
+ session
+ .wait_for_text("This is a custom test response", TIMEOUT)
+ .expect("Did not receive custom response");
+
+ assert_snapshot!("custom_response", session.screen_contents());
+}
+
+#[test]
+fn test_multiline_input() {
+ let mut session = TuiSession::spawn(24, 80).unwrap();
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ // Type multiline prompt
+ session.send_str("Line 1").unwrap();
+ session.send_key(Key::Enter).unwrap();
+ session.send_str("Line 2").unwrap();
+ session.send_key(Key::Enter).unwrap();
+ session.send_str("Line 3").unwrap();
+
+ // Verify all lines visible
+ session.wait_for_text("Line 1", TIMEOUT).unwrap();
+ session.wait_for_text("Line 2", TIMEOUT).unwrap();
+ session.wait_for_text("Line 3", TIMEOUT).unwrap();
+}
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap b/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap
new file mode 100644
index 000000000..6da728e17
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap
@@ -0,0 +1,28 @@
+---
+source: tui-integration-tests/tests/cancellation.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+│ directory: [TMP_DIR] │
+╰──────────────────────────────────────────────────╯
+
+ To get started, describe a task or try one of these commands:
+
+ /init - create an AGENTS.md file with instructions for Codex
+ /status - show current session configuration
+ /approvals - choose what Codex can do without approval
+ /model - choose what model and reasoning effort to use
+ /review - review any changes and find issues
+
+
+› testing!!!
+
+
+■ Missing environment variable: `GOOGLE_API_KEY`. Get your API key from https://
+aistudio.google.com/app/apikey
+
+• Snapshots disabled: current directory is not a Git repository. (0s • esc to in
+
+
+› [DEFAULT_PROMPT]
+
+ 100% context left · ? for shortcuts
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap
new file mode 100644
index 000000000..59df3bd43
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap
@@ -0,0 +1,23 @@
+---
+source: tui-integration-tests/tests/input_handling.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+╭──────────────────────────────────────────────────╮
+│ >_ OpenAI Codex (v0.0.0) │
+│ │
+│ model: mock-acp-agent low /model to change │
+│ directory: [TMP_DIR] │
+╰──────────────────────────────────────────────────╯
+
+ To get started, describe a task or try one of these commands:
+
+ /init - create an AGENTS.md file with instructions for Codex
+ /status - show current session configuration
+ /approvals - choose what Codex can do without approval
+ /model - choose what model and reasoning effort to use
+ /review - review any changes and find issues
+
+
+› [DEFAULT_PROMPT]
+
+ ctrl + c again to quit
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap
new file mode 100644
index 000000000..17016c0de
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap
@@ -0,0 +1,7 @@
+---
+source: tui-integration-tests/tests/input_handling.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+› /model
+
+ /model choose what model and reasoning effort to use
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap
new file mode 100644
index 000000000..0a28bbe16
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap
@@ -0,0 +1,23 @@
+---
+source: tui-integration-tests/tests/input_handling.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+╭──────────────────────────────────────────────────╮
+│ >_ OpenAI Codex (v0.0.0) │
+│ │
+│ model: mock-acp-agent low /model to change │
+│ directory: [TMP_DIR] │
+╰──────────────────────────────────────────────────╯
+
+ To get started, describe a task or try one of these commands:
+
+ /init - create an AGENTS.md file with instructions for Codex
+ /status - show current session configuration
+ /approvals - choose what Codex can do without approval
+ /model - choose what model and reasoning effort to use
+ /review - review any changes and find issues
+
+
+› Hel
+
+ 100% context left
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap
new file mode 100644
index 000000000..8fc3c5312
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap
@@ -0,0 +1,29 @@
+---
+source: tui-integration-tests/tests/startup.rs
+assertion_line: 108
+expression: normalize_for_snapshot(session.screen_contents())
+---
+
+ _._:=++==+,_
+ _=,/*\+/+\=||=_ _"+_
+ ,|*|+**"^` `"*`"~=~||+
+ ;*_\*',,_ /*|;|,
+ \^;/'^|\`\\ ".|\\,
+ ~* +` |*/;||, '.\||,
+ +^"-* '\|*/"|_ ! |/|
+ ||_|` ,//|;|* "`|
+ |=~'` ;||^\|".~++++++_+, =" |
+ _~;* _;+` /* |"|___.:,,,|/,/,|
+ \^_"^ ^\,./` `^*''* ^*"/,;_/
+ *^, ", ` ,'/*_|
+ ^\,`\+_ _=_+|_+"
+ ^*,\_!*+:;=;;.=*+_,|*
+ `*"*|~~___,_;+*"
+
+
+ Welcome to Codex, OpenAI's command-line coding agent
+
+> You are running Codex in [TMP_DIR]
+
+ Since this folder is not version controlled, we recommend requiring approval
+ of all edits and commands.
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap
new file mode 100644
index 000000000..b27038950
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap
@@ -0,0 +1,28 @@
+---
+source: tui-integration-tests/tests/startup.rs
+expression: session.screen_contents()
+---
+
+ _._:=++==+,_
+ _=,/*\+/+\=||=_ _"+_
+ ,|*|+**"^` `"*`"~=~||+
+ ;*_\*',,_ /*|;|,
+ \^;/'^|\`\\ ".|\\,
+ ~* +` |*/;||, '.\||,
+ +^"-* '\|*/"|_ ! |/|
+ ||_|` ,//|;|* "`|
+ |=~'` ;||^\|".~++++++_+, =" |
+ _~;* _;+` /* |"|___.:,,,|/,/,|
+ \^_"^ ^\,./` `^*''* ^*"/,;_/
+ *^, ", ` ,'/*_|
+ ^\,`\+_ _=_+|_+"
+ ^*,\_!*+:;=;;.=*+_,|*
+ `*"*|~~___,_;+*"
+
+
+ Welcome to Codex, OpenAI's command-line coding agent
+
+> You are running Codex in [TMP_DIR]
+
+ Since this folder is not version controlled, we recommend requiring approval
+ of all edits and commands.
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap
new file mode 100644
index 000000000..dda65f12e
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap
@@ -0,0 +1,29 @@
+---
+source: tui-integration-tests/tests/startup.rs
+assertion_line: 52
+expression: normalize_for_snapshot(session.screen_contents())
+---
+
+ _._:=++==+,_
+ _=,/*\+/+\=||=_ _"+_
+ ,|*|+**"^` `"*`"~=~||+
+ ;*_\*',,_ /*|;|,
+ \^;/'^|\`\\ ".|\\,
+ ~* +` |*/;||, '.\||,
+ +^"-* '\|*/"|_ ! |/|
+ ||_|` ,//|;|* "`|
+ |=~'` ;||^\|".~++++++_+, =" |
+ _~;* _;+` /* |"|___.:,,,|/,/,|
+ \^_"^ ^\,./` `^*''* ^*"/,;_/
+ *^, ", ` ,'/*_|
+ ^\,`\+_ _=_+|_+"
+ ^*,\_!*+:;=;;.=*+_,|*
+ `*"*|~~___,_;+*"
+
+
+ Welcome to Codex, OpenAI's command-line coding agent
+
+> You are running Codex in [TMP_DIR]
+
+ Since this folder is not version controlled, we recommend requiring approval
+ of all edits and commands.
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap
new file mode 100644
index 000000000..105059b62
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap
@@ -0,0 +1,32 @@
+---
+source: tui-integration-tests/tests/startup.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+
+ _._:=++==+,_
+ _=,/*\+/+\=||=_ _"+_
+ ,|*|+**"^` `"*`"~=~||+
+ ;*_\*',,_ /*|;|,
+ \^;/'^|\`\\ ".|\\,
+ ~* +` |*/;||, '.\||,
+ +^"-* '\|*/"|_ ! |/|
+ ||_|` ,//|;|* "`|
+ |=~'` ;||^\|".~++++++_+, =" |
+ _~;* _;+` /* |"|___.:,,,|/,/,|
+ \^_"^ ^\,./` `^*''* ^*"/,;_/
+ *^, ", ` ,'/*_|
+ ^\,`\+_ _=_+|_+"
+ ^*,\_!*+:;=;;.=*+_,|*
+ `*"*|~~___,_;+*"
+
+
+ Welcome to Codex, OpenAI's command-line coding agent
+
+> You are running Codex in [TMP_DIR]
+
+ Since this folder is not version controlled, we recommend requiring approval of all edits and commands.
+
+ 1. Allow Codex to work in this folder without asking for approval
+› 2. Require approval of edits and commands
+
+ Press enter to continue
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap
new file mode 100644
index 000000000..cd7301ee1
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap
@@ -0,0 +1,8 @@
+---
+source: tui-integration-tests/tests/startup.rs
+assertion_line: 135
+expression: normalize_for_snapshot(session.screen_contents())
+---
+› [DEFAULT_PROMPT]
+
+ 100% context left · ? for shortcuts
diff --git a/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap b/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap
new file mode 100644
index 000000000..0ef557d94
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap
@@ -0,0 +1,28 @@
+---
+source: tui-integration-tests/tests/streaming.rs
+expression: normalize_for_snapshot(session.screen_contents())
+---
+│ directory: [TMP_DIR] │
+╰──────────────────────────────────────────────────╯
+
+ To get started, describe a task or try one of these commands:
+
+ /init - create an AGENTS.md file with instructions for Codex
+ /status - show current session configuration
+ /approvals - choose what Codex can do without approval
+ /model - choose what model and reasoning effort to use
+ /review - review any changes and find issues
+
+
+› testing!!!
+
+
+■ Missing environment variable: `GOOGLE_API_KEY`. Get your API key from https://
+aistudio.google.com/app/apikey
+
+• Snapshots disabled: current directory is not a Git repository. (0s • esc to in
+
+
+› [DEFAULT_PROMPT]
+
+ 100% context left · ? for shortcuts
diff --git a/codex-rs/tui-integration-tests/tests/startup.rs b/codex-rs/tui-integration-tests/tests/startup.rs
new file mode 100644
index 000000000..e17f3539e
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/startup.rs
@@ -0,0 +1,159 @@
+use insta::assert_snapshot;
+use std::time::{Duration, Instant};
+use tui_integration_tests::{normalize_for_snapshot, SessionConfig, TuiSession, TIMEOUT};
+
+#[test]
+fn test_startup_shows_welcome() {
+ let mut session = TuiSession::spawn_with_config(
+ 24,
+ 80,
+ SessionConfig::default()
+ // Don't include the values that would bypass welcome
+ .without_approval_policy()
+ .without_sandbox(),
+ )
+ .expect("Failed to spawn codex");
+
+ session
+ .wait_for_text("Welcome", TIMEOUT)
+ .expect("Prompt did not appear");
+
+ let contents = session.screen_contents();
+ assert!(contents.contains("Welcome to Codex"));
+ assert!(contents.contains("/tmp/"));
+ assert_snapshot!(
+ "startup_shows_welcome",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_startup_welcome_with_dimensions() {
+ let mut session = TuiSession::spawn_with_config(
+ 40,
+ 120,
+ SessionConfig::default()
+ // Don't include the values that would bypass welcome
+ .without_approval_policy()
+ .without_sandbox(),
+ )
+ .expect("Failed to spawn codex");
+
+ session
+ .wait_for_text("Welcome", TIMEOUT)
+ .expect("Prompt did not appear");
+
+ // Verify terminal size is respected
+ let contents = session.screen_contents();
+ assert!(contents.lines().count() <= 40);
+ assert_snapshot!(
+ "startup_welcome_dimensions_40x120",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_runs_in_temp_directory_by_default() {
+ let mut session = TuiSession::spawn_with_config(
+ 24,
+ 80,
+ SessionConfig::default()
+ // Don't include the values that would bypass welcome
+ .without_approval_policy()
+ .without_sandbox(),
+ )
+ .expect("Failed to spawn codex");
+
+ session
+ .wait_for_text("Welcome", TIMEOUT)
+ .expect("Prompt did not appear");
+
+ let contents = session.screen_contents();
+
+ // Should run in /tmp/, not home directory
+ assert!(
+ contents.contains("/tmp/"),
+ "Expected session to run in /tmp/, but got: {}",
+ contents
+ );
+
+ // Should NOT run in home directory
+ assert!(
+ !contents.contains("/home/"),
+ "Session should not run in home directory, but got: {}",
+ contents
+ );
+ assert_snapshot!(
+ "runs_in_temp_directory",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_trust_screen_is_skipped_with_default_config() {
+ let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex");
+
+ // Wait for the prompt to appear (indicated by the chevron character)
+ session
+ .wait_for_text("›", TIMEOUT)
+ .expect("Prompt did not appear");
+
+ let contents = session.screen_contents();
+
+ // Should NOT show the trust directory approval screen
+ assert!(
+ !contents.contains("Since this folder is not version controlled"),
+ "Trust screen should be skipped when approval policy is set, but got: {}",
+ contents
+ );
+
+ // Should show the main prompt directly (skipping onboarding)
+ assert!(
+ contents.contains("›") && contents.contains("context left"),
+ "Should show main prompt with context indicator, got: {}",
+ contents
+ );
+ assert_snapshot!(
+ "trust_screen_skipped",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+#[test]
+fn test_poll_does_not_block_when_no_data() {
+ // RED phase: This test verifies that poll() returns quickly when no data is available,
+ // proving the PTY reader is in non-blocking mode
+ let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex");
+
+ // Wait for initial startup to complete
+ session
+ .wait_for_text("›", TIMEOUT)
+ .expect("Initial startup failed");
+
+ // Wait for screen to stabilize - keep polling until contents don't change
+ let mut prev_contents = String::new();
+ for _ in 0..20 {
+ session.poll().expect("Poll failed during stabilization");
+ std::thread::sleep(Duration::from_millis(100));
+ let contents = session.screen_contents();
+ if contents == prev_contents {
+ // No change for 100ms, screen is stable
+ break;
+ }
+ prev_contents = contents;
+ }
+
+ // Now codex is truly waiting for input, no more data will come
+ // Poll should return immediately without blocking
+ let start = Instant::now();
+ session.poll().expect("Poll failed");
+ let elapsed = start.elapsed();
+
+ // Assert poll() completed in < 50ms (proves non-blocking)
+ // If blocking, would wait indefinitely and this would timeout
+ assert!(
+ elapsed < Duration::from_millis(50),
+ "poll() took {:?}, expected < 50ms. Reader appears to be blocking!",
+ elapsed
+ );
+}
diff --git a/codex-rs/tui-integration-tests/tests/streaming.rs b/codex-rs/tui-integration-tests/tests/streaming.rs
new file mode 100644
index 000000000..cc3162561
--- /dev/null
+++ b/codex-rs/tui-integration-tests/tests/streaming.rs
@@ -0,0 +1,61 @@
+use insta::assert_snapshot;
+use std::time::Duration;
+use tui_integration_tests::{normalize_for_snapshot, Key, SessionConfig, TuiSession, TIMEOUT};
+
+#[test]
+fn test_submit_text() {
+ let config = SessionConfig::new().with_stream_until_cancel();
+
+ let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap();
+ session.wait_for_text("To get started", TIMEOUT).unwrap();
+
+ // Submit prompt
+ session.send_str("testing!!!").unwrap();
+ session.wait_for_text("testing!!!", TIMEOUT).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.send_key(Key::Enter).unwrap();
+ std::thread::sleep(Duration::from_millis(100));
+ session.wait_for_text("GOOGLE_API_KEY", TIMEOUT).unwrap();
+
+ assert_snapshot!(
+ "submit_input",
+ normalize_for_snapshot(session.screen_contents())
+ );
+}
+
+// #[test]
+// fn test_escape_cancels_streaming() {
+// let config = SessionConfig::new().with_stream_until_cancel();
+//
+// let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap();
+// session.wait_for_text("To get started", TIMEOUT).unwrap();
+//
+// // Submit prompt
+// session.send_str("testing!!!").unwrap();
+// session.wait_for_text("testing!!!", TIMEOUT).unwrap();
+// std::thread::sleep(Duration::from_millis(100));
+// session.send_key(Key::Enter).unwrap();
+// std::thread::sleep(Duration::from_millis(100));
+//
+// // Wait for streaming to start
+// session
+// .wait_for_text("Streaming...", TIMEOUT)
+// .expect("Streaming did not start");
+//
+// // Press Escape to cancel
+// session.send_key(Key::Escape).unwrap();
+//
+// // Verify cancellation completed
+// // (exact behavior depends on TUI implementation)
+// session
+// .wait_for(
+// |s| s.contains("Cancelled") || s.contains("Stopped"),
+// TIMEOUT,
+// )
+// .ok(); // May not show explicit message
+//
+// assert_snapshot!(
+// "cancelled_stream",
+// normalize_for_snapshot(session.screen_contents())
+// )
+// }
diff --git a/codex-rs/tui/docs.md b/codex-rs/tui/docs.md
index 38ecfb6f6..59c2c6395 100644
--- a/codex-rs/tui/docs.md
+++ b/codex-rs/tui/docs.md
@@ -99,6 +99,8 @@ The `color.rs` and `terminal_palette.rs` modules handle terminal color detection
- `test_backend.rs`: Test terminal backend for snapshot testing
- Uses `insta` for snapshot tests of rendered output
- `AGENTS.md` documents testing conventions
+- Black-box integration tests in `@/codex-rs/tui-integration-tests` test full TUI via PTY
+- Integration tests spawn real `codex` binary with `mock-acp-agent` backend
**Configuration Flow:**
diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs
index 94b7c5dd6..4bd4b4aaa 100644
--- a/codex-rs/tui/src/app.rs
+++ b/codex-rs/tui/src/app.rs
@@ -21,10 +21,10 @@ use codex_common::model_presets::ModelUpgrade;
use codex_common::model_presets::all_model_presets;
use codex_core::AuthManager;
use codex_core::ConversationManager;
+use codex_core::GEMINI_ACP_PROVIDER_ID;
use codex_core::config::Config;
use codex_core::config::edit::ConfigEditsBuilder;
use codex_core::model_family::find_family_for_model;
-use codex_core::GEMINI_ACP_PROVIDER_ID;
use codex_core::protocol::FinalOutput;
use codex_core::protocol::SessionSource;
use codex_core::protocol::TokenUsage;