diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..28f2963e9 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,3 @@ +{ + "$schema": "https://json.schemastore.org/claude-code-settings.json" +} \ No newline at end of file diff --git a/.claude/skills/pty-tui-testing/SKILL.md b/.claude/skills/pty-tui-testing/SKILL.md new file mode 100644 index 000000000..6ec80feb6 --- /dev/null +++ b/.claude/skills/pty-tui-testing/SKILL.md @@ -0,0 +1,345 @@ +--- +name: pty-tui-testing +description: Use this skill to build features that require testing the TUI with the PTY-based integration test harness. +--- + + +*CRITICAL* Add the following steps to your Todo list using TodoWrite: + +From this point on, ignore any existing tests until you have a working example validated through a new PTY-based integration test. +1. Identify the TUI behavior to test (startup, input handling, agent responses, screen updates, etc.) +2. Write a new Rust integration test in `codex-rs/tui-integration-tests/tests/` + - Use `TuiSession::spawn()` or `TuiSession::spawn_with_config()` to launch codex in a PTY + - Configure terminal dimensions (rows, cols) appropriate for the test + - Configure `SessionConfig` with mock agent behavior if needed +3. Follow these steps in a loop until the test passes: + - Add debug logging using `DEBUG_TUI_PTY=1` environment variable + - Run the specific test: `cargo test test_name -- --nocapture` + - Examine the PTY polling behavior, screen contents, and timing + - Update the test expectations or fix the TUI code +If you get stuck: did you add DEBUG_TUI_PTY=1 logging? +4. Review snapshots if using `insta::assert_snapshot!()` and accept with `cargo insta review` +5. Run all TUI integration tests to ensure nothing broke: `cargo test -p tui-integration-tests` + + +# PTY-Based TUI Integration Testing + +To test the Codex terminal user interface, write Rust integration tests using the `tui-integration-tests` harness. This framework spawns the real `codex` binary in a pseudo-terminal (PTY) and validates terminal output through screen content assertions. + +## Core Workflow + +**Test Structure:** + +All tests follow this pattern: +1. Spawn a TUI session in a PTY with configured dimensions +2. Wait for expected screen content to appear +3. Send keyboard input to simulate user interactions +4. Poll and validate screen state changes +5. Optionally capture snapshots for regression testing + +**TUI Session Lifecycle:** + +```rust +use tui_integration_tests::{TuiSession, SessionConfig, Key}; +use std::time::Duration; + +const TIMEOUT: Duration = Duration::from_secs(5); + +#[test] +fn test_tui_behavior() { + // Spawn codex in a 24x80 terminal with default config + let mut session = TuiSession::spawn(24, 80) + .expect("Failed to spawn codex"); + + // Wait for welcome message to appear + session.wait_for_text("To get started", TIMEOUT) + .expect("Welcome message did not appear"); + + // Simulate user typing + session.send_str("Hello").unwrap(); + + // Submit with Enter key + session.send_key(Key::Enter).unwrap(); + + // Wait for agent response + session.wait_for_text("Test message", TIMEOUT) + .expect("Agent response did not appear"); + + // Assert final screen state + let contents = session.screen_contents(); + assert!(contents.contains("expected text")); +} +``` + +**Session Configuration:** + +Use `SessionConfig` to control test environment: + +```rust +use tui_integration_tests::{TuiSession, SessionConfig, ApprovalPolicy}; + +let config = SessionConfig::new() + .with_mock_response("Custom agent response") + .with_approval_policy(ApprovalPolicy::Never) + .with_agent_env("MOCK_AGENT_DELAY_MS", "100"); + +let mut session = TuiSession::spawn_with_config(40, 120, config) + .expect("Failed to spawn codex"); +``` + +## Key Testing Patterns + +**Pattern 1: Startup and Initialization** + +Test that the TUI displays correct welcome screens and skips onboarding appropriately: + +```rust +#[test] +fn test_startup_shows_welcome() { + let mut session = TuiSession::spawn_with_config( + 24, 80, + SessionConfig::default() + .without_approval_policy() + .without_sandbox(), + ).expect("Failed to spawn codex"); + + session.wait_for_text("Welcome", TIMEOUT) + .expect("Welcome did not appear"); + + let contents = session.screen_contents(); + assert!(contents.contains("Welcome to Codex")); + assert!(contents.contains("/tmp/")); +} +``` + +**Pattern 2: Input Handling and Screen Updates** + +Test keyboard input, character echo, and text editing: + +```rust +#[test] +fn test_typing_and_backspace() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("›", TIMEOUT).unwrap(); + + // Type text + session.send_str("Hello World").unwrap(); + session.wait_for_text("Hello World", TIMEOUT).unwrap(); + + // Backspace to remove "World" + for _ in 0..5 { + session.send_key(Key::Backspace).unwrap(); + } + std::thread::sleep(Duration::from_millis(100)); + + // Verify deletion + let contents = session.screen_contents(); + assert!(contents.contains("Hello")); + assert!(!contents.contains("World")); +} +``` + +**Pattern 3: Agent Interaction and Streaming** + +Test agent responses with custom mock behavior: + +```rust +#[test] +fn test_agent_response_streaming() { + let config = SessionConfig::new() + .with_mock_response("Response line 1\nResponse line 2"); + + let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap(); + session.wait_for_text("›", TIMEOUT).unwrap(); + + session.send_str("test prompt").unwrap(); + session.send_key(Key::Enter).unwrap(); + + // Wait for both lines to stream in + session.wait_for_text("Response line 1", TIMEOUT).unwrap(); + session.wait_for_text("Response line 2", TIMEOUT).unwrap(); +} +``` + +**Pattern 4: Cancellation and Control Flow** + +Test Escape key cancellation and Ctrl-C behavior: + +```rust +#[test] +fn test_cancel_streaming_with_escape() { + let config = SessionConfig::new() + .with_stream_until_cancel(); + + let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap(); + session.wait_for_text("›", TIMEOUT).unwrap(); + + session.send_str("test").unwrap(); + session.send_key(Key::Enter).unwrap(); + + // Wait for streaming to start + session.wait_for_text("streaming", TIMEOUT).unwrap(); + + // Cancel with Escape + session.send_key(Key::Escape).unwrap(); + + // Verify cancellation message appears + session.wait_for_text("Cancelled", TIMEOUT).unwrap(); +} +``` + +**Pattern 5: Snapshot Testing** + +Capture and validate complete screen state: + +```rust +use insta::assert_snapshot; + +#[test] +fn test_screen_layout() { + let mut session = TuiSession::spawn(40, 120).unwrap(); + session.wait_for_text("›", TIMEOUT).unwrap(); + + session.send_str("test prompt").unwrap(); + session.send_key(Key::Enter).unwrap(); + session.wait_for_text("Test message", TIMEOUT).unwrap(); + + // Capture full screen state for regression testing + assert_snapshot!("prompt_submitted", session.screen_contents()); +} +``` + +Review snapshots with `cargo insta review` after first run. + +**Normalizing Dynamic Content in Snapshots** + +When tests include dynamic content (temp paths, timestamps, random prompts), normalize before snapshotting to prevent spurious failures: + +```rust +/// Normalize dynamic content in screen output for snapshot testing +fn normalize_for_snapshot(contents: String) -> String { + let mut normalized = contents; + + // Replace /tmp/.tmpXXXXXX with placeholder + if let Some(start) = normalized.find("/tmp/.tmp") { + if let Some(end) = normalized[start..].find(char::is_whitespace) { + normalized.replace_range(start..start + end, "[TMP_DIR]"); + } + } + + // Replace dynamic prompt text on lines starting with › + let lines: Vec = normalized + .lines() + .map(|line| { + if line.trim_start().starts_with("›") && !line.contains("for shortcuts") { + "› [DEFAULT_PROMPT]".to_string() + } else { + line.to_string() + } + }) + .collect(); + + lines.join("\n") +} + +#[test] +fn test_with_normalized_snapshot() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("Welcome", TIMEOUT).unwrap(); + + // Normalize before asserting to handle dynamic temp paths + assert_snapshot!( + "welcome_screen", + normalize_for_snapshot(session.screen_contents()) + ); +} +``` + +**Common Dynamic Content to Normalize:** + +- Temp directory paths: `/tmp/.tmpXXXXXX` → `[TMP_DIR]` +- Random default prompts: `› Improve documentation...` → `› [DEFAULT_PROMPT]` +- Timestamps: `2025-01-15 10:30:45` → `[TIMESTAMP]` +- Session IDs, PIDs, or other ephemeral identifiers + +This pattern ensures snapshots focus on UI structure and static content rather than runtime-specific values. See `@/codex-rs/tui-integration-tests/tests/startup.rs` for reference implementation. + +## Configuration Options + +**SessionConfig Methods:** + +| Method | Purpose | +|--------|---------| +| `with_mock_response(text)` | Set custom agent response instead of defaults | +| `with_stream_until_cancel()` | Make agent stream continuously until Escape pressed | +| `with_agent_env(key, val)` | Pass environment variables to mock agent | +| `with_approval_policy(policy)` | Control approval prompts (Untrusted, OnFailure, OnRequest, Never) | +| `without_approval_policy()` | Remove approval policy to test trust screens | +| `with_sandbox(sandbox)` | Set sandbox level (ReadOnly, WorkspaceWrite, DangerFullAccess) | +| `without_sandbox()` | Remove sandbox to test trust screens | + +## TuiSession API + +**Spawning:** + +- `TuiSession::spawn(rows, cols)` - Launch with defaults in temp directory +- `TuiSession::spawn_with_config(rows, cols, config)` - Launch with custom config + +**Input:** + +- `send_str(text)` - Simulate typing a string +- `send_key(key)` - Send a keyboard event (Enter, Escape, Backspace, Arrow keys, Ctrl+key) + +**Polling and Waiting:** + +- `wait_for_text(needle, timeout)` - Poll until text appears on screen +- `wait_for(predicate, timeout)` - Poll until custom condition matches +- `poll()` - Manually read available output and update screen state +- `screen_contents()` - Get current terminal screen as string + +**Available Keys:** + +- `Key::Enter`, `Key::Escape`, `Key::Backspace` +- `Key::Up`, `Key::Down`, `Key::Left`, `Key::Right` +- `Key::Ctrl('c')`, `Key::Ctrl('d')`, etc. + +## Debugging + +**Enable Debug Logging:** + +```bash +DEBUG_TUI_PTY=1 cargo test test_name -- --nocapture +``` + +This shows: +- Each `poll()` call and duration +- Read results (bytes read, WouldBlock, EOF) +- `wait_for()` loop iterations and elapsed time +- Screen contents preview at each iteration + +**Common Issues:** + +1. **Test times out waiting for text** + - Add `DEBUG_TUI_PTY=1` to see polling behavior + - Check if text appears but with different formatting/spacing + - Verify mock agent is configured correctly + - Increase timeout for slower operations + +2. **Snapshot differences** + - Run `cargo insta review` to inspect changes + - Check for timing-dependent content (e.g., timestamps) + - Verify terminal dimensions match snapshot expectations + +3. **PTY blocking issues** + - Poll returns immediately even when no data (non-blocking mode) + - Use `wait_for()` which polls in a loop with 50ms sleep + - Don't rely on `poll()` alone for synchronization + +4. **Control sequence artifacts** + - PTY harness intercepts cursor position queries automatically + - If seeing escape sequences in output, may need additional interception + - Check `intercept_control_sequences()` in lib.rs + +## Testing Philosophy + +These are black-box integration tests that exercise the full executable stack (CLI → TUI → Core → ACP). Each test runs in isolation with deterministic mock agent responses, validating external behavior through screen content assertions. diff --git a/.claude/skills/pty-tui-testing/SKILL.md.bak b/.claude/skills/pty-tui-testing/SKILL.md.bak new file mode 100644 index 000000000..b9afc03d4 --- /dev/null +++ b/.claude/skills/pty-tui-testing/SKILL.md.bak @@ -0,0 +1,244 @@ +--- +name: pty-tui-testing +description: Use this skill to build features that require testing the TUI with the PTY-based integration test harness. +--- + + +*CRITICAL* Add the following steps to your Todo list using TodoWrite: + +From this point on, ignore any existing tests until you have a working example validated through a new test file. +1. Ensure the codex binary is built with `cargo build` from the top directory of the cargo workspace. +2. Write and run a PTY-based integration test that lets you interact with the TUI. +3. Follow these steps in a loop until the feature works correctly: + - Add eprintln! debug logs to the TUI code. You *MUST* do this on every loop. + - Run the test with `cargo test --test -- --nocapture` to see debug output. + - Observe the terminal screen contents and timing in test output. + - Update the test to exercise the next scenario. +If you get stuck: did you add debug logs? Are you checking the actual screen contents? +4. Clean up debug logs when the feature is working. +5. Update snapshots with `cargo insta review` if using snapshot testing. +6. Make sure all integration tests pass with `cargo test` in the tui-integration-tests directory. + + +# PTY-Based TUI Integration Testing + +To test terminal user interfaces, write Rust integration tests using the `tui-integration-tests` crate at `@/codex-rs/tui-integration-tests`. Your testing should drive the real binary in a pseudo-terminal to be as close to 'real' as possible. + +## Test Harness Overview + +The `TuiSession` API provides: +- `spawn(rows, cols)` - Launch codex in a PTY with default config (temp directory, mock agent) +- `spawn_with_config(rows, cols, config)` - Launch with custom configuration (like flags for the executable) +- `send_str(text)` - Type text into the terminal +- `send_key(key)` - Send keyboard events (Enter, Escape, Ctrl-C, Up/Down arrows) +- `wait_for_text(needle, timeout)` - Poll until text appears on screen +- `wait_for(predicate, timeout)` - Poll until custom condition matches +- `screen_contents()` - Get current terminal screen as string + +All tests automatically run in isolated temporary directories under `/tmp/` with a sample `hello.py` file. + +## Basic Test Example + +Create a test file in `@/codex-rs/tui-integration-tests/tests/`: + +```rust +use std::time::Duration; +use tui_integration_tests::{SessionConfig, TuiSession, Key}; + +const TIMEOUT: Duration = Duration::from_secs(5); + +#[test] +fn test_user_can_type_prompt() { + // Spawn with default config (24x80 terminal, OnFailure approval policy) + let mut session = TuiSession::spawn(24, 80) + .expect("Failed to spawn codex"); + + // Wait for the prompt indicator to appear + session + .wait_for_text("›", TIMEOUT) + .expect("Prompt did not appear"); + + // Type a message + session.send_str("help me write a function").unwrap(); + + // Send Enter key + session.send_key(Key::Enter).unwrap(); + + // Wait for mock agent response + session + .wait_for_text("I can help", TIMEOUT) + .expect("Response did not appear"); +} +``` + +## Custom Configuration + +Use `SessionConfig` to customize the test environment: + +```rust +let config = SessionConfig::default() + .with_mock_response("Custom mock agent response") + .with_stream_until_cancel() // Stream until Escape is pressed + .without_approval_policy(); // Show trust screen for testing + +let mut session = TuiSession::spawn_with_config(40, 120, config) + .expect("Failed to spawn"); +``` + +## Keyboard Input + +Use the `Key` enum for special keys: + +```rust +session.send_key(Key::Enter).unwrap(); +session.send_key(Key::Escape).unwrap(); +session.send_key(Key::Ctrl('c')).unwrap(); +session.send_key(Key::Up).unwrap(); +session.send_key(Key::Down).unwrap(); +session.send_key(Key::Backspace).unwrap(); +``` + +## Polling and Waiting + +The polling mechanism reads from PTY in a loop: + +```rust +// Wait for specific text (polls every 50ms) +session.wait_for_text("Welcome", Duration::from_secs(5))?; + +// Wait for custom condition +session.wait_for( + |screen| screen.contains("Ready") && screen.lines().count() > 5, + Duration::from_secs(10) +)?; + +// Get current screen state +let contents = session.screen_contents(); +assert!(contents.contains("expected text")); +``` + +## Snapshot Testing + +Use `insta` for regression testing of terminal output: + +```rust +use insta::assert_snapshot; + +#[test] +fn test_welcome_screen() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("Welcome", TIMEOUT).unwrap(); + + // Snapshot the screen contents + assert_snapshot!("welcome_screen", session.screen_contents()); +} +``` + +Review and update snapshots: +```bash +cargo insta review +``` + +## Mock Agent Control + +Configure mock agent behavior via `SessionConfig`: + +```rust +let config = SessionConfig::default() + .with_mock_response("I'll help with that") + .with_agent_env("MOCK_AGENT_DELAY_MS", "100") + .with_agent_env("MOCK_AGENT_STREAM_UNTIL_CANCEL", "1"); +``` + +Common mock agent environment variables: +- `MOCK_AGENT_RESPONSE` - Custom response text +- `MOCK_AGENT_DELAY_MS` - Simulate streaming delay +- `MOCK_AGENT_STREAM_UNTIL_CANCEL` - Stream until Escape pressed + +See `@/codex-rs/mock-acp-agent/docs.md` for full list. + +## Running Tests + +```bash +# Run all integration tests +cd codex-rs/tui-integration-tests +cargo test + +# Run specific test with output +cargo test --test startup test_startup_shows_welcome -- --nocapture + +# Run with debug logging +cargo test --test startup -- --nocapture 2>&1 | grep DEBUG +``` + +## Debugging Tips + +1. **Add debug output to TUI code:** + ```rust + eprintln!("[DEBUG] Current state: {:?}", self.state); + ``` + +2. **Check screen contents in tests:** + ```rust + eprintln!("Screen: {}", session.screen_contents()); + ``` + +3. **Use longer timeouts when debugging:** + ```rust + const DEBUG_TIMEOUT: Duration = Duration::from_secs(30); + ``` + +4. **Verify terminal dimensions:** + ```rust + let contents = session.screen_contents(); + eprintln!("Lines: {}", contents.lines().count()); + ``` + +## Test Isolation + +- Each test runs in a unique `/tmp/` directory +- Temp directory contains a `hello.py` file with `print('Hello, World!')` +- Temp directory is automatically cleaned up when `TuiSession` is dropped +- Tests are completely isolated from user's home directory and each other + +## Architecture Reminder + +``` +Test Code (Rust) + ↓ +TuiSession (portable_pty) + ↓ +PTY Master ←→ PTY Slave + ↓ ↓ +VT100 Parser codex binary (--model mock-acp-agent) + ↓ ↓ +Screen State ACP JSON-RPC over stdin/stdout + ↓ + mock_acp_agent (env var configured) +``` + +## Common Pitfalls + +- **Not waiting for text before assertions:** Always use `wait_for_text()` before checking screen contents +- **Timing issues:** PTY operations are asynchronous; use polling with timeouts +- **Screen dimensions:** Ensure test terminal size matches expected layout (default 24x80) +- **NO_COLOR=1:** Color codes are disabled by default for test determinism +- **Forgot to build:** Tests run the real binary, so run `cargo build` first + +## Anti-Patterns + +DO NOT: +- ❌ Skip waiting and immediately check screen contents +- ❌ Use `thread::sleep()` instead of `wait_for_text()` +- ❌ Test with hardcoded absolute paths +- ❌ Ignore the screen contents in error messages +- ❌ Add test-only code to production TUI components + +DO: +- ✅ Always poll/wait before assertions +- ✅ Use relative timeouts based on operation complexity +- ✅ Check actual terminal output, not internal state +- ✅ Add `eprintln!` debug logs to understand timing +- ✅ Use snapshot testing for complex screen layouts + +If tests are flaky, did you wait long enough? Did you check what's actually on the screen? diff --git a/.gitignore b/.gitignore index 70d6dbcbf..7479be233 100644 --- a/.gitignore +++ b/.gitignore @@ -29,7 +29,10 @@ result # cli tools CLAUDE.md -.claude/ +# .claude/ +.claude/settings.local.json +.claude/agents/ +.claude/commands/ AGENTS.override.md # caches diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 51f73ea53..f606f80a9 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1582,7 +1582,7 @@ dependencies = [ "unicode-segmentation", "unicode-width 0.2.1", "url", - "vt100", + "vt100 0.16.2", ] [[package]] @@ -1620,7 +1620,7 @@ name = "codex-utils-pty" version = "0.0.0" dependencies = [ "anyhow", - "portable-pty", + "portable-pty 0.9.0", "tokio", ] @@ -3396,6 +3396,15 @@ dependencies = [ "libc", ] +[[package]] +name = "ioctl-rs" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7970510895cee30b3e9128319f2cefd4bde883a39f38baa279567ba3a7eb97d" +dependencies = [ + "libc", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -3915,6 +3924,31 @@ dependencies = [ "smallvec", ] +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "libc", + "memoffset 0.6.5", + "pin-utils", +] + +[[package]] +name = "nix" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +dependencies = [ + "bitflags 2.10.0", + "cfg-if", + "libc", +] + [[package]] name = "nix" version = "0.28.0" @@ -4607,6 +4641,27 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "portable-pty" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "806ee80c2a03dbe1a9fb9534f8d19e4c0546b790cde8fd1fea9d6390644cb0be" +dependencies = [ + "anyhow", + "bitflags 1.3.2", + "downcast-rs", + "filedescriptor", + "lazy_static", + "libc", + "log", + "nix 0.25.1", + "serial", + "shared_library", + "shell-words", + "winapi", + "winreg", +] + [[package]] name = "portable-pty" version = "0.9.0" @@ -5762,6 +5817,48 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "serial" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1237a96570fc377c13baa1b88c7589ab66edced652e43ffb17088f003db3e86" +dependencies = [ + "serial-core", + "serial-unix", + "serial-windows", +] + +[[package]] +name = "serial-core" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f46209b345401737ae2125fe5b19a77acce90cd53e1658cda928e4fe9a64581" +dependencies = [ + "libc", +] + +[[package]] +name = "serial-unix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03fbca4c9d866e24a459cbca71283f545a37f8e3e002ad8c70593871453cab7" +dependencies = [ + "ioctl-rs", + "libc", + "serial-core", + "termios", +] + +[[package]] +name = "serial-windows" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15c6d3b776267a75d31bbdfd5d36c0ca051251caafc285827052bc53bcdc8162" +dependencies = [ + "libc", + "serial-core", +] + [[package]] name = "serial2" version = "0.2.31" @@ -6264,6 +6361,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "termios" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5d9cf598a6d7ce700a4e6a9199da127e6819a61e64b68609683cc9a01b5683a" +dependencies = [ + "libc", +] + [[package]] name = "termtree" version = "0.5.1" @@ -6911,6 +7017,19 @@ dependencies = [ "termcolor", ] +[[package]] +name = "tui-integration-tests" +version = "0.1.0" +dependencies = [ + "anyhow", + "insta", + "libc", + "nix 0.27.1", + "portable-pty 0.8.1", + "tempfile", + "vt100 0.15.2", +] + [[package]] name = "typenum" version = "1.18.0" @@ -7070,6 +7189,18 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "vt100" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84cd863bf0db7e392ba3bd04994be3473491b31e66340672af5d11943c6274de" +dependencies = [ + "itoa", + "log", + "unicode-width 0.1.14", + "vte 0.11.1", +] + [[package]] name = "vt100" version = "0.16.2" @@ -7078,7 +7209,18 @@ checksum = "054ff75fb8fa83e609e685106df4faeffdf3a735d3c74ebce97ec557d5d36fd9" dependencies = [ "itoa", "unicode-width 0.2.1", - "vte", + "vte 0.15.0", +] + +[[package]] +name = "vte" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5022b5fbf9407086c180e9557be968742d839e68346af7792b8592489732197" +dependencies = [ + "arrayvec", + "utf8parse", + "vte_generate_state_changes", ] [[package]] @@ -7091,6 +7233,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "vte_generate_state_changes" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e369bee1b05d510a7b4ed645f5faa90619e05437111783ea5848f28d97d3c2e" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "wait-timeout" version = "0.2.1" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index 6b9da6df9..c66b3549f 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -43,6 +43,7 @@ members = [ "utils/tokenizer", "acp", "mock-acp-agent", + "tui-integration-tests", ] resolver = "2" diff --git a/codex-rs/acp/docs.md b/codex-rs/acp/docs.md index e12bc854c..9be560dc8 100644 --- a/codex-rs/acp/docs.md +++ b/codex-rs/acp/docs.md @@ -100,5 +100,6 @@ Each `AcpModelClient::stream()` call spawns a fresh agent process: - Thin slice integration tests in `@/codex-rs/acp/tests/thin_slice.rs` verify end-to-end streaming with mock agent - Unit tests in `agent.rs` use shell commands to test stderr capture, buffer overflow, and line truncation - Integration tests in `@/codex-rs/acp/tests/integration.rs` test with actual mock-acp-agent binary +- TUI black-box tests in `@/codex-rs/tui-integration-tests` exercise full application flow including ACP protocol Created and maintained by Nori. diff --git a/codex-rs/docs.md b/codex-rs/docs.md index 8ec4a3e97..49f4d67af 100644 --- a/codex-rs/docs.md +++ b/codex-rs/docs.md @@ -28,6 +28,7 @@ The workspace is organized into crate categories: | Patch System | `apply-patch` | Structured file modification | | MCP | `mcp-types`, `rmcp-client` | Model Context Protocol support | | ACP | `acp`, `mock-acp-agent` | Agent Context Protocol support | +| Testing | `tui-integration-tests` | PTY-based black-box TUI testing | | Utilities | `utils/*`, `async-utils`, `ansi-escape`, `feedback` | Helper libraries | Key architectural patterns: diff --git a/codex-rs/mock-acp-agent/docs.md b/codex-rs/mock-acp-agent/docs.md index a444017c3..1fc851216 100644 --- a/codex-rs/mock-acp-agent/docs.md +++ b/codex-rs/mock-acp-agent/docs.md @@ -11,6 +11,7 @@ Path: @/codex-rs/mock-acp-agent ### How it fits into the larger codebase - Used by integration tests in `@/codex-rs/acp/tests/integration.rs` to test ACP protocol flow +- Used by TUI black-box tests in `@/codex-rs/tui-integration-tests` as the `--model mock-acp-agent` backend - Enables end-to-end testing of `AgentProcess` without requiring real AI providers - Produces diagnostic stderr output that tests use to verify stderr capture functionality - Not shipped in production; exists solely for development and CI testing @@ -51,6 +52,8 @@ Path: @/codex-rs/mock-acp-agent | `MOCK_AGENT_REQUEST_FILE` | Reads file path via client during prompt | | `MOCK_AGENT_STREAM_UNTIL_CANCEL` | Continuously streams until cancel notification | | `MOCK_AGENT_STDERR_COUNT` | Emits N lines of `MOCK_AGENT_STDERR_LINE:{i}` to stderr during prompt | +| `MOCK_AGENT_RESPONSE` | Custom response text instead of default "Test message 1/2" (added for TUI testing) | +| `MOCK_AGENT_DELAY_MS` | Millisecond delay before completing stream to simulate realistic streaming (added for TUI testing) | **Stderr Output for Testing:** diff --git a/codex-rs/mock-acp-agent/src/main.rs b/codex-rs/mock-acp-agent/src/main.rs index d919123da..9d44a6729 100644 --- a/codex-rs/mock-acp-agent/src/main.rs +++ b/codex-rs/mock-acp-agent/src/main.rs @@ -164,11 +164,24 @@ impl acp::Agent for MockAgent { } } - self.send_text_chunk(session_id.clone(), "Test message 1") - .await?; + // Support custom response text for TUI testing + if let Ok(response) = std::env::var("MOCK_AGENT_RESPONSE") { + self.send_text_chunk(session_id.clone(), &response).await?; + } else { + // Default behavior + self.send_text_chunk(session_id.clone(), "Test message 1") + .await?; + + self.send_text_chunk(session_id.clone(), "Test message 2") + .await?; + } - self.send_text_chunk(session_id.clone(), "Test message 2") - .await?; + // Support configurable delay for simulating realistic streaming + if let Ok(delay_str) = std::env::var("MOCK_AGENT_DELAY_MS") + && let Ok(delay) = delay_str.parse::() + { + sleep(Duration::from_millis(delay)).await; + } if let Ok(file_path) = std::env::var("MOCK_AGENT_REQUEST_FILE") { eprintln!("Mock agent: requesting file read: {}", file_path); diff --git a/codex-rs/tui-integration-tests/Cargo.toml b/codex-rs/tui-integration-tests/Cargo.toml new file mode 100644 index 000000000..879f7f661 --- /dev/null +++ b/codex-rs/tui-integration-tests/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "tui-integration-tests" +version = "0.1.0" +edition = "2021" + +[dependencies] +portable-pty = "0.8" +vt100 = "0.15" +insta = "1" +anyhow = "1" +tempfile = "3" + +[target.'cfg(unix)'.dependencies] +nix = { version = "0.27", features = ["fs"] } +libc = "0.2" + +[dev-dependencies] +tempfile = "3" diff --git a/codex-rs/tui-integration-tests/docs.md b/codex-rs/tui-integration-tests/docs.md new file mode 100644 index 000000000..292d2f0f3 --- /dev/null +++ b/codex-rs/tui-integration-tests/docs.md @@ -0,0 +1,247 @@ +# Noridoc: TUI Integration Tests + +Path: @/codex-rs/tui-integration-tests + +### Overview + +- Black-box integration testing framework for the Codex TUI using PTY (pseudo-terminal) emulation +- Spawns the real `codex` binary in a simulated terminal and exercises full application stack +- Uses VT100 parser to capture and validate terminal screen output via snapshot testing +- Provides programmatic keyboard input simulation and screen state polling + +### How it fits into the larger codebase + +- Tests the complete integration between `@/codex-rs/cli`, `@/codex-rs/tui`, `@/codex-rs/core`, and `@/codex-rs/acp` +- Complements unit tests in `@/codex-rs/tui/src/chatwidget.rs` by testing full application behavior +- Uses `@/codex-rs/mock-acp-agent` as the ACP backend for deterministic test scenarios +- Validates CLI argument parsing, TUI event loop, ACP protocol communication, and terminal rendering +- Part of the workspace at `@/codex-rs/Cargo.toml:46` + +### Core Implementation + +**Test Harness:** `TuiSession` in `@/codex-rs/tui-integration-tests/src/lib.rs` + +The main API provides: +- `spawn(rows, cols)` - Launch codex binary with mock-acp-agent in PTY with automatic temp directory +- `spawn_with_config(rows, cols, config)` - Launch with custom configuration and automatic temp directory +- `send_str(text)` - Simulate typing text +- `send_key(key)` - Send keyboard events (Enter, Escape, Ctrl-C, etc.) +- `wait_for_text(needle, timeout)` - Poll screen until text appears +- `wait_for(predicate, timeout)` - Poll screen until condition matches +- `screen_contents()` - Get current terminal screen as string + +**Debugging Aids:** + +`TuiSession` implements `Drop` to print screen state when tests panic, making it easier to diagnose PTY timing issues: +```rust +impl Drop for TuiSession { + fn drop(&mut self) { + if std::thread::panicking() { + eprintln!("\n=== TUI Screen State at Panic ==="); + eprintln!("{}", self.screen_contents()); + eprintln!("=================================\n"); + } + } +} +``` + +The crate exports helper functions for consistent test patterns: +- `TIMEOUT: Duration` - Standard 5-second timeout constant for use across all tests +- `normalize_for_snapshot(contents: String) -> String` - Normalizes dynamic content for snapshot testing (see below) + +**Automatic Test Isolation:** + +All tests run in isolated temporary directories created in `/tmp/`: +- Each `spawn()` or `spawn_with_config()` call creates a new temp directory +- Directory contains a `hello.py` file with `print('Hello, World!')` +- Temp directory is automatically cleaned up when `TuiSession` is dropped +- Tests no longer run in user's home directory for better isolation + +**Architecture:** + +``` +Test Code + ↓ +TuiSession (portable_pty) + ↓ +PTY Master ←→ PTY Slave + ↓ ↓ +VT100 Parser codex binary (--model mock-acp-agent) + ↓ ↓ +Screen State ACP JSON-RPC over stdin/stdout + ↓ + mock_acp_agent (env var configured) +``` + +**Key Input Handling:** `Key` enum in `@/codex-rs/tui-integration-tests/src/keys.rs` + +Converts high-level key events to ANSI escape sequences: +- `Key::Enter` → `\r` +- `Key::Escape` → `\x1b` +- `Key::Up/Down/Left/Right` → `\x1b[A/B/D/C` +- `Key::Backspace` → `\x7f` +- `Key::Ctrl('c')` → Control character encoding + +**Session Configuration:** `SessionConfig` in `@/codex-rs/tui-integration-tests/src/lib.rs` + +Builder pattern for test environment setup: +- `with_mock_response(text)` - Set `MOCK_AGENT_RESPONSE` env var +- `with_stream_until_cancel()` - Set `MOCK_AGENT_STREAM_UNTIL_CANCEL=1` +- `with_agent_env(key, value)` - Pass custom env vars to mock agent +- `with_approval_policy(policy)` - Set approval policy (defaults to `OnFailure`) +- `without_approval_policy()` - Remove approval policy to test trust screen +- `cwd` field - Optional working directory (auto-created temp directory if None) + +**Approval Policy:** `ApprovalPolicy` enum controls when codex asks for command approval: +- `Untrusted` - Only run trusted commands without approval +- `OnFailure` - Ask for approval only when commands fail (default for tests) +- `OnRequest` - Model decides when to ask for approval +- `Never` - Never ask for approval + +By default, all spawned sessions use `ApprovalPolicy::OnFailure` which: +- Skips the trust directory approval screen at startup +- Allows tests to run without manual intervention +- Sets both `--ask-for-approval on-failure` and `--sandbox workspace-write` flags + +### Things to Know + +**PTY Input Timing Pattern:** + +To avoid race conditions between sending input and the TUI processing it, tests add a 100ms delay after `send_str()` and `send_key()` operations when submitting prompts or navigating UI: + +```rust +session.send_str("testing!!!").unwrap(); +std::thread::sleep(Duration::from_millis(100)); +session.send_key(Key::Enter).unwrap(); +std::thread::sleep(Duration::from_millis(100)); +``` + +This delay allows the PTY subprocess time to process input and update the display before assertions check for results. The delay is added in test code (not in `TuiSession` methods) for flexibility—not all operations need delays. + +**Test Files Structure:** + +| File | Coverage | +|------|----------| +| `@/codex-rs/tui-integration-tests/tests/startup.rs` | TUI initialization, prompt display, trust screen skipping, snapshot testing for 4 startup scenarios, non-blocking PTY verification | +| `@/codex-rs/tui-integration-tests/tests/prompt_flow.rs` | Prompt submission and agent responses | +| `@/codex-rs/tui-integration-tests/tests/input_handling.rs` | Text editing, backspace, Ctrl-C clearing, arrow key navigation with snapshot testing | +| `@/codex-rs/tui-integration-tests/tests/streaming.rs` | Prompt submission with timing delays, agent response streaming | + +**Snapshot Files:** + +| File | Test Coverage | +|------|---------------| +| `@/codex-rs/tui-integration-tests/tests/snapshots/startup__*.snap` | Various startup screen scenarios (welcome, dimensions, temp directory, trust screen) | +| `@/codex-rs/tui-integration-tests/tests/snapshots/input_handling__*.snap` | Input handling scenarios (ctrl-c clear, typing/backspace, model changed) | +| `@/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap` | Prompt submission and streaming response | + +**Snapshot Testing with Insta:** + +Tests use `insta::assert_snapshot!()` to capture terminal output for visual regression testing: +```rust +assert_snapshot!("startup_screen", normalize_for_snapshot(session.screen_contents())); +``` + +Snapshots stored in `@/codex-rs/tui-integration-tests/tests/snapshots/*.snap` for regression detection. Each snapshot captures the exact terminal output state at a specific test point. + +**Snapshot Normalization:** + +The `normalize_for_snapshot()` helper function exported from `@/codex-rs/tui-integration-tests/src/lib.rs` ensures stable snapshots across test runs by replacing dynamic content: + +Normalization rules: +1. Temp directory paths (`/tmp/.tmpXXXXXX`) → `[TMP_DIR]` placeholder +2. Random default prompts on lines starting with `› ` → `[DEFAULT_PROMPT]` placeholder + - Detects specific default prompt patterns: "Find and fix a bug", "Explain this codebase", "Write tests for", etc. + - Preserves user-entered prompts and UI text like "? for shortcuts" + +Implementation in `@/codex-rs/tui-integration-tests/src/lib.rs:456-488`: +```rust +pub fn normalize_for_snapshot(contents: String) -> String { + // Replace /tmp/.tmpXXXXXX with [TMP_DIR] + // Replace known default prompts with [DEFAULT_PROMPT] + // Preserves UI structure and user input +} +``` + +This normalization allows snapshot assertions to focus on UI structure and static content rather than ephemeral runtime values. All tests import and use this function consistently: `use tui_integration_tests::{normalize_for_snapshot, ...};` + +**PTY Implementation Details:** + +- Uses `portable-pty` crate for cross-platform PTY support +- PTY master is set to **non-blocking mode** using `fcntl(O_NONBLOCK)` on Unix systems +- This prevents `read()` from blocking indefinitely when no data is available +- Sets `TERM=xterm-256color` for terminal feature detection +- NO_COLOR=1 by default for deterministic output parsing +- Terminal size configurable (default 24x80, some tests use 40x120) + +**Polling Pattern:** + +`poll()` method performs non-blocking read from PTY master: +- PTY file descriptor is set to non-blocking mode during session initialization +- Reads up to 8KB buffer per poll +- Intercepts and responds to terminal control sequences before parsing +- Feeds processed data to VT100 parser incrementally +- Returns immediately with `WouldBlock` error when no data is available +- `wait_for()` loops with 50ms sleep between polls, checking timeout after each iteration +- Timeout mechanism works correctly because `read()` never blocks indefinitely + +**Control Sequence Interception:** + +The `intercept_control_sequences()` method handles terminal queries that require responses: +- Detects cursor position query (`ESC[6n`) in output stream from codex binary +- Writes cursor position response (`ESC[1;1R`) back to PTY input +- Removes control sequences from parser stream to avoid rendering artifacts +- Enables crossterm terminal initialization without real terminal support + +**Mock Agent Integration:** + +Tests control mock agent behavior via environment variables: +- `MOCK_AGENT_RESPONSE` - Custom response text instead of defaults +- `MOCK_AGENT_DELAY_MS` - Simulate streaming delays +- `MOCK_AGENT_STREAM_UNTIL_CANCEL` - Stream until Escape pressed + +See `@/codex-rs/mock-acp-agent/docs.md` for full list of env vars. + +**Binary Discovery:** + +`codex_binary_path()` locates the compiled binary: +``` +test_exe: target/debug/deps/startup-abc123 + ↓ +target/debug/deps (parent) + ↓ +target/debug (parent.parent) + ↓ +target/debug/codex (join "codex") +``` + +**Known Limitations:** + +- VT100 parser may not perfectly emulate all terminal behaviors +- Terminal size changes after spawn not currently supported +- Color codes disabled (NO_COLOR=1) for test determinism + +**Dependencies:** + +- `portable-pty = "0.8"` - PTY creation and management +- `vt100 = "0.15"` - Terminal emulator/parser +- `insta = "1"` - Snapshot testing framework +- `anyhow = "1"` - Error handling +- `tempfile = "3"` - Temporary directory creation for test isolation +- `nix = "0.27"` (Unix only) - fcntl for non-blocking I/O setup +- `libc = "0.2"` (Unix only) - Low-level fcntl operations + +**Debugging:** + +Set `DEBUG_TUI_PTY=1` environment variable to enable detailed logging of PTY operations: +```bash +DEBUG_TUI_PTY=1 cargo test test_name -- --nocapture +``` + +This shows: +- Each `poll()` call and its duration +- Read results (bytes read, WouldBlock, EOF) +- `wait_for()` loop iterations and elapsed time +- Screen contents preview at each iteration + +Created and maintained by Nori. diff --git a/codex-rs/tui-integration-tests/src/keys.rs b/codex-rs/tui-integration-tests/src/keys.rs new file mode 100644 index 000000000..d30ea96c3 --- /dev/null +++ b/codex-rs/tui-integration-tests/src/keys.rs @@ -0,0 +1,30 @@ +/// Key input types +pub enum Key { + Enter, + Escape, + Up, + Down, + Left, + Right, + Backspace, + Tab, + Ctrl(char), + Char(char), +} + +impl Key { + pub fn to_escape_sequence(&self) -> Vec { + match self { + Key::Enter => vec![b'\r'], + Key::Escape => vec![0x1b], + Key::Up => vec![0x1b, b'[', b'A'], + Key::Down => vec![0x1b, b'[', b'B'], + Key::Right => vec![0x1b, b'[', b'C'], + Key::Left => vec![0x1b, b'[', b'D'], + Key::Backspace => vec![0x7f], + Key::Tab => vec![b'\t'], + Key::Ctrl(c) => vec![(*c as u8) & 0x1f], + Key::Char(c) => c.to_string().into_bytes(), + } + } +} diff --git a/codex-rs/tui-integration-tests/src/lib.rs b/codex-rs/tui-integration-tests/src/lib.rs new file mode 100644 index 000000000..e900e7e1b --- /dev/null +++ b/codex-rs/tui-integration-tests/src/lib.rs @@ -0,0 +1,488 @@ +use anyhow::Result; +use portable_pty::{native_pty_system, CommandBuilder, PtySize}; +use std::collections::HashMap; +use std::io::{Read, Write}; +use std::time::{Duration, Instant}; +use vt100::Parser; + +#[cfg(unix)] +/// Helper to set a file descriptor to non-blocking mode +fn set_nonblocking(fd: std::os::unix::io::RawFd) -> Result<()> { + let flags = unsafe { libc::fcntl(fd, libc::F_GETFL) }; + if flags < 0 { + return Err(std::io::Error::last_os_error().into()); + } + let result = unsafe { libc::fcntl(fd, libc::F_SETFL, flags | libc::O_NONBLOCK) }; + if result < 0 { + return Err(std::io::Error::last_os_error().into()); + } + Ok(()) +} + +pub use keys::Key; +mod keys; + +/// PTY session for driving the codex TUI +pub struct TuiSession { + _master: Box, + reader: Box, + writer: Box, + parser: Parser, + _temp_dir: Option, +} + +impl Drop for TuiSession { + fn drop(&mut self) { + if std::thread::panicking() { + eprintln!("\n=== TUI Screen State at Panic ==="); + eprintln!("{}", self.screen_contents()); + eprintln!("=================================\n"); + } + } +} + +impl TuiSession { + /// Spawn codex with mock-acp-agent in a temporary directory + pub fn spawn(rows: u16, cols: u16) -> Result { + let temp_dir = tempfile::tempdir()?; + let hello_py = temp_dir.path().join("hello.py"); + std::fs::write(&hello_py, "print('Hello, World!')")?; + + let config = SessionConfig { + cwd: Some(temp_dir.path().to_path_buf()), + ..Default::default() + }; + + Self::spawn_with_config_and_tempdir(rows, cols, config, Some(temp_dir)) + } + + /// Spawn with custom configuration + /// Creates a temp directory with hello.py if no cwd is specified in config + pub fn spawn_with_config(rows: u16, cols: u16, mut config: SessionConfig) -> Result { + if config.cwd.is_none() { + let temp_dir = tempfile::tempdir()?; + let hello_py = temp_dir.path().join("hello.py"); + std::fs::write(&hello_py, "print('Hello, World!')")?; + config.cwd = Some(temp_dir.path().to_path_buf()); + Self::spawn_with_config_and_tempdir(rows, cols, config, Some(temp_dir)) + } else { + Self::spawn_with_config_and_tempdir(rows, cols, config, None) + } + } + + /// Internal method to spawn with optional temp directory + fn spawn_with_config_and_tempdir( + rows: u16, + cols: u16, + config: SessionConfig, + temp_dir: Option, + ) -> Result { + let pty_system = native_pty_system(); + let pair = pty_system.openpty(PtySize { + rows, + cols, + pixel_width: 0, + pixel_height: 0, + })?; + + let mut cmd = CommandBuilder::new(codex_binary_path()); + + // Set working directory if provided + if let Some(cwd) = &config.cwd { + cmd.cwd(cwd); + } + + // Use mock-acp-agent model + cmd.arg("--model"); + cmd.arg(&config.model); + + // Set approval policy if specified (also sets sandbox to allow test execution) + if let Some(approval) = &config.approval_policy { + cmd.arg("--ask-for-approval"); + cmd.arg(approval.as_str()); + } + // Also set sandbox to workspace-write to allow file operations in tests + if let Some(sandbox) = &config.sandbox { + cmd.arg("--sandbox"); + cmd.arg(sandbox.as_str()); + } + + // Set TERM to enable terminal features + cmd.env("TERM", "xterm-256color"); + + // Pass through mock agent env vars + for (key, value) in config.mock_agent_env { + cmd.env(&key, &value); + } + + // Disable color codes for easier parsing + if config.no_color { + cmd.env("NO_COLOR", "1"); + } + + let _child = pair.slave.spawn_command(cmd)?; + + // Set master PTY to non-blocking mode before cloning reader + // This ensures the cloned reader FD inherits the non-blocking flag + #[cfg(unix)] + { + if let Some(master_fd) = pair.master.as_raw_fd() { + set_nonblocking(master_fd)?; + } + } + + let reader = pair.master.try_clone_reader()?; + let writer = pair.master.take_writer()?; + + Ok(Self { + _master: pair.master, + reader, + writer, + parser: Parser::new(rows, cols, 0), + _temp_dir: temp_dir, + }) + } + + /// Read any available output and update screen state + /// + /// This method attempts to read available data without blocking. + /// It uses a simple approach of reading with a small buffer which works + /// well for our polling-based test framework. + pub fn poll(&mut self) -> Result<()> { + // Create a small buffer for reading + let mut buf = [0u8; 8192]; + + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] About to call read()..."); + } + let read_start = Instant::now(); + + // The PTY reader is in non-blocking mode and will return immediately if no data is available + // We rely on the polling loop in wait_for() to handle timing + let read_result = self.reader.read(&mut buf); + let read_duration = read_start.elapsed(); + + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] read() returned after {:?}", read_duration); + } + + match read_result { + Ok(0) => { + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] read() returned Ok(0) - EOF/process exited"); + } + Ok(()) + } + Ok(n) => { + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] read() returned Ok({}) - {} bytes read", n, n); + } + // Intercept and respond to control sequences before parsing + let processed = self.intercept_control_sequences(&buf[..n])?; + self.parser.process(&processed); + Ok(()) + } + Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => { + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] read() returned WouldBlock - no data available"); + } + Ok(()) + } + Err(e) => { + if std::env::var("DEBUG_TUI_PTY").is_ok() { + eprintln!("[DEBUG poll] read() returned Err: {}", e); + } + Err(e.into()) + } + } + } + + /// Intercept control sequences and inject responses + /// + /// Detects cursor position queries (ESC[6n) and writes responses back to the PTY + /// Returns filtered data with control sequences removed + fn intercept_control_sequences(&mut self, data: &[u8]) -> Result> { + let mut result = Vec::with_capacity(data.len()); + let mut i = 0; + + while i < data.len() { + // Detect cursor position query: ESC[6n + if i + 3 < data.len() + && data[i] == 0x1b // ESC + && data[i+1] == b'[' + && data[i+2] == b'6' + && data[i+3] == b'n' + { + // Write response back to PTY: ESC[1;1R (cursor at row 1, col 1) + self.writer.write_all(b"\x1b[1;1R")?; + self.writer.flush()?; + // Skip the control sequence - don't pass it to the parser + i += 4; + } else { + result.push(data[i]); + i += 1; + } + } + Ok(result) + } + + /// Wait for predicate with timeout + pub fn wait_for(&mut self, pred: F, timeout: Duration) -> Result<(), String> + where + F: Fn(&str) -> bool, + { + let debug = std::env::var("DEBUG_TUI_PTY").is_ok(); + if debug { + eprintln!( + "[DEBUG wait_for] Starting wait_for with timeout {:?}", + timeout + ); + } + let start = Instant::now(); + let mut iteration = 0; + + loop { + iteration += 1; + let elapsed = start.elapsed(); + if debug { + eprintln!( + "[DEBUG wait_for] Iteration {}, elapsed: {:?}", + iteration, elapsed + ); + eprintln!("[DEBUG wait_for] Calling poll()..."); + } + + self.poll().map_err(|e| e.to_string())?; + + if debug { + eprintln!("[DEBUG wait_for] poll() completed"); + } + + let contents = self.screen_contents(); + if debug { + eprintln!( + "[DEBUG wait_for] Screen contents length: {} bytes", + contents.len() + ); + eprintln!( + "[DEBUG wait_for] Screen contents preview: {:?}", + &contents.chars().take(100).collect::() + ); + } + + if pred(&contents) { + if debug { + eprintln!( + "[DEBUG wait_for] Predicate matched! Success after {:?}", + elapsed + ); + } + return Ok(()); + } + + if debug { + eprintln!("[DEBUG wait_for] Predicate did not match"); + } + + if start.elapsed() > timeout { + if debug { + eprintln!( + "[DEBUG wait_for] TIMEOUT REACHED after {:?}", + start.elapsed() + ); + } + return Err(format!( + "Timeout waiting for condition.\nScreen contents:\n{}", + contents + )); + } + + if debug { + eprintln!("[DEBUG wait_for] Sleeping 50ms before next iteration"); + } + std::thread::sleep(Duration::from_millis(50)); + } + } + + /// Wait for specific text to appear + pub fn wait_for_text(&mut self, needle: &str, timeout: Duration) -> Result<(), String> { + self.wait_for(|s| s.contains(needle), timeout) + } + + /// Get current screen contents as string + pub fn screen_contents(&self) -> String { + self.parser.screen().contents() + } + + /// Type a string + pub fn send_str(&mut self, s: &str) -> std::io::Result<()> { + self.writer.write_all(s.as_bytes())?; + self.writer.flush() + } + + /// Send a key event + pub fn send_key(&mut self, key: Key) -> std::io::Result<()> { + self.writer.write_all(&key.to_escape_sequence())?; + self.writer.flush() + } +} + +/// Sandbox policy for codex session +#[derive(Debug, Clone, Copy)] +pub enum Sandbox { + // [possible values: read-only, workspace-write, danger-full-access] + ReadOnly, + WorkspaceWrite, + DangerFullAccess, +} + +impl Sandbox { + fn as_str(&self) -> &'static str { + match self { + Sandbox::ReadOnly => "read-only", + Sandbox::WorkspaceWrite => "workspace-write", + Sandbox::DangerFullAccess => "danger-full-access", + } + } +} + +/// Approval policy for codex session +#[derive(Debug, Clone, Copy)] +pub enum ApprovalPolicy { + /// Only run trusted commands without approval + Untrusted, + /// Run all commands, ask for approval on failure + OnFailure, + /// Model decides when to ask + OnRequest, + /// Never ask for approval + Never, +} + +impl ApprovalPolicy { + fn as_str(&self) -> &'static str { + match self { + ApprovalPolicy::Untrusted => "untrusted", + ApprovalPolicy::OnFailure => "on-failure", + ApprovalPolicy::OnRequest => "on-request", + ApprovalPolicy::Never => "never", + } + } +} + +/// Configuration for spawning a test session +pub struct SessionConfig { + pub model: String, + pub mock_agent_env: HashMap, + pub no_color: bool, + pub approval_policy: Option, + pub sandbox: Option, + pub cwd: Option, +} + +impl Default for SessionConfig { + fn default() -> Self { + Self::new() + } +} + +impl SessionConfig { + pub fn new() -> Self { + Self { + model: "mock-acp-agent".to_string(), + mock_agent_env: HashMap::new(), + no_color: true, + approval_policy: Some(ApprovalPolicy::OnFailure), + // [possible values: read-only, workspace-write, danger-full-access] + sandbox: Some(Sandbox::WorkspaceWrite), + cwd: None, + } + } + + pub fn with_mock_response(mut self, response: impl Into) -> Self { + self.mock_agent_env + .insert("MOCK_AGENT_RESPONSE".to_string(), response.into()); + self + } + + pub fn with_stream_until_cancel(mut self) -> Self { + self.mock_agent_env.insert( + "MOCK_AGENT_STREAM_UNTIL_CANCEL".to_string(), + "1".to_string(), + ); + self + } + + pub fn with_agent_env(mut self, key: impl Into, value: impl Into) -> Self { + self.mock_agent_env.insert(key.into(), value.into()); + self + } + + pub fn with_approval_policy(mut self, policy: ApprovalPolicy) -> Self { + self.approval_policy = Some(policy); + self + } + + pub fn without_approval_policy(mut self) -> Self { + self.approval_policy = None; + self + } + + pub fn with_sandbox(mut self, sandbox: Sandbox) -> Self { + self.sandbox = Some(sandbox); + self + } + + pub fn without_sandbox(mut self) -> Self { + self.sandbox = None; + self + } +} + +/// Get path to codex binary +fn codex_binary_path() -> String { + let test_exe = std::env::current_exe().expect("Failed to get current exe path"); + test_exe + .parent() // deps + .and_then(|p| p.parent()) // debug or release + .expect("Failed to get target directory") + .join("codex") + .to_string_lossy() + .into_owned() +} + +pub const TIMEOUT: Duration = Duration::from_secs(5); + +/// Normalize dynamic content in screen output for snapshot testing +pub fn normalize_for_snapshot(contents: String) -> String { + let mut normalized = contents; + + // Replace /tmp/.tmpXXXXXX with placeholder + if let Some(start) = normalized.find("/tmp/.tmp") { + if let Some(end) = normalized[start..].find(char::is_whitespace) { + normalized.replace_range(start..start + end, "[TMP_DIR]"); + } + } + + // Replace dynamic prompt text on lines starting with › + let lines: Vec = normalized + .lines() + .map(|line| { + if line.trim_start().starts_with("› ") + && (line.trim_start().starts_with("› Find and fix a bug") + || line.trim_start().starts_with("› Explain this codebase") + || line.trim_start().starts_with("› Write tests for") + || line.trim_start().starts_with("› Improve documentation") + || line.trim_start().starts_with("› Summarize recent commits") + || line.trim_start().starts_with("› Implement {feature}") + || line.contains("@filename")) + { + "› [DEFAULT_PROMPT]".to_string() + } else { + line.to_string() + } + }) + .collect(); + + lines.join("\n") +} diff --git a/codex-rs/tui-integration-tests/tests/input_handling.rs b/codex-rs/tui-integration-tests/tests/input_handling.rs new file mode 100644 index 000000000..a005a7979 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/input_handling.rs @@ -0,0 +1,69 @@ +use insta::assert_snapshot; +use std::time::Duration; +use tui_integration_tests::{normalize_for_snapshot, Key, TuiSession, TIMEOUT}; + +#[test] +fn test_ctrl_c_clears_input() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + // Type some text + session.send_str("draft message").unwrap(); + session.wait_for_text("draft message", TIMEOUT).unwrap(); + + // Ctrl-C should clear + session.send_key(Key::Ctrl('c')).unwrap(); + + // Verify cleared + session + .wait_for(|s| !s.contains("draft message"), TIMEOUT) + .expect("Input was not cleared"); + + assert_snapshot!( + "ctrl_c_clears", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_backspace() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + session.send_str("Hello").unwrap(); + session.wait_for_text("Hello", TIMEOUT).unwrap(); + + // Backspace twice + session.send_key(Key::Backspace).unwrap(); + session.send_key(Key::Backspace).unwrap(); + + // Should have "Hel" remaining + session.wait_for_text("Hel", TIMEOUT).unwrap(); + session.wait_for(|s| !s.contains("Hello"), TIMEOUT).unwrap(); + + assert_snapshot!( + "typing_and_backspace", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_arrows() { + let mut session = TuiSession::spawn(40, 80).unwrap(); + session.wait_for_text("›", TIMEOUT).unwrap(); + + session.send_str("/model").unwrap(); + session.wait_for_text("/model", TIMEOUT).unwrap(); + + session.send_key(Key::Enter).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.send_key(Key::Down).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.send_key(Key::Down).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + + assert_snapshot!( + "model_changed", + normalize_for_snapshot(session.screen_contents()) + ); +} diff --git a/codex-rs/tui-integration-tests/tests/prompt_flow.rs b/codex-rs/tui-integration-tests/tests/prompt_flow.rs new file mode 100644 index 000000000..b6bf64212 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/prompt_flow.rs @@ -0,0 +1,70 @@ +use insta::assert_snapshot; +use std::time::Duration; +use tui_integration_tests::{Key, SessionConfig, TuiSession}; + +const TIMEOUT: Duration = Duration::from_secs(10); + +#[test] +fn test_submit_prompt_default_response() { + let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex"); + + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + // Type prompt + session.send_str("Hello").unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.wait_for_text("Hello", TIMEOUT).unwrap(); + + // Submit + session.send_key(Key::Enter).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + + // Wait for default mock responses + session + .wait_for_text("Test message 1", TIMEOUT) + .expect("Did not receive mock response"); + session + .wait_for_text("Test message 2", TIMEOUT) + .expect("Did not receive second mock response"); + + assert_snapshot!("prompt_submitted", session.screen_contents()); +} + +#[test] +fn test_submit_prompt_custom_response() { + let config = SessionConfig::new() + .with_mock_response("This is a custom test response from the mock agent."); + + let mut session = TuiSession::spawn_with_config(24, 80, config).expect("Failed to spawn codex"); + + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + session.send_str("test prompt").unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.send_key(Key::Enter).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + + session + .wait_for_text("This is a custom test response", TIMEOUT) + .expect("Did not receive custom response"); + + assert_snapshot!("custom_response", session.screen_contents()); +} + +#[test] +fn test_multiline_input() { + let mut session = TuiSession::spawn(24, 80).unwrap(); + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + // Type multiline prompt + session.send_str("Line 1").unwrap(); + session.send_key(Key::Enter).unwrap(); + session.send_str("Line 2").unwrap(); + session.send_key(Key::Enter).unwrap(); + session.send_str("Line 3").unwrap(); + + // Verify all lines visible + session.wait_for_text("Line 1", TIMEOUT).unwrap(); + session.wait_for_text("Line 2", TIMEOUT).unwrap(); + session.wait_for_text("Line 3", TIMEOUT).unwrap(); +} diff --git a/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap b/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap new file mode 100644 index 000000000..6da728e17 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/cancellation__submit_input.snap @@ -0,0 +1,28 @@ +--- +source: tui-integration-tests/tests/cancellation.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +│ directory: [TMP_DIR] │ +╰──────────────────────────────────────────────────╯ + + To get started, describe a task or try one of these commands: + + /init - create an AGENTS.md file with instructions for Codex + /status - show current session configuration + /approvals - choose what Codex can do without approval + /model - choose what model and reasoning effort to use + /review - review any changes and find issues + + +› testing!!! + + +■ Missing environment variable: `GOOGLE_API_KEY`. Get your API key from https:// +aistudio.google.com/app/apikey + +• Snapshots disabled: current directory is not a Git repository. (0s • esc to in + + +› [DEFAULT_PROMPT] + + 100% context left · ? for shortcuts diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap new file mode 100644 index 000000000..59df3bd43 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__ctrl_c_clears.snap @@ -0,0 +1,23 @@ +--- +source: tui-integration-tests/tests/input_handling.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +╭──────────────────────────────────────────────────╮ +│ >_ OpenAI Codex (v0.0.0) │ +│ │ +│ model: mock-acp-agent low /model to change │ +│ directory: [TMP_DIR] │ +╰──────────────────────────────────────────────────╯ + + To get started, describe a task or try one of these commands: + + /init - create an AGENTS.md file with instructions for Codex + /status - show current session configuration + /approvals - choose what Codex can do without approval + /model - choose what model and reasoning effort to use + /review - review any changes and find issues + + +› [DEFAULT_PROMPT] + + ctrl + c again to quit diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap new file mode 100644 index 000000000..17016c0de --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__model_changed.snap @@ -0,0 +1,7 @@ +--- +source: tui-integration-tests/tests/input_handling.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +› /model + + /model choose what model and reasoning effort to use diff --git a/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap new file mode 100644 index 000000000..0a28bbe16 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/input_handling__typing_and_backspace.snap @@ -0,0 +1,23 @@ +--- +source: tui-integration-tests/tests/input_handling.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +╭──────────────────────────────────────────────────╮ +│ >_ OpenAI Codex (v0.0.0) │ +│ │ +│ model: mock-acp-agent low /model to change │ +│ directory: [TMP_DIR] │ +╰──────────────────────────────────────────────────╯ + + To get started, describe a task or try one of these commands: + + /init - create an AGENTS.md file with instructions for Codex + /status - show current session configuration + /approvals - choose what Codex can do without approval + /model - choose what model and reasoning effort to use + /review - review any changes and find issues + + +› Hel + + 100% context left diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap new file mode 100644 index 000000000..8fc3c5312 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__runs_in_temp_directory.snap @@ -0,0 +1,29 @@ +--- +source: tui-integration-tests/tests/startup.rs +assertion_line: 108 +expression: normalize_for_snapshot(session.screen_contents()) +--- +                                       +             _._:=++==+,_              +         _=,/*\+/+\=||=_ _"+_          +       ,|*|+**"^`   `"*`"~=~||+        +      ;*_\*',,_            /*|;|,      +     \^;/'^|\`\\            ".|\\,     +    ~* +`  |*/;||,           '.\||,    +   +^"-*    '\|*/"|_          ! |/|    +   ||_|`     ,//|;|*            "`|    +   |=~'`    ;||^\|".~++++++_+, =" |    +    _~;*  _;+` /* |"|___.:,,,|/,/,|    +    \^_"^ ^\,./`   `^*''* ^*"/,;_/     +     *^, ", `              ,'/*_|      +       ^\,`\+_          _=_+|_+"       +         ^*,\_!*+:;=;;.=*+_,|*         +           `*"*|~~___,_;+*"            +                                       + + Welcome to Codex, OpenAI's command-line coding agent + +> You are running Codex in [TMP_DIR] + + Since this folder is not version controlled, we recommend requiring approval + of all edits and commands. diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap new file mode 100644 index 000000000..b27038950 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_screen.snap @@ -0,0 +1,28 @@ +--- +source: tui-integration-tests/tests/startup.rs +expression: session.screen_contents() +--- +                                       +             _._:=++==+,_              +         _=,/*\+/+\=||=_ _"+_          +       ,|*|+**"^`   `"*`"~=~||+        +      ;*_\*',,_            /*|;|,      +     \^;/'^|\`\\            ".|\\,     +    ~* +`  |*/;||,           '.\||,    +   +^"-*    '\|*/"|_          ! |/|    +   ||_|`     ,//|;|*            "`|    +   |=~'`    ;||^\|".~++++++_+, =" |    +    _~;*  _;+` /* |"|___.:,,,|/,/,|    +    \^_"^ ^\,./`   `^*''* ^*"/,;_/     +     *^, ", `              ,'/*_|      +       ^\,`\+_          _=_+|_+"       +         ^*,\_!*+:;=;;.=*+_,|*         +           `*"*|~~___,_;+*"            +                                       + + Welcome to Codex, OpenAI's command-line coding agent + +> You are running Codex in [TMP_DIR] + + Since this folder is not version controlled, we recommend requiring approval + of all edits and commands. diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap new file mode 100644 index 000000000..dda65f12e --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_shows_welcome.snap @@ -0,0 +1,29 @@ +--- +source: tui-integration-tests/tests/startup.rs +assertion_line: 52 +expression: normalize_for_snapshot(session.screen_contents()) +--- +                                       +             _._:=++==+,_              +         _=,/*\+/+\=||=_ _"+_          +       ,|*|+**"^`   `"*`"~=~||+        +      ;*_\*',,_            /*|;|,      +     \^;/'^|\`\\            ".|\\,     +    ~* +`  |*/;||,           '.\||,    +   +^"-*    '\|*/"|_          ! |/|    +   ||_|`     ,//|;|*            "`|    +   |=~'`    ;||^\|".~++++++_+, =" |    +    _~;*  _;+` /* |"|___.:,,,|/,/,|    +    \^_"^ ^\,./`   `^*''* ^*"/,;_/     +     *^, ", `              ,'/*_|      +       ^\,`\+_          _=_+|_+"       +         ^*,\_!*+:;=;;.=*+_,|*         +           `*"*|~~___,_;+*"            +                                       + + Welcome to Codex, OpenAI's command-line coding agent + +> You are running Codex in [TMP_DIR] + + Since this folder is not version controlled, we recommend requiring approval + of all edits and commands. diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap new file mode 100644 index 000000000..105059b62 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__startup_welcome_dimensions_40x120.snap @@ -0,0 +1,32 @@ +--- +source: tui-integration-tests/tests/startup.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +                                       +             _._:=++==+,_              +         _=,/*\+/+\=||=_ _"+_          +       ,|*|+**"^`   `"*`"~=~||+        +      ;*_\*',,_            /*|;|,      +     \^;/'^|\`\\            ".|\\,     +    ~* +`  |*/;||,           '.\||,    +   +^"-*    '\|*/"|_          ! |/|    +   ||_|`     ,//|;|*            "`|    +   |=~'`    ;||^\|".~++++++_+, =" |    +    _~;*  _;+` /* |"|___.:,,,|/,/,|    +    \^_"^ ^\,./`   `^*''* ^*"/,;_/     +     *^, ", `              ,'/*_|      +       ^\,`\+_          _=_+|_+"       +         ^*,\_!*+:;=;;.=*+_,|*         +           `*"*|~~___,_;+*"            +                                       + + Welcome to Codex, OpenAI's command-line coding agent + +> You are running Codex in [TMP_DIR] + + Since this folder is not version controlled, we recommend requiring approval of all edits and commands. + + 1. Allow Codex to work in this folder without asking for approval +› 2. Require approval of edits and commands + + Press enter to continue diff --git a/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap b/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap new file mode 100644 index 000000000..cd7301ee1 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/startup__trust_screen_skipped.snap @@ -0,0 +1,8 @@ +--- +source: tui-integration-tests/tests/startup.rs +assertion_line: 135 +expression: normalize_for_snapshot(session.screen_contents()) +--- +› [DEFAULT_PROMPT] + + 100% context left · ? for shortcuts diff --git a/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap b/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap new file mode 100644 index 000000000..0ef557d94 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/snapshots/streaming__submit_input.snap @@ -0,0 +1,28 @@ +--- +source: tui-integration-tests/tests/streaming.rs +expression: normalize_for_snapshot(session.screen_contents()) +--- +│ directory: [TMP_DIR] │ +╰──────────────────────────────────────────────────╯ + + To get started, describe a task or try one of these commands: + + /init - create an AGENTS.md file with instructions for Codex + /status - show current session configuration + /approvals - choose what Codex can do without approval + /model - choose what model and reasoning effort to use + /review - review any changes and find issues + + +› testing!!! + + +■ Missing environment variable: `GOOGLE_API_KEY`. Get your API key from https:// +aistudio.google.com/app/apikey + +• Snapshots disabled: current directory is not a Git repository. (0s • esc to in + + +› [DEFAULT_PROMPT] + + 100% context left · ? for shortcuts diff --git a/codex-rs/tui-integration-tests/tests/startup.rs b/codex-rs/tui-integration-tests/tests/startup.rs new file mode 100644 index 000000000..e17f3539e --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/startup.rs @@ -0,0 +1,159 @@ +use insta::assert_snapshot; +use std::time::{Duration, Instant}; +use tui_integration_tests::{normalize_for_snapshot, SessionConfig, TuiSession, TIMEOUT}; + +#[test] +fn test_startup_shows_welcome() { + let mut session = TuiSession::spawn_with_config( + 24, + 80, + SessionConfig::default() + // Don't include the values that would bypass welcome + .without_approval_policy() + .without_sandbox(), + ) + .expect("Failed to spawn codex"); + + session + .wait_for_text("Welcome", TIMEOUT) + .expect("Prompt did not appear"); + + let contents = session.screen_contents(); + assert!(contents.contains("Welcome to Codex")); + assert!(contents.contains("/tmp/")); + assert_snapshot!( + "startup_shows_welcome", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_startup_welcome_with_dimensions() { + let mut session = TuiSession::spawn_with_config( + 40, + 120, + SessionConfig::default() + // Don't include the values that would bypass welcome + .without_approval_policy() + .without_sandbox(), + ) + .expect("Failed to spawn codex"); + + session + .wait_for_text("Welcome", TIMEOUT) + .expect("Prompt did not appear"); + + // Verify terminal size is respected + let contents = session.screen_contents(); + assert!(contents.lines().count() <= 40); + assert_snapshot!( + "startup_welcome_dimensions_40x120", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_runs_in_temp_directory_by_default() { + let mut session = TuiSession::spawn_with_config( + 24, + 80, + SessionConfig::default() + // Don't include the values that would bypass welcome + .without_approval_policy() + .without_sandbox(), + ) + .expect("Failed to spawn codex"); + + session + .wait_for_text("Welcome", TIMEOUT) + .expect("Prompt did not appear"); + + let contents = session.screen_contents(); + + // Should run in /tmp/, not home directory + assert!( + contents.contains("/tmp/"), + "Expected session to run in /tmp/, but got: {}", + contents + ); + + // Should NOT run in home directory + assert!( + !contents.contains("/home/"), + "Session should not run in home directory, but got: {}", + contents + ); + assert_snapshot!( + "runs_in_temp_directory", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_trust_screen_is_skipped_with_default_config() { + let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex"); + + // Wait for the prompt to appear (indicated by the chevron character) + session + .wait_for_text("›", TIMEOUT) + .expect("Prompt did not appear"); + + let contents = session.screen_contents(); + + // Should NOT show the trust directory approval screen + assert!( + !contents.contains("Since this folder is not version controlled"), + "Trust screen should be skipped when approval policy is set, but got: {}", + contents + ); + + // Should show the main prompt directly (skipping onboarding) + assert!( + contents.contains("›") && contents.contains("context left"), + "Should show main prompt with context indicator, got: {}", + contents + ); + assert_snapshot!( + "trust_screen_skipped", + normalize_for_snapshot(session.screen_contents()) + ); +} + +#[test] +fn test_poll_does_not_block_when_no_data() { + // RED phase: This test verifies that poll() returns quickly when no data is available, + // proving the PTY reader is in non-blocking mode + let mut session = TuiSession::spawn(24, 80).expect("Failed to spawn codex"); + + // Wait for initial startup to complete + session + .wait_for_text("›", TIMEOUT) + .expect("Initial startup failed"); + + // Wait for screen to stabilize - keep polling until contents don't change + let mut prev_contents = String::new(); + for _ in 0..20 { + session.poll().expect("Poll failed during stabilization"); + std::thread::sleep(Duration::from_millis(100)); + let contents = session.screen_contents(); + if contents == prev_contents { + // No change for 100ms, screen is stable + break; + } + prev_contents = contents; + } + + // Now codex is truly waiting for input, no more data will come + // Poll should return immediately without blocking + let start = Instant::now(); + session.poll().expect("Poll failed"); + let elapsed = start.elapsed(); + + // Assert poll() completed in < 50ms (proves non-blocking) + // If blocking, would wait indefinitely and this would timeout + assert!( + elapsed < Duration::from_millis(50), + "poll() took {:?}, expected < 50ms. Reader appears to be blocking!", + elapsed + ); +} diff --git a/codex-rs/tui-integration-tests/tests/streaming.rs b/codex-rs/tui-integration-tests/tests/streaming.rs new file mode 100644 index 000000000..cc3162561 --- /dev/null +++ b/codex-rs/tui-integration-tests/tests/streaming.rs @@ -0,0 +1,61 @@ +use insta::assert_snapshot; +use std::time::Duration; +use tui_integration_tests::{normalize_for_snapshot, Key, SessionConfig, TuiSession, TIMEOUT}; + +#[test] +fn test_submit_text() { + let config = SessionConfig::new().with_stream_until_cancel(); + + let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap(); + session.wait_for_text("To get started", TIMEOUT).unwrap(); + + // Submit prompt + session.send_str("testing!!!").unwrap(); + session.wait_for_text("testing!!!", TIMEOUT).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.send_key(Key::Enter).unwrap(); + std::thread::sleep(Duration::from_millis(100)); + session.wait_for_text("GOOGLE_API_KEY", TIMEOUT).unwrap(); + + assert_snapshot!( + "submit_input", + normalize_for_snapshot(session.screen_contents()) + ); +} + +// #[test] +// fn test_escape_cancels_streaming() { +// let config = SessionConfig::new().with_stream_until_cancel(); +// +// let mut session = TuiSession::spawn_with_config(24, 80, config).unwrap(); +// session.wait_for_text("To get started", TIMEOUT).unwrap(); +// +// // Submit prompt +// session.send_str("testing!!!").unwrap(); +// session.wait_for_text("testing!!!", TIMEOUT).unwrap(); +// std::thread::sleep(Duration::from_millis(100)); +// session.send_key(Key::Enter).unwrap(); +// std::thread::sleep(Duration::from_millis(100)); +// +// // Wait for streaming to start +// session +// .wait_for_text("Streaming...", TIMEOUT) +// .expect("Streaming did not start"); +// +// // Press Escape to cancel +// session.send_key(Key::Escape).unwrap(); +// +// // Verify cancellation completed +// // (exact behavior depends on TUI implementation) +// session +// .wait_for( +// |s| s.contains("Cancelled") || s.contains("Stopped"), +// TIMEOUT, +// ) +// .ok(); // May not show explicit message +// +// assert_snapshot!( +// "cancelled_stream", +// normalize_for_snapshot(session.screen_contents()) +// ) +// } diff --git a/codex-rs/tui/docs.md b/codex-rs/tui/docs.md index 38ecfb6f6..59c2c6395 100644 --- a/codex-rs/tui/docs.md +++ b/codex-rs/tui/docs.md @@ -99,6 +99,8 @@ The `color.rs` and `terminal_palette.rs` modules handle terminal color detection - `test_backend.rs`: Test terminal backend for snapshot testing - Uses `insta` for snapshot tests of rendered output - `AGENTS.md` documents testing conventions +- Black-box integration tests in `@/codex-rs/tui-integration-tests` test full TUI via PTY +- Integration tests spawn real `codex` binary with `mock-acp-agent` backend **Configuration Flow:** diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 94b7c5dd6..4bd4b4aaa 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -21,10 +21,10 @@ use codex_common::model_presets::ModelUpgrade; use codex_common::model_presets::all_model_presets; use codex_core::AuthManager; use codex_core::ConversationManager; +use codex_core::GEMINI_ACP_PROVIDER_ID; use codex_core::config::Config; use codex_core::config::edit::ConfigEditsBuilder; use codex_core::model_family::find_family_for_model; -use codex_core::GEMINI_ACP_PROVIDER_ID; use codex_core::protocol::FinalOutput; use codex_core::protocol::SessionSource; use codex_core::protocol::TokenUsage;