Skip to content
50 changes: 46 additions & 4 deletions lib/screentracker/pty_conversation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package screentracker
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
Expand All @@ -16,6 +17,24 @@ import (
"golang.org/x/xerrors"
)

const (
	// writeStabilizeEchoTimeout is the maximum time to wait for
	// the screen to change after writing message text to the PTY
	// (echo detection). This is intentionally short: terminal
	// echo is near-instant when it occurs. Non-echoing agents
	// (e.g. TUI agents using bracketed paste) will hit this
	// timeout, which is non-fatal — see the Phase 1 error
	// handler in writeStabilize. Move to PTYConversationConfig
	// if agents need different echo detection windows.
	writeStabilizeEchoTimeout = 2 * time.Second

	// writeStabilizeProcessTimeout is the maximum time to wait
	// for the screen to change after sending a carriage return.
	// This detects whether the agent is actually processing the
	// input.
	writeStabilizeProcessTimeout = 15 * time.Second
)

// A screenSnapshot represents a snapshot of the PTY at a specific time.
type screenSnapshot struct {
timestamp time.Time
Expand Down Expand Up @@ -411,7 +430,19 @@ func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...Messa
return nil
}

// writeStabilize writes messageParts to the PTY and waits for
// the agent to process them. It operates in two phases:
//
// Phase 1 (echo detection): writes the message text and waits
// for the screen to change and stabilize. This detects agents
// that echo typed input. If the screen doesn't change within
// writeStabilizeEchoTimeout, this is non-fatal — many TUI
// agents buffer bracketed-paste input without rendering it.
//
// Phase 2 (processing detection): writes a carriage return
// and waits for the screen to change, indicating the agent
// started processing. This phase is fatal on timeout — if the
// agent doesn't react to Enter, it's unresponsive.
func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error {
screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
for _, part := range messageParts {
Expand All @@ -421,7 +452,7 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
}
// wait for the screen to stabilize after the message is written
Comment thread
johnstcn marked this conversation as resolved.
Outdated
if err := util.WaitFor(ctx, util.WaitTimeout{
Timeout: 15 * time.Second,
Timeout: writeStabilizeEchoTimeout,
MinInterval: 50 * time.Millisecond,
InitialWait: true,
Clock: c.cfg.Clock,
Expand All @@ -438,14 +469,25 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
}
return false, nil
}); err != nil {
return xerrors.Errorf("failed to wait for screen to stabilize: %w", err)
if !errors.Is(err, util.WaitTimedOut) {
Comment thread
johnstcn marked this conversation as resolved.
Comment thread
johnstcn marked this conversation as resolved.
// Context cancellation or condition errors are fatal.
return xerrors.Errorf("failed to wait for screen to stabilize: %w", err)
Comment thread
johnstcn marked this conversation as resolved.
}
// Phase 1 timeout is non-fatal: the agent may not echo
// input (e.g. TUI agents buffer bracketed-paste content
// internally). Proceed to Phase 2 to send the carriage
// return.
c.cfg.Logger.Info(
Comment thread
johnstcn marked this conversation as resolved.
"screen did not stabilize after writing message, proceeding to send carriage return",
"timeout", writeStabilizeEchoTimeout,
)
Comment thread
johnstcn marked this conversation as resolved.
}

// wait for the screen to change after the carriage return is written
screenBeforeCarriageReturn := c.cfg.AgentIO.ReadScreen()
Comment thread
johnstcn marked this conversation as resolved.
lastCarriageReturnTime := time.Time{}
if err := util.WaitFor(ctx, util.WaitTimeout{
Timeout: 15 * time.Second,
Timeout: writeStabilizeProcessTimeout,
MinInterval: 25 * time.Millisecond,
Clock: c.cfg.Clock,
}, func() (bool, error) {
Expand Down
137 changes: 136 additions & 1 deletion lib/screentracker/pty_conversation_test.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package screentracker_test

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"sync"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -447,7 +449,140 @@ func TestMessages(t *testing.T) {
c, _, _ := newConversation(context.Background(), t)
assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty)
})
}

t.Run("send-no-echo-agent-reacts", func(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
	t.Cleanup(cancel)

	// Given: an agent that doesn't echo typed input but
	// reacts to carriage return by updating the screen.
	agent := &testAgent{screen: "prompt"}
	agent.onWrite = func(data []byte) {
		// Only a write consisting of exactly "\r" changes the
		// screen; every other write leaves it untouched.
		if string(data) == "\r" {
			agent.screen = "processing..."
		}
	}
	mClock := quartz.NewMock(t)
	mClock.Set(time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC))
	cfg := st.PTYConversationConfig{
		Clock:                 mClock,
		AgentIO:               agent,
		SnapshotInterval:      interval,
		ScreenStabilityLength: 200 * time.Millisecond,
		// Log output is discarded; the test only inspects messages.
		Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
	}
	c := st.NewPTY(ctx, cfg, &testEmitter{})
	c.Start(ctx)
	advanceFor(ctx, t, mClock, interval*threshold)

	// When: a message is sent. Phase 1 times out (no echo),
	// Phase 2 writes \r and the agent reacts.
	sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "hello"})

	// Then: Send succeeds and the user message is recorded.
	msgs := c.Messages()
	// GreaterOrEqual reports the actual length on failure, unlike
	// a bare boolean check.
	require.GreaterOrEqual(t, len(msgs), 2)
	var foundUserMsg bool
	for _, msg := range msgs {
		if msg.Role == st.ConversationRoleUser && msg.Message == "hello" {
			foundUserMsg = true
			break
		}
	}
	assert.True(t, foundUserMsg, "expected user message 'hello' in conversation")
})
t.Run("send-no-echo-no-react", func(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
	t.Cleanup(cancel)

	// Given: a fully unresponsive agent. Its write hook is a
	// no-op, so the screen never changes — neither when text is
	// written nor when a carriage return is sent.
	deadAgent := &testAgent{
		screen:  "prompt",
		onWrite: func([]byte) {},
	}
	clk := quartz.NewMock(t)
	clk.Set(time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC))
	conv := st.NewPTY(ctx, st.PTYConversationConfig{
		Clock:                 clk,
		AgentIO:               deadAgent,
		SnapshotInterval:      interval,
		ScreenStabilityLength: 200 * time.Millisecond,
		Logger:                slog.New(slog.NewTextHandler(io.Discard, nil)),
	}, &testEmitter{})
	conv.Start(ctx)
	advanceFor(ctx, t, clk, interval*threshold)

	// When: a message is sent, both the echo phase and the
	// processing phase run out of time.
	// sendAndAdvance is not usable here because it calls
	// require.NoError internally, so Send runs in its own
	// goroutine while the mock clock is advanced until it returns.
	var (
		sendErr  error
		finished atomic.Bool
	)
	go func() {
		sendErr = conv.Send(st.MessagePartText{Content: "hello"})
		// The atomic store publishes sendErr to the test goroutine.
		finished.Store(true)
	}()
	advanceUntil(ctx, t, clk, finished.Load)

	// Then: the failure comes from Phase 2, not Phase 1.
	require.Error(t, sendErr)
	assert.Contains(t, sendErr.Error(), "failed to wait for processing to start")
})
t.Run("send-tui-selection-esc-cancels", func(t *testing.T) {
// Documents a known limitation: when a TUI agent shows a
// selection prompt, sending a user message wraps it in
// bracketed paste. The ESC (\x1b) in the paste-start
// sequence cancels the selection widget. The user's
// intended choice never reaches the selection handler.
// For selection prompts, callers should use
// MessageTypeRaw to send raw keystrokes directly.
//
// See lib/httpapi/claude.go formatClaudeCodeMessage for
// the full format; this test focuses on the ESC
// invariant only.
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
t.Cleanup(cancel)

// Given: a TUI agent showing a selection prompt where
// ESC cancels the selection and changes the screen.
agent := &testAgent{screen: "selection prompt"}
selectionCancelled := false
agent.onWrite = func(data []byte) {
if bytes.Contains(data, []byte("\x1b")) {
selectionCancelled = true
agent.screen = "selection cancelled"
} else if string(data) == "\r" {
agent.screen = "post-cancel"
}
}
mClock := quartz.NewMock(t)
mClock.Set(time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC))
cfg := st.PTYConversationConfig{
Clock: mClock,
AgentIO: agent,
SnapshotInterval: interval,
ScreenStabilityLength: 200 * time.Millisecond,
Logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
}
c := st.NewPTY(ctx, cfg, &testEmitter{})
c.Start(ctx)
advanceFor(ctx, t, mClock, interval*threshold)

// When: a message is sent using bracketed paste, which
// contains ESC in the start sequence (\x1b[200~).
sendAndAdvance(ctx, t, c, mClock,
st.MessagePartText{Content: "\x1b[200~", Hidden: true},
st.MessagePartText{Content: "2"},
st.MessagePartText{Content: "\x1b[201~", Hidden: true},
)

// Then: Send succeeds, but the selection was cancelled
// by ESC — option "2" was never delivered to the
// selection handler.
assert.True(t, selectionCancelled,
"ESC in bracketed paste cancels TUI selection prompts; "+
"use MessageTypeRaw for selection prompts instead")
})}
Comment thread
johnstcn marked this conversation as resolved.
Outdated
Comment thread
johnstcn marked this conversation as resolved.
Outdated

func TestStatePersistence(t *testing.T) {
t.Run("SaveState creates file with correct structure", func(t *testing.T) {
Expand Down
Loading