Skip to content
5 changes: 5 additions & 0 deletions lib/httpapi/claude.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ import (
st "github.com/coder/agentapi/lib/screentracker"
)

// formatPaste wraps message in bracketed paste escape sequences.
// These sequences start with ESC (\x1b), which TUI selection
// widgets (e.g. Claude Code's numbered-choice prompt) interpret
// as "cancel". For selection prompts, callers should use
// MessageTypeRaw to send raw keystrokes directly instead.
Comment thread
johnstcn marked this conversation as resolved.
func formatPaste(message string) []st.MessagePart {
return []st.MessagePart{
// Bracketed paste mode start sequence
Expand Down
52 changes: 48 additions & 4 deletions lib/screentracker/pty_conversation.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package screentracker
import (
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
Expand All @@ -16,6 +17,26 @@ import (
"golang.org/x/xerrors"
)

const (
// writeStabilizeEchoTimeout is the timeout for the echo
// detection WaitFor loop in writeStabilize Phase 1. The
// effective ceiling may be slightly longer because the 1s
Comment thread
johnstcn marked this conversation as resolved.
Outdated
// stability check inside the condition runs outside
// WaitFor's timeout select. Non-echoing agents (e.g. TUI
// agents using bracketed paste) will hit this timeout,
// which is non-fatal.
//
// TODO: move to PTYConversationConfig if agents need
// different echo detection windows.
writeStabilizeEchoTimeout = 2 * time.Second

// writeStabilizeProcessTimeout is the maximum time to wait
// for the screen to change after sending a carriage return.
// This detects whether the agent is actually processing the
// input.
writeStabilizeProcessTimeout = 15 * time.Second
)

// A screenSnapshot represents a snapshot of the PTY at a specific time.
type screenSnapshot struct {
timestamp time.Time
Expand Down Expand Up @@ -411,7 +432,19 @@ func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...Messa
return nil
}

// writeStabilize writes messageParts to the screen and waits for the screen to stabilize after the message is written.
// writeStabilize writes messageParts to the PTY and waits for
// the agent to process them. It operates in two phases:
//
// Phase 1 (echo detection): writes the message text and waits
// for the screen to change and stabilize. This detects agents
// that echo typed input. If the screen doesn't change within
// writeStabilizeEchoTimeout, this is non-fatal — many TUI
// agents buffer bracketed-paste input without rendering it.
//
// Phase 2 (processing detection): writes a carriage return
// and waits for the screen to change, indicating the agent
// started processing. This phase is fatal on timeout — if the
// agent doesn't react to Enter, it's unresponsive.
func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error {
screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
for _, part := range messageParts {
Expand All @@ -421,7 +454,7 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
}
// wait for the screen to stabilize after the message is written
Comment thread
johnstcn marked this conversation as resolved.
Outdated
if err := util.WaitFor(ctx, util.WaitTimeout{
Timeout: 15 * time.Second,
Timeout: writeStabilizeEchoTimeout,
MinInterval: 50 * time.Millisecond,
InitialWait: true,
Clock: c.cfg.Clock,
Expand All @@ -438,14 +471,25 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
}
return false, nil
}); err != nil {
return xerrors.Errorf("failed to wait for screen to stabilize: %w", err)
if !errors.Is(err, util.WaitTimedOut) {
Comment thread
johnstcn marked this conversation as resolved.
Comment thread
johnstcn marked this conversation as resolved.
// Context cancellation or condition errors are fatal.
return xerrors.Errorf("failed to wait for screen to stabilize: %w", err)
Comment thread
johnstcn marked this conversation as resolved.
}
// Phase 1 timeout is non-fatal: the agent may not echo
// input (e.g. TUI agents buffer bracketed-paste content
// internally). Proceed to Phase 2 to send the carriage
// return.
c.cfg.Logger.Info(
Comment thread
johnstcn marked this conversation as resolved.
"echo detection timed out, sending carriage return",
"timeout", writeStabilizeEchoTimeout,
)
Comment thread
johnstcn marked this conversation as resolved.
}

// wait for the screen to change after the carriage return is written
screenBeforeCarriageReturn := c.cfg.AgentIO.ReadScreen()
Comment thread
johnstcn marked this conversation as resolved.
lastCarriageReturnTime := time.Time{}
if err := util.WaitFor(ctx, util.WaitTimeout{
Timeout: 15 * time.Second,
Timeout: writeStabilizeProcessTimeout,
MinInterval: 25 * time.Millisecond,
Clock: c.cfg.Clock,
}, func() (bool, error) {
Expand Down
125 changes: 125 additions & 0 deletions lib/screentracker/pty_conversation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"log/slog"
"os"
"sync"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -447,6 +448,130 @@ func TestMessages(t *testing.T) {
c, _, _ := newConversation(context.Background(), t)
assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty)
})

t.Run("send-message-no-echo-agent-reacts", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
t.Cleanup(cancel)

// Given: an agent that doesn't echo typed input but
// reacts to carriage return by updating the screen.
c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) {
a := &testAgent{screen: "prompt"}
a.onWrite = func(data []byte) {
if string(data) == "\r" {
a.screen = "processing..."
}
}
cfg.AgentIO = a
})
advanceFor(ctx, t, mClock, interval*threshold)

// When: a message is sent. Phase 1 times out (no echo),
// Phase 2 writes \r and the agent reacts.
sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "hello"})

// Then: Send succeeds and the user message is recorded.
msgs := c.Messages()
require.True(t, len(msgs) >= 2)
var foundUserMsg bool
Comment thread
johnstcn marked this conversation as resolved.
Outdated
for _, msg := range msgs {
if msg.Role == st.ConversationRoleUser && msg.Message == "hello" {
foundUserMsg = true
break
}
}
assert.True(t, foundUserMsg, "expected user message 'hello' in conversation")
})

t.Run("send-message-no-echo-no-react", func(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
t.Cleanup(cancel)

// Given: an agent that is completely unresponsive — it
// neither echoes input nor reacts to carriage return.
c, _, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) {
a := &testAgent{screen: "prompt"}
a.onWrite = func(data []byte) {}
cfg.AgentIO = a
})
advanceFor(ctx, t, mClock, interval*threshold)

// When: a message is sent. Both Phase 1 (echo) and
// Phase 2 (processing) time out.
// Note: can't use sendAndAdvance here because it calls
// require.NoError internally.
var sendErr error
var sendDone atomic.Bool
go func() {
sendErr = c.Send(st.MessagePartText{Content: "hello"})
sendDone.Store(true)
}()
advanceUntil(ctx, t, mClock, func() bool { return sendDone.Load() })

// Then: Send fails with a Phase 2 error (not Phase 1).
require.Error(t, sendErr)
assert.Contains(t, sendErr.Error(), "failed to wait for processing to start")
})

t.Run("send-message-no-echo-context-cancelled", func(t *testing.T) {
// Given: a non-echoing agent and a cancellable context.
// The onWrite signals when writeStabilize starts writing
// message parts — this is used to synchronize the cancel.
sendCtx, sendCancel := context.WithCancel(context.Background())
t.Cleanup(sendCancel)

writeStarted := make(chan struct{}, 1)
c, _, mClock := newConversation(sendCtx, t, func(cfg *st.PTYConversationConfig) {
a := &testAgent{screen: "prompt"}
a.onWrite = func(data []byte) {
select {
case writeStarted <- struct{}{}:
default:
}
}
cfg.AgentIO = a
})
advanceFor(sendCtx, t, mClock, interval*threshold)

// When: a message is sent and the context is cancelled
// during Phase 1 (after the message is written to the
// PTY, before echo detection completes).
var sendErr error
var sendDone atomic.Bool
go func() {
sendErr = c.Send(st.MessagePartText{Content: "hello"})
sendDone.Store(true)
}()

// Advance tick-by-tick until writeStabilize starts
// (onWrite fires). This gives the send loop goroutine
// scheduling time between advances.
advanceUntil(sendCtx, t, mClock, func() bool {
select {
case <-writeStarted:
return true
default:
return false
}
})

// writeStabilize Phase 1 is now running. Its WaitFor is
// blocked on a mock timer sleep select. Cancel: the
// select sees ctx.Done() immediately.
sendCancel()

// WaitFor returns ctx.Err(). The errors.Is guard in
// Phase 1 propagates it as fatal. Use Eventually since
// the goroutine needs scheduling time.
require.Eventually(t, sendDone.Load, 5*time.Second, 10*time.Millisecond)

// Then: the error wraps context.Canceled, not a Phase 2
// timeout. This validates the errors.Is(WaitTimedOut)
// guard.
require.Error(t, sendErr)
assert.ErrorIs(t, sendErr, context.Canceled)
assert.NotContains(t, sendErr.Error(), "failed to wait for processing to start")
})
}

func TestStatePersistence(t *testing.T) {
Expand Down
Loading