feat: add /strands test command for TUI testing via MCP harness

Hweinstock · Hweinstock · commit 0b335d06be22 · 2026-03-28T00:09:03.000Z
- Add tester mode to process-inputs.cjs (routes /strands test)
- Add task-tester.sop.md with TUI testing instructions
- Add tui-test-flows.md with 5 test flows
- Add Node.js setup + build steps for tester mode in workflow
- Wire TUI harness MCP server (stdio) into the Strands agent
diff --git a/.github/agent-sops/task-tester.sop.md b/.github/agent-sops/task-tester.sop.md
@@ -0,0 +1,87 @@
+# Task Tester SOP
+
+## Role
+
+You are a TUI Tester. Your goal is to verify the AgentCore CLI's interactive TUI behavior by driving it through
+predefined test flows using the TUI harness MCP tools. You post results as PR comments.
+
+You MUST NOT modify any code, create branches, or push commits. Your only output is test result comments.
+
+## Tools Available
+
+You have TUI harness MCP tools: `tui_launch`, `tui_send_keys`, `tui_action`, `tui_wait_for`, `tui_screenshot`,
+`tui_read_screen`, `tui_close`, `tui_list_sessions`.
+
+You also have `shell` for setup commands and GitHub tools for posting comments.
+
+## Steps
+
+### 1. Setup
+
+- Read the test spec file at `.github/agent-sops/tui-test-flows.md`
+- The CLI is already built and available. Launch TUI sessions from the repo root using the default command (which runs
+  `node dist/cli/index.mjs`).
+
+### 2. Run Test Flows
+
+For each flow in the test spec:
+
+1. Create any required setup (e.g., temp directories, minimal projects) using `shell`
+2. Use `tui_launch` to start the CLI with the specified arguments and `cwd`
+3. Follow the flow steps: use `tui_action` (preferred — combines send + wait + read in one call) or `tui_wait_for` +
+   `tui_send_keys` for multi-step interactions
+4. Verify each expectation against the screen content
+5. On **pass**: record the flow name as passed
+6. On **failure**: use `tui_screenshot` to capture the terminal state, then record the flow name, expected behavior,
+   actual behavior, and the screenshot text
+7. Always `tui_close` the session when done, even on failure
+
+**Constraints:**
+
+- Use `timeoutMs: 10000` (10 seconds) minimum for all `tui_wait_for` and `tui_action` pattern waits
+- Use small terminal dimensions: `cols: 100, rows: 24`
+- If a wait times out, retry once before declaring failure
+- Use text format screenshots only (not SVG)
+- Keep terminal dimensions consistent across all flows
+
+### 3. Post Results
+
+Post a single summary comment on the PR with this format:
+
+````markdown
+## 🧪 TUI Test Results
+
+**X/Y flows passed**
+
+### ✅ Passed
+
+- Flow name 1
+- Flow name 2
+
+### ❌ Failed
+
+#### Flow name 3
+
+**Expected:** description of what should have happened
+
+**Actual:** description of what happened
+
+<details>
+<summary>Screenshot</summary>
+
+```
+(terminal screenshot here)
+```
+</details>
+````
+
+If all flows pass, omit the Failed section.
+
+## Forbidden Actions
+
+- You MUST NOT modify, create, or delete any repository source files (temp directories used for test setup are fine)
+- You MUST NOT run git add, git commit, or git push
+- You MUST NOT create or update branches
+- You MUST NOT approve or merge the pull request
+- You MUST NOT run deploy, invoke, or any command that creates AWS resources
+- Your ONLY output is the single test results summary comment on the pull request
diff --git a/.github/agent-sops/tui-test-flows.md b/.github/agent-sops/tui-test-flows.md
@@ -0,0 +1,50 @@
+# TUI Test Flows
+
+Each flow describes a user interaction to verify. The tester agent drives these using the TUI harness MCP tools.
+
+---
+
+## Flow: Help text lists all subcommands
+
+1. Launch: `agentcore --help` (use `tui_launch` with args `["--help"]`)
+2. Wait for: "Usage:" on screen
+3. Expect all of these subcommands visible: `create`, `deploy`, `invoke`, `status`, `logs`, `add`, `remove`
+4. Close session
+
+---
+
+## Flow: Create wizard prompts for project name
+
+1. Launch: `agentcore create` (no flags, in a temp directory)
+2. Wait for: a prompt asking for the project name (look for "name" or "project")
+3. Expect: an input field or prompt is visible
+4. Close session (Ctrl+C)
+
+---
+
+## Flow: Create with --json produces valid JSON
+
+1. In a temp directory, run via shell:
+   `agentcore create --name TestProj --language Python --framework Strands --model-provider Bedrock --memory none --json`
+2. Expect: stdout contains valid JSON with `"success": true` and `"projectPath"`
+3. Verify the project directory was created
+
+---
+
+## Flow: Add agent shows framework selection
+
+1. First create a project via shell: `agentcore create --name AgentTest --no-agent --json` (in a temp directory)
+2. Launch: `agentcore add agent` in the created project directory
+3. Wait for: agent name prompt
+4. Type a name, press Enter
+5. Wait for: framework or language selection to appear
+6. Expect: at least "Strands" and "LangChain_LangGraph" visible as options
+7. Close session (Ctrl+C)
+
+---
+
+## Flow: Invalid project name shows error
+
+1. In a temp directory, run via shell:
+   `agentcore create --name "123invalid" --language Python --framework Strands --model-provider Bedrock --memory none --json`
+2. Expect: exit code is non-zero OR output contains an error about the project name (must start with a letter)
diff --git a/.github/scripts/javascript/process-inputs.cjs b/.github/scripts/javascript/process-inputs.cjs
@@ -78,6 +78,7 @@ function buildPrompts(mode, issueId, isPullRequest, command, branchName, inputs)
     implementer: '.github/agent-sops/task-implementer.sop.md',
     reviewer: '.github/agent-sops/task-reviewer.sop.md',
     refiner: '.github/agent-sops/task-refiner.sop.md',
+    tester: '.github/agent-sops/task-tester.sop.md',
   };
   const scriptFile = sopFiles[mode] || sopFiles.refiner;
 
@@ -94,11 +95,13 @@ module.exports = async (context, github, core, inputs) => {
     const { issueId, command, issue } = await getIssueInfo(github, context, inputs);
 
     const isPullRequest = !!issue.data.pull_request;
-    const mode = command.startsWith('review')
-      ? 'reviewer'
-      : isPullRequest || command.startsWith('implement')
-        ? 'implementer'
-        : 'refiner';
+    const mode = command.startsWith('test')
+      ? 'tester'
+      : command.startsWith('review')
+        ? 'reviewer'
+        : isPullRequest || command.startsWith('implement')
+          ? 'implementer'
+          : 'refiner';
     console.log(`Is PR: ${isPullRequest}, Mode: ${mode}`);
 
     const branchName = await determineBranch(github, context, issueId, mode, isPullRequest);
@@ -113,6 +116,7 @@ module.exports = async (context, github, core, inputs) => {
     core.setOutput('session_id', sessionId);
     core.setOutput('system_prompt', systemPrompt);
     core.setOutput('prompt', prompt);
+    core.setOutput('mode', mode);
   } catch (error) {
     const errorMsg = `Failed: ${error.message}`;
     console.error(errorMsg);
diff --git a/.github/workflows/strands-command.yml b/.github/workflows/strands-command.yml
@@ -94,6 +94,20 @@ jobs:
             };
             await processInputs(context, github, core, inputs);
 
+      - name: Setup Node.js (tester mode)
+        if: steps.process-inputs.outputs.mode == 'tester'
+        uses: actions/setup-node@v6
+        with:
+          node-version: 20.x
+          cache: 'npm'
+
+      - name: Build CLI and TUI harness (tester mode)
+        if: steps.process-inputs.outputs.mode == 'tester'
+        run: |
+          npm ci
+          npm run build
+          npm run build:harness
+
       - name: Run Strands Agent
         uses: ./.github/actions/strands-action
         with:
@@ -102,6 +116,9 @@ jobs:
           provider: 'bedrock'
           model: 'us.anthropic.claude-sonnet-4-5-20250929-v1:0'
           tools: 'strands_tools:shell,retrieve'
+          mcp_servers:
+            ${{ steps.process-inputs.outputs.mode == 'tester' &&
+            '{"mcpServers":{"tui-harness":{"command":"node","args":["dist/mcp-harness/index.mjs"]}}}' || '' }}
           aws_role_arn: ${{ secrets.AWS_ROLE_ARN }}
           aws_region: 'us-west-2'
           pat_token: ${{ secrets.GITHUB_TOKEN }}