diff --git a/.changeset/fix-anthropic-cua-triple-click.md b/.changeset/fix-anthropic-cua-triple-click.md
new file mode 100644
index 0000000000..6f2d57fe6f
--- /dev/null
+++ b/.changeset/fix-anthropic-cua-triple-click.md
@@ -0,0 +1,5 @@
+---
+"@browserbasehq/stagehand": patch
+---
+
+Fix Anthropic CUA `triple_click` action mapping.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 6f74bdd3f2..6891afed40 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -335,6 +335,24 @@ jobs:
       - name: Run CLI Tests
         run: pnpm exec turbo run test:cli --filter=@browserbasehq/browse-cli
 
+  run-evals-unit-tests:
+    name: Evals Unit Tests
+    runs-on: ubuntu-latest
+    needs: [run-build, determine-changes]
+    if: needs.determine-changes.outputs.evals == 'true'
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 1
+
+      - uses: ./.github/actions/setup-node-pnpm-turbo
+        with:
+          use-prebuilt-artifacts: "true"
+          restore-turbo-cache: "false"
+
+      - name: Run Evals Unit Tests
+        run: pnpm --filter @browserbasehq/stagehand-evals run test:unit
+
   discover-core-tests:
     runs-on: ubuntu-latest
     needs: [determine-changes]
diff --git a/packages/core/lib/v3/agent/AnthropicCUAClient.ts b/packages/core/lib/v3/agent/AnthropicCUAClient.ts
index 4bbe2d47bc..752d208e22 100644
--- a/packages/core/lib/v3/agent/AnthropicCUAClient.ts
+++ b/packages/core/lib/v3/agent/AnthropicCUAClient.ts
@@ -901,6 +901,19 @@ export class AnthropicCUAClient extends AgentClient {
               (input.coordinate ? (input.coordinate as number[])[1] : 0),
             ...input,
           };
+        } else if (action === "triple_click" || action === "tripleClick") {
+          // Normalize the snake_case `triple_click` action to `tripleClick`;
+          // x/y fall back to the `coordinate` pair, then to 0.
+          return {
+            type: "tripleClick",
+            x:
+              (input.x as number) ||
+              (input.coordinate ? (input.coordinate as number[])[0] : 0),
+            y:
+              (input.y as number) ||
+              (input.coordinate ? (input.coordinate as number[])[1] : 0),
+            ...input,
+          };
         } else if (action === "scroll") {
           // Convert Anthropic's coordinate, scroll_amount and scroll_direction into scroll_x and scroll_y
           const x =
diff --git a/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts b/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
index 6cefa4b4dd..af3a3dad87 100644
--- a/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
+++ b/packages/core/lib/v3/handlers/v3CuaAgentHandler.ts
@@ -324,6 +324,8 @@ export class V3CuaAgentHandler {
         }
         return { success: true };
       }
+      // Also accept the snake_case spelling of the triple-click action.
+      case "triple_click":
       case "tripleClick": {
         const { x, y } = action;
         if (recording) {
diff --git a/packages/core/tests/unit/anthropic-cua-triple-click.test.ts b/packages/core/tests/unit/anthropic-cua-triple-click.test.ts
new file mode 100644
index 0000000000..fe07561da6
--- /dev/null
+++ b/packages/core/tests/unit/anthropic-cua-triple-click.test.ts
@@ -0,0 +1,108 @@
+import { describe, expect, it, vi, beforeEach } from "vitest";
+import { AnthropicCUAClient } from "../../lib/v3/agent/AnthropicCUAClient.js";
+import Anthropic from "@anthropic-ai/sdk";
+
+// Stub the Anthropic SDK: every MockAnthropic instance shares one
+// `beta.messages.create` mock, so tests can script the model's response.
+vi.mock("@anthropic-ai/sdk", () => {
+  const mockCreate = vi.fn();
+
+  return {
+    default: class MockAnthropic {
+      beta = {
+        messages: {
+          create: mockCreate,
+        },
+      };
+    },
+  };
+});
+
+describe("AnthropicCUAClient triple_click handling", () => {
+  let mockCreate: ReturnType<typeof vi.fn>;
+  let client: AnthropicCUAClient;
+  let executedActions: Array<Record<string, unknown>>;
+
+  beforeEach(() => {
+    vi.clearAllMocks();
+    // Grab the shared `create` mock through a throwaway SDK instance.
+    const anthropic = new Anthropic({ apiKey: "test" });
+    mockCreate = anthropic.beta.messages.create as ReturnType<typeof vi.fn>;
+
+    client = new AnthropicCUAClient(
+      "anthropic",
+      "claude-sonnet-4-5-20250929",
+      undefined,
+      {
+        apiKey: "test-key",
+      },
+    );
+    client.setViewport(1280, 720);
+    client.setScreenshotProvider(async () => "fake-base64-screenshot");
+
+    // Capture a shallow copy of every action passed to the action handler.
+    executedActions = [];
+    client.setActionHandler(async (action) => {
+      executedActions.push({ ...action });
+    });
+  });
+
+  it("should convert triple_click with coordinate array to tripleClick action", async () => {
+    mockCreate.mockResolvedValue({
+      id: "test-id",
+      content: [
+        {
+          type: "tool_use",
+          id: "tool-1",
+          name: "computer",
+          input: {
+            action: "triple_click",
+            coordinate: [640, 360],
+          },
+        },
+      ],
+      usage: { input_tokens: 10, output_tokens: 20 },
+    });
+
+    const logger = vi.fn();
+    await client.executeStep(
+      [{ role: "user", content: "triple click the paragraph" }],
+      logger,
+    );
+
+    expect(executedActions).toHaveLength(1);
+    expect(executedActions[0].type).toBe("tripleClick");
+    expect(executedActions[0].x).toBe(640);
+    expect(executedActions[0].y).toBe(360);
+  });
+
+  it("should convert triple_click with x/y fields to tripleClick action", async () => {
+    mockCreate.mockResolvedValue({
+      id: "test-id",
+      content: [
+        {
+          type: "tool_use",
+          id: "tool-2",
+          name: "computer",
+          input: {
+            action: "triple_click",
+            x: 100,
+            y: 200,
+          },
+        },
+      ],
+      usage: { input_tokens: 10, output_tokens: 20 },
+    });
+
+    const logger = vi.fn();
+    await client.executeStep(
+      [{ role: "user", content: "triple click the line" }],
+      logger,
+    );
+
+    expect(executedActions).toHaveLength(1);
+    expect(executedActions[0].type).toBe("tripleClick");
+    expect(executedActions[0].x).toBe(100);
+    expect(executedActions[0].y).toBe(200);
+  });
+});