test(e2e): add continuation re-execution tool call arguments tests

DiegoGBrisa · claude · DiegoGBrisa · commit 8974a32a6903 · 2026-04-10T20:43:53.000+02:00
Add 3 E2E tests verifying that tool call arguments are preserved during
continuation re-executions. Without TOOL_CALL_START and TOOL_CALL_ARGS
being emitted, clients store tool calls with empty {} arguments.

Tests:
- Single client tool args preserved after continuation
- Sequential client tool args preserved across multiple continuations
- Parallel client tool args preserved in batch continuation

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/testing/e2e/tests/tools-test/continuation-args.spec.ts b/testing/e2e/tests/tools-test/continuation-args.spec.ts
@@ -0,0 +1,220 @@
+import { test, expect } from '../fixtures'
+import {
+  selectScenario,
+  runTest,
+  waitForTestComplete,
+  getMetadata,
+  getEventLog,
+  getToolCalls,
+} from './helpers'
+
+/**
+ * Continuation Re-execution — Tool Call Arguments E2E Tests
+ *
+ * These tests verify that tool call arguments are correctly preserved during
+ * continuation re-executions. When a client tool completes and the conversation
+ * continues, the server re-processes message history containing pending tool
+ * calls. Without the fix (emitting TOOL_CALL_START + TOOL_CALL_ARGS before
+ * TOOL_CALL_END), tool-call parts would arrive at the client with empty
+ * arguments {}, potentially causing infinite re-execution loops.
+ *
+ * These tests validate the end-to-end observable effect: after client tool
+ * continuation completes, every tool-call part in the final messages must
+ * have its original arguments — not empty {}.
+ */
+
+type ToolCallPart = { // alias, not interface: assignable to Message's indexed part type
+  type: 'tool-call' // discriminant that extractToolCallParts filters on
+  id: string
+  name: string
+  arguments: Record<string, unknown> | string // parsed object or JSON text; see parseArgs
+  state?: string
+  output?: unknown
+}
+
+interface Message { // one entry of the array parsed by getMessages
+  id: string
+  role: string
+  parts: Array<{ type: string; [key: string]: unknown }> // discriminated by `type`
+}
+
+/**
+ * Read the text of the #messages-json-content element and parse it as the
+ * message list; null or empty text is treated as an empty array.
+ */
+async function getMessages(page: import('@playwright/test').Page) {
+  const container = page.locator('#messages-json-content')
+  const rawJson = (await container.textContent()) || '[]'
+  return JSON.parse(rawJson) as Array<Message>
+}
+
+/**
+ * Gather the tool-call parts of all messages into one flat array.
+ */
+function extractToolCallParts(messages: Array<Message>): Array<ToolCallPart> {
+  const isToolCall = (part: Message['parts'][number]): part is ToolCallPart =>
+    part.type === 'tool-call'
+  return messages.flatMap((message) => message.parts.filter(isToolCall))
+}
+
+/**
+ * Return tool call arguments in parsed form so they can be deep-compared.
+ * A string is decoded with JSON.parse (which throws on malformed JSON);
+ * an object is handed back as-is, by reference.
+ */
+function parseArgs(args: Record<string, unknown> | string): unknown {
+  const parsed: unknown = typeof args === 'string' ? JSON.parse(args) : args
+  return parsed
+}
+
+test.describe('Continuation Re-execution — Tool Call Arguments', () => {
+  test('single client tool arguments preserved after continuation', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    await selectScenario(page, 'client-tool-single', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page)
+
+    const metadata = await getMetadata(page)
+    expect(metadata.testComplete).toBe('true')
+    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(1)
+
+    // Parsed arguments of every show_notification tool call in the messages
+    const messages = await getMessages(page)
+    const toolCallParts = extractToolCallParts(messages)
+    expect(toolCallParts.length).toBeGreaterThanOrEqual(1)
+    const notificationArgs = toolCallParts
+      .filter((tc) => tc.name === 'show_notification')
+      .map((tc) => parseArgs(tc.arguments))
+
+    // First call must keep its original arguments (undefined, and so a failure, if there is none)
+    expect(notificationArgs[0]).toEqual({
+      message: 'Hello from the AI!',
+      type: 'info',
+    })
+
+    // A duplicate show_notification part with empty {} arguments must not slip through
+    for (const parsed of notificationArgs) {
+      expect(parsed).not.toEqual({})
+    }
+  })
+
+  test('sequential client tool arguments preserved across multiple continuations', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    await selectScenario(page, 'sequential-client-tools', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page, 15000, 2)
+
+    // Wait for execution events to propagate: resolves once the callback,
+    // run in the page, reads a completed-execution count of at least 2
+    await page.waitForFunction(
+      () => {
+        const el = document.querySelector('#test-metadata')
+        const raw = el?.getAttribute('data-execution-complete-count') || '0'
+        return parseInt(raw, 10) >= 2
+      },
+      { timeout: 10000 },
+    )
+
+    // The page must have counted at least two tool calls
+    const metadata = await getMetadata(page)
+    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)
+
+    // Parse messages and find all show_notification tool calls
+    const messages = await getMessages(page)
+    const toolCallParts = extractToolCallParts(messages).filter(
+      (tc) => tc.name === 'show_notification',
+    )
+
+    expect(toolCallParts.length).toBeGreaterThanOrEqual(2)
+
+    // Parse each call's arguments exactly once; every assertion below
+    // works on this list
+    const allArgs = toolCallParts.map((tc) => parseArgs(tc.arguments))
+
+    // Both sets of arguments must be present (order may vary)
+    expect(allArgs).toContainEqual({
+      message: 'First notification',
+      type: 'info',
+    })
+    expect(allArgs).toContainEqual({
+      message: 'Second notification',
+      type: 'warning',
+    })
+
+    // No tool call should have empty arguments
+    for (const parsed of allArgs) {
+      expect(parsed).not.toEqual({})
+    }
+  })
+
+  test('parallel client tool arguments preserved in batch continuation', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    await selectScenario(page, 'parallel-client-tools', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page, 15000, 2)
+
+    // The page must have counted at least two tool calls before the
+    // message contents are inspected
+    const metadata = await getMetadata(page)
+    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)
+
+    // Parse messages and find tool-call parts
+    const messages = await getMessages(page)
+    const toolCallParts = extractToolCallParts(messages)
+
+    expect(toolCallParts.length).toBeGreaterThanOrEqual(2)
+
+    // Parsed arguments of the first tool call with the given name, or
+    // undefined when no such call exists (which fails the toEqual checks)
+    const argsFor = (toolName: string): unknown => {
+      const call = toolCallParts.find((tc) => tc.name === toolName)
+      return call === undefined ? undefined : parseArgs(call.arguments)
+    }
+
+    // Verify show_notification arguments
+    expect(argsFor('show_notification')).toEqual({
+      message: 'Parallel 1',
+      type: 'info',
+    })
+
+    // Verify display_chart arguments
+    expect(argsFor('display_chart')).toEqual({
+      type: 'bar',
+      data: [1, 2, 3],
+    })
+
+    // No tool call should have empty arguments
+    for (const tc of toolCallParts) {
+      const parsed = parseArgs(tc.arguments)
+      expect(parsed).not.toEqual({})
+    }
+  })
+
+  // When a test ends with an unexpected status, save a screenshot and log debug state
+  test.afterEach(async ({ page }, testInfo) => {
+    if (testInfo.status === testInfo.expectedStatus) return
+    const slug = testInfo.title.replace(/\s+/g, '-')
+    await page.screenshot({
+      path: `test-results/continuation-args-failure-${slug}.png`,
+      fullPage: true,
+    })
+
+    const events = await getEventLog(page)
+    const toolCalls = await getToolCalls(page)
+    const metadata = await getMetadata(page)
+
+    console.log('Test failed. Debug info:')
+    console.log('Metadata:', metadata)
+    console.log('Events:', events)
+    console.log('Tool calls:', toolCalls)
+  })
+})