test(e2e): add continuation re-execution tool call arguments tests

DiegoGBrisa · DiegoGBrisa · commit 654fb57efb13 · 2026-04-10T20:50:39.000+02:00
Add 3 E2E tests verifying tool call arguments are preserved during
continuation re-executions. Without TOOL_CALL_START/ARGS emission,
clients store tool calls with empty {} arguments.

Tests:
- Single client tool args preserved after continuation
- Sequential client tool args preserved across multiple continuations
- Parallel client tool args preserved in batch continuation
diff --git a/testing/e2e/tests/tools-test/continuation-args.spec.ts b/testing/e2e/tests/tools-test/continuation-args.spec.ts
@@ -0,0 +1,179 @@
+import type { Page } from '@playwright/test'
+import { test, expect } from '../fixtures'
+import {
+  selectScenario,
+  runTest,
+  waitForTestComplete,
+  getMetadata,
+  getEventLog,
+  getToolCalls,
+} from './helpers'
+
+/**
+ * Continuation Re-execution — Tool Call Arguments E2E Tests
+ *
+ * These tests verify that tool call arguments are correctly preserved during
+ * continuation re-executions. When a client tool completes and the conversation
+ * continues, the server re-processes message history containing pending tool
+ * calls. Without emitting TOOL_CALL_START + TOOL_CALL_ARGS before
+ * TOOL_CALL_END, tool-call parts arrive at the client with empty
+ * arguments {}, potentially causing infinite re-execution loops.
+ *
+ * These tests validate the end-to-end observable effect: after client tool
+ * continuation completes, every tool-call part in the final messages must
+ * have its original arguments — not empty {}.
+ */
+
+/**
+ * Collect every `tool-call` part from the messages JSON rendered in
+ * `#messages-json-content`, returned as `{ name, arguments }` objects.
+ * String-encoded arguments are JSON-decoded; any other value is passed
+ * through unchanged. Missing or empty page text yields an empty list.
+ */
+async function getToolCallParts(page: Page) {
+  type Part = Record<string, unknown>
+  const rawJson = await page.locator('#messages-json-content').textContent()
+  const parsedMessages = JSON.parse(rawJson || '[]')
+
+  // Normalise one part: decode its arguments only when they are a string.
+  const toToolCall = (part: Part) => {
+    const rawArgs = part.arguments
+    const decoded = typeof rawArgs === 'string' ? JSON.parse(rawArgs) : rawArgs
+    return { name: part.name as string, arguments: decoded }
+  }
+
+  return parsedMessages.flatMap((message: { parts: Array<Part> }) =>
+    message.parts.filter((part) => part.type === 'tool-call').map(toToolCall),
+  )
+}
+
+test.describe('Continuation Re-execution — Tool Call Arguments', () => {
+  test('single client tool arguments preserved after continuation', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    // Select the scenario, start the run, then wait on the completion helper.
+    await selectScenario(page, 'client-tool-single', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page)
+
+    const { testComplete, toolCallCount } = await getMetadata(page)
+    expect(testComplete).toBe('true')
+    expect(Number.parseInt(toolCallCount)).toBeGreaterThanOrEqual(1)
+
+    const parts = await getToolCallParts(page)
+    expect(parts.length).toBeGreaterThanOrEqual(1)
+
+    // The notification call must carry exactly these arguments, not {}.
+    const expectedArgs = { message: 'Hello from the AI!', type: 'info' }
+    const notification = parts.find(
+      (part: { name: string }) => part.name === 'show_notification',
+    )
+    expect(notification).toBeDefined()
+    expect(notification.arguments).toEqual(expectedArgs)
+  })
+
+  test('sequential client tool arguments preserved across multiple continuations', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    await selectScenario(page, 'sequential-client-tools', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page, 15000, 2)
+
+    // Block until #test-metadata reports at least two completed executions
+    // (data-execution-complete-count >= 2), giving up after 10 seconds.
+    await page.waitForFunction(
+      () => {
+        const metadataEl = document.querySelector('#test-metadata')
+        const rawCount = metadataEl?.getAttribute(
+          'data-execution-complete-count',
+        )
+        return parseInt(rawCount || '0') >= 2
+      },
+      { timeout: 10000 },
+    )
+
+    const { toolCallCount } = await getMetadata(page)
+    expect(parseInt(toolCallCount)).toBeGreaterThanOrEqual(2)
+
+    const parts = await getToolCallParts(page)
+    const notifications = parts.filter(
+      (part: { name: string }) => part.name === 'show_notification',
+    )
+    expect(notifications.length).toBeGreaterThanOrEqual(2)
+
+    // Each expected payload must appear somewhere; ordering is not asserted.
+    const seenArgs = notifications.map(
+      (part: { arguments: unknown }) => part.arguments,
+    )
+    const expectedPayloads = [
+      { message: 'First notification', type: 'info' },
+      { message: 'Second notification', type: 'warning' },
+    ]
+    for (const payload of expectedPayloads) {
+      expect(seenArgs).toContainEqual(payload)
+    }
+
+    // Guard against the regression itself: arguments collapsed to {}.
+    notifications.forEach((part: { arguments: unknown }) => {
+      expect(part.arguments).not.toEqual({})
+    })
+  })
+
+  test('parallel client tool arguments preserved in batch continuation', async ({
+    page,
+    testId,
+    aimockPort,
+  }) => {
+    await selectScenario(page, 'parallel-client-tools', testId, aimockPort)
+    await runTest(page)
+    await waitForTestComplete(page, 15000, 2)
+
+    const { toolCallCount } = await getMetadata(page)
+    expect(parseInt(toolCallCount)).toBeGreaterThanOrEqual(2)
+
+    const parts = await getToolCallParts(page)
+    expect(parts.length).toBeGreaterThanOrEqual(2)
+
+    // Look up the first recorded call for a given tool name.
+    const firstCallTo = (toolName: string) =>
+      parts.find((part: { name: string }) => part.name === toolName)
+
+    const notification = firstCallTo('show_notification')
+    const chart = firstCallTo('display_chart')
+
+    expect(notification).toBeDefined()
+    expect(chart).toBeDefined()
+
+    expect(notification.arguments).toEqual({
+      message: 'Parallel 1',
+      type: 'info',
+    })
+    expect(chart.arguments).toEqual({
+      type: 'bar',
+      data: [1, 2, 3],
+    })
+  })
+
+  // On an unexpected outcome, save a screenshot and log debugging state.
+  test.afterEach(async ({ page }, testInfo) => {
+    // Nothing to capture when the test ended with its expected status.
+    if (testInfo.status === testInfo.expectedStatus) return
+
+    const titleSlug = testInfo.title.replace(/\s+/g, '-')
+    const screenshotPath = `test-results/continuation-args-failure-${titleSlug}.png`
+    await page.screenshot({ path: screenshotPath, fullPage: true })
+
+    const eventLog = await getEventLog(page)
+    const recordedCalls = await getToolCalls(page)
+    const pageMetadata = await getMetadata(page)
+
+    console.log('Test failed. Debug info:')
+    console.log('Metadata:', pageMetadata)
+    console.log('Events:', eventLog)
+    console.log('Tool calls:', recordedCalls)
+  })
+})