Skip to content

Commit 8974a32

Browse files
DiegoGBrisaclaude
andcommitted
test(e2e): add continuation re-execution tool call arguments tests
Add 3 E2E tests verifying tool call arguments are preserved during continuation re-executions. Without TOOL_CALL_START/ARGS emission, clients store tool calls with empty {} arguments. Tests: - Single client tool args preserved after continuation - Sequential client tool args preserved across multiple continuations - Parallel client tool args preserved in batch continuation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7cabf43 commit 8974a32

File tree

1 file changed

+220
-0
lines changed

1 file changed

+220
-0
lines changed
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
import { test, expect } from '../fixtures'
2+
import {
3+
selectScenario,
4+
runTest,
5+
waitForTestComplete,
6+
getMetadata,
7+
getEventLog,
8+
getToolCalls,
9+
} from './helpers'
10+
11+
/**
 * Continuation Re-execution — Tool Call Arguments E2E Tests
 *
 * These tests verify that tool call arguments are correctly preserved during
 * continuation re-executions. When a client tool completes and the conversation
 * continues, the server re-processes message history containing pending tool
 * calls. Without the fix (emitting TOOL_CALL_START + TOOL_CALL_ARGS before
 * TOOL_CALL_END), tool-call parts would arrive at the client with empty
 * arguments {}, potentially causing infinite re-execution loops.
 *
 * These tests validate the end-to-end observable effect: after client tool
 * continuation completes, every tool-call part in the final messages must
 * have its original arguments — not empty {}.
 */
25+
26+
/**
 * Shape of a `tool-call` part as it appears inside a message's `parts` array
 * in the JSON rendered by the test page.
 */
interface ToolCallPart {
  /** Discriminator: always the literal `'tool-call'`. */
  type: 'tool-call'
  /** Identifier of this tool call. */
  id: string
  /** Name of the tool that was invoked (e.g. `show_notification`). */
  name: string
  /**
   * Arguments passed to the tool: either an already-parsed object or a
   * string, which `parseArgs` treats as JSON.
   */
  arguments: Record<string, unknown> | string
  /** Optional state string for the call. NOTE(review): valid values are not visible in this file. */
  state?: string
  /** Optional tool output. NOTE(review): shape depends on the tool; not validated here. */
  output?: unknown
}
34+
35+
/**
 * Shape of one message in the JSON rendered by the test page.
 */
interface Message {
  /** Identifier of the message. */
  id: string
  /** Role of the message author. NOTE(review): allowed values are not visible in this file. */
  role: string
  /**
   * Ordered content parts. Every part carries a `type` discriminator; all
   * other fields are left as `unknown` and must be narrowed before use.
   */
  parts: Array<{ type: string; [key: string]: unknown }>
}
40+
41+
/**
 * Parse messages from the #messages-json-content element.
 *
 * Reads the element's text content and parses it as JSON. A missing element
 * text, an empty string, or a whitespace-only string yields an empty array.
 *
 * @param page - Playwright page hosting the test UI.
 * @returns The parsed message list.
 * @throws SyntaxError if the text is not valid JSON.
 * @throws Error if the parsed JSON is not an array.
 */
async function getMessages(
  page: import('@playwright/test').Page,
): Promise<Array<Message>> {
  const messagesJson = await page
    .locator('#messages-json-content')
    .textContent()

  const trimmed = messagesJson?.trim()
  if (!trimmed) {
    return []
  }

  const parsed: unknown = JSON.parse(trimmed)
  if (!Array.isArray(parsed)) {
    // Fail here with context rather than later with an opaque
    // "flatMap is not a function" when the caller iterates the result.
    throw new Error(
      `Expected #messages-json-content to contain a JSON array, got: ${trimmed.slice(0, 200)}`,
    )
  }

  // Only the top-level array shape is checked; individual messages are not validated.
  return parsed as Array<Message>
}
50+
51+
/**
 * Extract all tool-call parts from messages.
 *
 * Walks the messages in order and keeps every part whose `type` is
 * `'tool-call'`, preserving message order and part order.
 *
 * @param messages - Messages to scan.
 * @returns The tool-call parts, flattened into a single array.
 */
function extractToolCallParts(messages: Array<Message>): Array<ToolCallPart> {
  type MessagePart = Message['parts'][number]

  // The predicate narrows to an intersection because `ToolCallPart` is an
  // interface without an index signature, so on its own it is not assignable
  // to the indexed part type and `part is ToolCallPart` is rejected by tsc.
  const isToolCallPart = (
    part: MessagePart,
  ): part is MessagePart & ToolCallPart => part.type === 'tool-call'

  return messages.flatMap((message) => message.parts.filter(isToolCallPart))
}
59+
60+
/**
 * Normalize tool call arguments to a parsed object for comparison.
 *
 * - Non-string arguments are returned unchanged (same reference).
 * - String arguments are parsed as JSON.
 * - An empty or whitespace-only string is normalized to `{}` so that the
 *   "arguments must not be empty" assertions report it as empty arguments
 *   instead of failing with "Unexpected end of JSON input".
 *
 * @param args - Tool call arguments as stored on a tool-call part.
 * @returns The parsed arguments.
 * @throws SyntaxError if a non-empty string is not valid JSON; the message
 *   includes the raw payload.
 */
function parseArgs(args: Record<string, unknown> | string): unknown {
  if (typeof args !== 'string') {
    return args
  }

  if (args.trim() === '') {
    return {}
  }

  try {
    return JSON.parse(args)
  } catch {
    // Keep the SyntaxError type but surface the offending payload.
    throw new SyntaxError(`Tool call arguments are not valid JSON: ${args}`)
  }
}
69+
70+
/**
 * E2E suite: tool call arguments must survive continuation re-executions.
 *
 * Each test selects a scenario, runs it, waits for completion, then inspects
 * the tool-call parts in the rendered messages JSON.
 */
test.describe('Continuation Re-execution — Tool Call Arguments', () => {
  /**
   * Assert that none of the given tool-call parts has empty `{}` arguments.
   * Shared by all three tests so the check is applied uniformly.
   */
  const expectNoEmptyArguments = (
    toolCallParts: Array<ToolCallPart>,
  ): void => {
    for (const tc of toolCallParts) {
      expect(parseArgs(tc.arguments)).not.toEqual({})
    }
  }

  test('single client tool arguments preserved after continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'client-tool-single', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page)

    // Verify completion
    const metadata = await getMetadata(page)
    expect(metadata.testComplete).toBe('true')
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(1)

    // Parse messages and find tool-call parts
    const messages = await getMessages(page)
    const toolCallParts = extractToolCallParts(messages)

    expect(toolCallParts.length).toBeGreaterThanOrEqual(1)

    // The show_notification tool call must have its original arguments
    const notificationCall = toolCallParts.find(
      (tc) => tc.name === 'show_notification',
    )
    expect(notificationCall).toBeDefined()

    const args = parseArgs(notificationCall!.arguments)
    expect(args).toEqual({
      message: 'Hello from the AI!',
      type: 'info',
    })

    // `find` only inspects the first match, so also check every part:
    // no tool call should have empty arguments
    expectNoEmptyArguments(toolCallParts)
  })

  test('sequential client tool arguments preserved across multiple continuations', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'sequential-client-tools', testId, aimockPort)
    await runTest(page)
    // NOTE(review): extra arguments are presumably a timeout in ms and an
    // expected tool-call count — confirm against ./helpers.
    await waitForTestComplete(page, 15000, 2)

    // Wait for execution events to propagate
    await page.waitForFunction(
      () => {
        const el = document.querySelector('#test-metadata')
        return (
          parseInt(
            el?.getAttribute('data-execution-complete-count') || '0',
            10,
          ) >= 2
        )
      },
      { timeout: 10000 },
    )

    const metadata = await getMetadata(page)
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)

    // Parse messages and find all show_notification tool calls
    const messages = await getMessages(page)
    const toolCallParts = extractToolCallParts(messages).filter(
      (tc) => tc.name === 'show_notification',
    )

    expect(toolCallParts.length).toBeGreaterThanOrEqual(2)

    // Collect the arguments from all notification tool calls
    const allArgs = toolCallParts.map((tc) => parseArgs(tc.arguments))

    // Both sets of arguments must be present (order may vary)
    expect(allArgs).toContainEqual({
      message: 'First notification',
      type: 'info',
    })
    expect(allArgs).toContainEqual({
      message: 'Second notification',
      type: 'warning',
    })

    // No tool call should have empty arguments
    expectNoEmptyArguments(toolCallParts)
  })

  test('parallel client tool arguments preserved in batch continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'parallel-client-tools', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page, 15000, 2)

    const metadata = await getMetadata(page)
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)

    // Parse messages and find tool-call parts
    const messages = await getMessages(page)
    const toolCallParts = extractToolCallParts(messages)

    expect(toolCallParts.length).toBeGreaterThanOrEqual(2)

    // Find each tool by name
    const notificationCall = toolCallParts.find(
      (tc) => tc.name === 'show_notification',
    )
    const chartCall = toolCallParts.find((tc) => tc.name === 'display_chart')

    expect(notificationCall).toBeDefined()
    expect(chartCall).toBeDefined()

    // Verify show_notification arguments
    expect(parseArgs(notificationCall!.arguments)).toEqual({
      message: 'Parallel 1',
      type: 'info',
    })

    // Verify display_chart arguments
    expect(parseArgs(chartCall!.arguments)).toEqual({
      type: 'bar',
      data: [1, 2, 3],
    })

    // No tool call should have empty arguments
    expectNoEmptyArguments(toolCallParts)
  })

  // Screenshot on failure
  test.afterEach(async ({ page }, testInfo) => {
    if (testInfo.status !== testInfo.expectedStatus) {
      await page.screenshot({
        path: `test-results/continuation-args-failure-${testInfo.title.replace(/\s+/g, '-')}.png`,
        fullPage: true,
      })

      // Dump the page's recorded state to help diagnose the failure.
      const events = await getEventLog(page)
      const toolCalls = await getToolCalls(page)
      const metadata = await getMetadata(page)

      console.log('Test failed. Debug info:')
      console.log('Metadata:', metadata)
      console.log('Events:', events)
      console.log('Tool calls:', toolCalls)
    }
  })
})

0 commit comments

Comments
 (0)