Skip to content

Commit 654fb57

Browse files
committed
test(e2e): add continuation re-execution tool call arguments tests
Add 3 E2E tests verifying tool call arguments are preserved during continuation re-executions. Without TOOL_CALL_START/ARGS emission, clients store tool calls with empty {} arguments.
Tests:
- Single client tool args preserved after continuation
- Sequential client tool args preserved across multiple continuations
- Parallel client tool args preserved in batch continuation
1 parent 7cabf43 commit 654fb57

File tree

1 file changed

+179
-0
lines changed

1 file changed

+179
-0
lines changed
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import type { Page } from '@playwright/test'
2+
import { test, expect } from '../fixtures'
3+
import {
4+
selectScenario,
5+
runTest,
6+
waitForTestComplete,
7+
getMetadata,
8+
getEventLog,
9+
getToolCalls,
10+
} from './helpers'
11+
12+
/**
13+
* Continuation Re-execution — Tool Call Arguments E2E Tests
14+
*
15+
* These tests verify that tool call arguments are correctly preserved during
16+
* continuation re-executions. When a client tool completes and the conversation
17+
* continues, the server re-processes message history containing pending tool
18+
* calls. Without emitting TOOL_CALL_START + TOOL_CALL_ARGS before
19+
* TOOL_CALL_END, tool-call parts arrive at the client with empty
20+
* arguments {}, potentially causing infinite re-execution loops.
21+
*
22+
* These tests validate the end-to-end observable effect: after client tool
23+
* continuation completes, every tool-call part in the final messages must
24+
* have its original arguments — not empty {}.
25+
*/
26+
27+
/**
 * Read the serialized message list from `#messages-json-content` and collect
 * every `tool-call` part as `{ name, arguments }`.
 *
 * - Missing or empty element text is treated as an empty message list.
 * - Messages without a `parts` array contribute no tool calls instead of
 *   crashing the helper.
 * - String `arguments` are JSON-decoded; any other value is returned as-is.
 *
 * The return type is intentionally left inferred (`any`, via `JSON.parse`) so
 * existing callers can keep reading `.arguments` off `find()` results.
 *
 * @param page - Playwright page that renders the messages JSON element.
 * @returns Flat list of tool-call parts, in message order.
 * @throws Error when a tool-call part has string arguments that are not valid
 *   JSON; the message names the tool and shows the raw text.
 */
async function getToolCallParts(page: Page) {
  const rawJson = await page.locator('#messages-json-content').textContent()
  const messages = JSON.parse(rawJson || '[]')

  // Decode one part's arguments, failing with enough context to diagnose
  // which tool call arrived with broken (e.g. empty-string) arguments.
  const decodeArguments = (part: Record<string, unknown>): unknown => {
    const raw = part.arguments
    if (typeof raw !== 'string') {
      return raw
    }
    try {
      return JSON.parse(raw)
    } catch {
      throw new Error(
        `tool-call "${String(part.name)}" has unparseable arguments: ${JSON.stringify(raw)}`,
      )
    }
  }

  return messages.flatMap(
    (msg: { parts?: Array<Record<string, unknown>> } | null) => {
      // A message without parts cannot carry tool calls; skip it.
      const parts = Array.isArray(msg?.parts) ? msg.parts : []
      return parts
        .filter((part: Record<string, unknown>) => part.type === 'tool-call')
        .map((part: Record<string, unknown>) => ({
          name: part.name as string,
          arguments: decodeArguments(part),
        }))
    },
  )
}
49+
50+
// Suite: after client-tool continuations finish, every tool-call part in the
// final messages must still carry the arguments it was originally called with.
test.describe('Continuation Re-execution — Tool Call Arguments', () => {
  // One client tool, one continuation.
  test('single client tool arguments preserved after continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'client-tool-single', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page)

    const metadata = await getMetadata(page)
    expect(metadata.testComplete).toBe('true')
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(1)

    const toolCallParts = await getToolCallParts(page)
    expect(toolCallParts.length).toBeGreaterThanOrEqual(1)

    const notificationCall = toolCallParts.find(
      (tc: { name: string }) => tc.name === 'show_notification',
    )
    expect(notificationCall).toBeDefined()
    expect(notificationCall.arguments).toEqual({
      message: 'Hello from the AI!',
      type: 'info',
    })
  })

  // Two client tools executed one after another, each triggering its own
  // continuation.
  test('sequential client tool arguments preserved across multiple continuations', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'sequential-client-tools', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page, 15000, 2)

    // Wait for execution events to propagate.
    // Playwright's signature is waitForFunction(pageFunction, arg, options):
    // the options object must be the THIRD parameter, otherwise it is handed
    // to the page function as `arg` and the timeout is silently ignored.
    await page.waitForFunction(
      (minCompleted) => {
        const el = document.querySelector('#test-metadata')
        return (
          parseInt(
            el?.getAttribute('data-execution-complete-count') || '0',
            10,
          ) >= minCompleted
        )
      },
      2,
      { timeout: 10000 },
    )

    const metadata = await getMetadata(page)
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)

    const toolCallParts = await getToolCallParts(page)
    const notificationCalls = toolCallParts.filter(
      (tc: { name: string }) => tc.name === 'show_notification',
    )
    expect(notificationCalls.length).toBeGreaterThanOrEqual(2)

    // Both sets of arguments must be present (order may vary)
    const allArgs = notificationCalls.map(
      (tc: { arguments: unknown }) => tc.arguments,
    )
    expect(allArgs).toContainEqual({
      message: 'First notification',
      type: 'info',
    })
    expect(allArgs).toContainEqual({
      message: 'Second notification',
      type: 'warning',
    })

    // No tool call should have empty arguments
    for (const tc of notificationCalls) {
      expect(tc.arguments).not.toEqual({})
    }
  })

  // Two different client tools requested in the same batch.
  test('parallel client tool arguments preserved in batch continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'parallel-client-tools', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page, 15000, 2)

    const metadata = await getMetadata(page)
    expect(parseInt(metadata.toolCallCount, 10)).toBeGreaterThanOrEqual(2)

    const toolCallParts = await getToolCallParts(page)
    expect(toolCallParts.length).toBeGreaterThanOrEqual(2)

    const notificationCall = toolCallParts.find(
      (tc: { name: string }) => tc.name === 'show_notification',
    )
    const chartCall = toolCallParts.find(
      (tc: { name: string }) => tc.name === 'display_chart',
    )

    expect(notificationCall).toBeDefined()
    expect(chartCall).toBeDefined()

    expect(notificationCall.arguments).toEqual({
      message: 'Parallel 1',
      type: 'info',
    })
    expect(chartCall.arguments).toEqual({
      type: 'bar',
      data: [1, 2, 3],
    })
  })

  // Screenshot on failure, plus a dump of the harness state to the console
  // so a failed run can be diagnosed from CI logs alone.
  test.afterEach(async ({ page }, testInfo) => {
    if (testInfo.status !== testInfo.expectedStatus) {
      await page.screenshot({
        path: `test-results/continuation-args-failure-${testInfo.title.replace(/\s+/g, '-')}.png`,
        fullPage: true,
      })

      const events = await getEventLog(page)
      const toolCalls = await getToolCalls(page)
      const metadata = await getMetadata(page)

      console.log('Test failed. Debug info:')
      console.log('Metadata:', metadata)
      console.log('Events:', events)
      console.log('Tool calls:', toolCalls)
    }
  })
})

0 commit comments

Comments
 (0)