Skip to content

Commit 41a47c7

Browse files
committed
test(e2e): add continuation re-execution tool call arguments tests
Add 3 E2E tests verifying that tool call arguments are preserved during continuation re-executions. Without TOOL_CALL_START/TOOL_CALL_ARGS emission, clients store tool calls with empty {} arguments.

Tests:
- Single client tool args preserved after continuation
- Sequential client tool args preserved across multiple continuations
- Parallel client tool args preserved in batch continuation
1 parent 7cabf43 commit 41a47c7

2 files changed

Lines changed: 177 additions & 0 deletions

File tree

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
import { test, expect } from '../fixtures'
2+
import {
3+
selectScenario,
4+
runTest,
5+
waitForTestComplete,
6+
getMetadata,
7+
getEventLog,
8+
getToolCalls,
9+
getToolCallParts,
10+
} from './helpers'
11+
12+
/**
13+
* Continuation Re-execution — Tool Call Arguments E2E Tests
14+
*
15+
* These tests verify that tool call arguments are correctly preserved during
16+
* continuation re-executions. When a client tool completes and the conversation
17+
* continues, the server re-processes message history containing pending tool
18+
* calls. Without emitting TOOL_CALL_START + TOOL_CALL_ARGS before
19+
* TOOL_CALL_END, tool-call parts arrive at the client with empty
20+
* arguments {}, potentially causing infinite re-execution loops.
21+
*/
22+
23+
test.describe('Continuation Re-execution — Tool Call Arguments', () => {
  // Scenario with one client tool call: after the continuation, the stored
  // tool-call part must still carry the arguments asserted below.
  test('single client tool arguments preserved after continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'client-tool-single', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page)

    const metadata = await getMetadata(page)
    expect(metadata.testComplete).toBe('true')
    expect(
      Number.parseInt(metadata.toolCallCount, 10),
    ).toBeGreaterThanOrEqual(1)

    const parts = await getToolCallParts(page)
    expect(parts.length).toBeGreaterThanOrEqual(1)

    const notificationCall = parts.find((tc) => tc.name === 'show_notification')
    expect(notificationCall).toBeDefined()
    expect(notificationCall?.arguments).toEqual({
      message: 'Hello from the AI!',
      type: 'info',
    })
  })

  // Scenario with two client tool calls executed one after another, each
  // followed by its own continuation.
  test('sequential client tool arguments preserved across multiple continuations', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'sequential-client-tools', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page, 15000, 2)

    // Wait for execution events to propagate.
    // Playwright's signature is waitForFunction(pageFunction, arg, options):
    // the options object must be the THIRD argument. Passed second, it is
    // treated as `arg` and the timeout is silently ignored.
    await page.waitForFunction(
      () => {
        // Runs in the browser; must not reference anything from this file.
        const el = document.querySelector('#test-metadata')
        const completed = Number.parseInt(
          el?.getAttribute('data-execution-complete-count') || '0',
          10,
        )
        return completed >= 2
      },
      undefined,
      { timeout: 10000 },
    )

    const metadata = await getMetadata(page)
    expect(
      Number.parseInt(metadata.toolCallCount, 10),
    ).toBeGreaterThanOrEqual(2)

    const parts = await getToolCallParts(page)
    const notificationCalls = parts.filter(
      (tc) => tc.name === 'show_notification',
    )
    expect(notificationCalls.length).toBeGreaterThanOrEqual(2)

    // Both sets of arguments must be present (order may vary)
    const allArgs = notificationCalls.map((tc) => tc.arguments)
    expect(allArgs).toContainEqual({
      message: 'First notification',
      type: 'info',
    })
    expect(allArgs).toContainEqual({
      message: 'Second notification',
      type: 'warning',
    })

    // No tool call should have empty arguments
    expect(
      notificationCalls.every((tc) => Object.keys(tc.arguments).length > 0),
    ).toBe(true)
  })

  // Scenario with two different client tools requested in the same batch.
  test('parallel client tool arguments preserved in batch continuation', async ({
    page,
    testId,
    aimockPort,
  }) => {
    await selectScenario(page, 'parallel-client-tools', testId, aimockPort)
    await runTest(page)
    await waitForTestComplete(page, 15000, 2)

    const metadata = await getMetadata(page)
    expect(
      Number.parseInt(metadata.toolCallCount, 10),
    ).toBeGreaterThanOrEqual(2)

    const parts = await getToolCallParts(page)
    expect(parts.length).toBeGreaterThanOrEqual(2)

    const notificationCall = parts.find((tc) => tc.name === 'show_notification')
    const chartCall = parts.find((tc) => tc.name === 'display_chart')

    expect(notificationCall).toBeDefined()
    expect(chartCall).toBeDefined()

    expect(notificationCall?.arguments).toEqual({
      message: 'Parallel 1',
      type: 'info',
    })
    expect(chartCall?.arguments).toEqual({
      type: 'bar',
      data: [1, 2, 3],
    })
  })

  // On failure (actual status differs from the expected one), capture a
  // full-page screenshot and dump metadata, events and tool calls to the
  // console to help diagnose the run.
  test.afterEach(async ({ page }, testInfo) => {
    if (testInfo.status !== testInfo.expectedStatus) {
      await page.screenshot({
        path: `test-results/continuation-args-failure-${testInfo.title.replace(/\s+/g, '-')}.png`,
        fullPage: true,
      })

      const events = await getEventLog(page)
      const toolCalls = await getToolCalls(page)
      const metadata = await getMetadata(page)

      console.log('Test failed. Debug info:')
      console.log('Metadata:', metadata)
      console.log('Events:', events)
      console.log('Tool calls:', toolCalls)
    }
  })
})

testing/e2e/tests/tools-test/helpers.ts

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,36 @@ export async function getToolCalls(
179179
}
180180
})
181181
}
182+
183+
/**
 * Extract tool-call parts with parsed arguments from #messages-json-content.
 *
 * Each part's arguments are normalised independently: a JSON string is
 * parsed, and anything that does not end up as a plain object (missing
 * value, empty or malformed string, array, primitive) becomes `{}`. One bad
 * part therefore no longer discards every other tool call, and callers can
 * always run `Object.keys()` on `arguments`.
 *
 * @param page - Page whose DOM contains the `#messages-json-content` element
 *   (presumably a Playwright `Page`; its import is outside this excerpt).
 * @returns One `{ name, arguments }` entry per part whose `type` is
 *   `'tool-call'`, in document order. Returns `[]` when the element is
 *   missing or its text is not a JSON array.
 */
export async function getToolCallParts(
  page: Page,
): Promise<Array<{ name: string; arguments: Record<string, unknown> }>> {
  // The callback reads `document`, i.e. it executes in the page context, so
  // it must stay self-contained and cannot call helpers from this module.
  return page.evaluate(() => {
    const el = document.getElementById('messages-json-content')
    if (!el) return []

    let messages: unknown
    try {
      messages = JSON.parse(el.textContent || '[]')
    } catch {
      // Best effort: unparsable page content yields no parts.
      return []
    }
    if (!Array.isArray(messages)) return []

    const parts: Array<{ name: string; arguments: Record<string, unknown> }> =
      []
    for (const msg of messages) {
      const msgParts = msg?.parts
      if (!Array.isArray(msgParts)) continue

      for (const part of msgParts) {
        if (part?.type !== 'tool-call') continue

        let args: unknown = part.arguments
        if (typeof args === 'string') {
          try {
            args = JSON.parse(args)
          } catch {
            // Empty or truncated argument strings are reported as `{}` below.
            args = undefined
          }
        }

        const isPlainObject =
          typeof args === 'object' && args !== null && !Array.isArray(args)
        parts.push({
          name: part.name,
          arguments: isPlainObject ? (args as Record<string, unknown>) : {},
        })
      }
    }
    return parts
  })
}

0 commit comments

Comments
 (0)