Skip to content

Commit ca10329

Browse files
committed
Merge origin/main (apply ensureViewerIsPolling to save_as)
2 parents 2b85e59 + 3870056 commit ca10329

6 files changed

Lines changed: 442 additions & 79 deletions

File tree

examples/pdf-server/server.test.ts

Lines changed: 142 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1056,10 +1056,10 @@ describe("interact tool", () => {
10561056
});
10571057

10581058
describe("save_as", () => {
1059-
// save_as is the FIRST interact action with a full request/reply unit
1060-
// test (get_text/get_screenshot use the same plumbing but are e2e-only).
1061-
// The roundtrip tests need: writable scope, kick off interact WITHOUT
1062-
// awaiting (it blocks until the view replies), poll → submit → await.
1059+
// Roundtrip tests need: writable scope, kick off interact WITHOUT awaiting
1060+
// (it blocks until the view replies), poll → submit → await. The poll()
1061+
// call also registers the uuid in viewsPolled, satisfying
1062+
// ensureViewerIsPolling — without it interact would hang ~8s and fail.
10631063

10641064
let tmpDir: string;
10651065
let savedDirs: Set<string>;
@@ -1260,4 +1260,142 @@ describe("interact tool", () => {
12601260
await server.close();
12611261
});
12621262
});
1263+
1264+
describe("viewer liveness", () => {
1265+
// get_screenshot/get_text fail fast when the iframe never polled, instead
1266+
// of waiting 45s for a viewer that isn't there. Reproduces the case where
1267+
// the host goes idle before the iframe reaches startPolling().
1268+
1269+
it("get_screenshot fails fast when viewer never polled", async () => {
1270+
const { server, client } = await connect();
1271+
const uuid = "never-polled-screenshot";
1272+
1273+
const started = Date.now();
1274+
const r = await client.callTool({
1275+
name: "interact",
1276+
arguments: { viewUUID: uuid, action: "get_screenshot", page: 1 },
1277+
});
1278+
const elapsed = Date.now() - started;
1279+
1280+
expect(r.isError).toBe(true);
1281+
expect(firstText(r)).toContain("never connected");
1282+
expect(firstText(r)).toContain(uuid);
1283+
expect(firstText(r)).toContain("display_pdf again"); // recovery hint
1284+
// Fast-fail bound (~8s grace), well under the 45s page-data timeout.
1285+
// 15s upper bound leaves slack for CI scheduling.
1286+
expect(elapsed).toBeLessThan(15_000);
1287+
1288+
await client.close();
1289+
await server.close();
1290+
}, 20_000);
1291+
1292+
it("get_screenshot waits full timeout when viewer polled then went silent", async () => {
1293+
// Viewer polled once (proving it mounted) then hung on a heavy render.
1294+
// The grace check passes, so we fall through to the 45s page-data wait —
1295+
// verified here by racing against a 12s deadline that should NOT win.
1296+
const { server, client } = await connect();
1297+
const uuid = "polled-then-silent";
1298+
1299+
// Viewer's first poll: drain whatever's there so it returns fast.
1300+
// Enqueue a trivial command first so poll returns via the batch-wait
1301+
// path (~200ms) instead of blocking on the 30s long-poll.
1302+
await client.callTool({
1303+
name: "interact",
1304+
arguments: { viewUUID: uuid, action: "navigate", page: 1 },
1305+
});
1306+
await poll(client, uuid);
1307+
1308+
// Now get_screenshot — viewer has polled, so no fast-fail. But viewer
1309+
// never calls submit_page_data → should wait beyond the grace period.
1310+
const outcome = await Promise.race([
1311+
client
1312+
.callTool({
1313+
name: "interact",
1314+
arguments: { viewUUID: uuid, action: "get_screenshot", page: 1 },
1315+
})
1316+
.then(() => "completed" as const),
1317+
new Promise<"still-waiting">((r) =>
1318+
setTimeout(() => r("still-waiting"), 12_000),
1319+
),
1320+
]);
1321+
1322+
expect(outcome).toBe("still-waiting");
1323+
1324+
await client.close();
1325+
await server.close();
1326+
}, 20_000);
1327+
1328+
it("get_screenshot succeeds when viewer polls during grace window", async () => {
1329+
// Model calls interact before the viewer has polled — but the viewer
1330+
// shows up within the grace period and completes the roundtrip.
1331+
const { server, client } = await connect();
1332+
const uuid = "late-arriving-viewer";
1333+
1334+
const interactPromise = client.callTool({
1335+
name: "interact",
1336+
arguments: { viewUUID: uuid, action: "get_screenshot", page: 1 },
1337+
});
1338+
1339+
// Viewer connects 500ms late — well inside the grace window.
1340+
await new Promise((r) => setTimeout(r, 500));
1341+
const cmds = await poll(client, uuid);
1342+
const getPages = cmds.find((c) => c.type === "get_pages");
1343+
expect(getPages).toBeDefined();
1344+
1345+
// Viewer responds with the page data.
1346+
await client.callTool({
1347+
name: "submit_page_data",
1348+
arguments: {
1349+
requestId: getPages!.requestId as string,
1350+
pages: [
1351+
{ page: 1, image: Buffer.from("fake-jpeg").toString("base64") },
1352+
],
1353+
},
1354+
});
1355+
1356+
const r = await interactPromise;
1357+
expect(r.isError).toBeFalsy();
1358+
expect((r.content as Array<{ type: string }>)[0].type).toBe("image");
1359+
1360+
await client.close();
1361+
await server.close();
1362+
}, 15_000);
1363+
1364+
it("batch failure puts error message first", async () => {
1365+
// When [fill_form, get_screenshot] runs and get_screenshot times out,
1366+
// content[0] must describe the failure — not the earlier success. Some
1367+
// hosts flatten isError results to content[0].text only, which previously
1368+
// showed "Queued: Filled N fields" with isError:true and dropped the
1369+
// actual timeout entirely.
1370+
const { server, client } = await connect();
1371+
const uuid = "batch-error-ordering";
1372+
1373+
const r = await client.callTool({
1374+
name: "interact",
1375+
arguments: {
1376+
viewUUID: uuid,
1377+
commands: [
1378+
{ action: "navigate", page: 3 }, // succeeds → "Queued: ..."
1379+
{ action: "get_screenshot", page: 1 }, // never-polled → fast-fail
1380+
],
1381+
},
1382+
});
1383+
1384+
expect(r.isError).toBe(true);
1385+
const texts = (r.content as Array<{ type: string; text: string }>).map(
1386+
(c) => c.text,
1387+
);
1388+
// content[0]: batch-failed summary naming the culprit
1389+
expect(texts[0]).toContain("failed");
1390+
expect(texts[0]).toContain("2/2");
1391+
expect(texts[0]).toContain("get_screenshot");
1392+
// content[1]: the actual error
1393+
expect(texts[1]).toContain("never connected");
1394+
// content[2]: the earlier success, pushed to the back
1395+
expect(texts[2]).toContain("Queued");
1396+
1397+
await client.close();
1398+
await server.close();
1399+
}, 15_000);
1400+
});
12631401
});

examples/pdf-server/server.ts

Lines changed: 100 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ export type { PdfCommand };
196196
// reject first and return a real error instead of the client cancelling us.
197197
const GET_PAGES_TIMEOUT_MS = 45_000;
198198

199+
/**
200+
* Grace period for the viewer's first poll. If interact() arrives before the
201+
* iframe has ever polled, we wait this long for it to show up (iframe mount +
202+
* PDF load + startPolling). If no poll comes, the viewer almost certainly
203+
* never rendered — failing fast beats a silent 45s hang.
204+
*/
205+
const VIEWER_FIRST_POLL_GRACE_MS = 8_000;
206+
199207
interface PageDataEntry {
200208
page: number;
201209
text?: string;
@@ -232,6 +240,33 @@ function waitForPageData(
232240
});
233241
}
234242

243+
/**
244+
* Wait for the viewer's first poll_pdf_commands call.
245+
*
246+
* Called before waitForPageData() / waitForSaveData() so a viewer that never
247+
* mounted fails in ~8s with a specific message instead of a generic 45s
248+
* "Timeout waiting for ..." that gives no hint why.
249+
*
250+
* Intentionally does NOT touch pollWaiters: piggybacking on that single-slot
251+
* Map races with poll_pdf_commands' batch-wait branch (which never cancels the
252+
* prior waiter) and with concurrent interact calls (which would overwrite each
253+
* other). A plain check loop on viewsPolled is stateless — multiple callers
254+
* can wait independently and all observe the same add() when it happens.
255+
*/
256+
async function ensureViewerIsPolling(uuid: string): Promise<void> {
257+
const deadline = Date.now() + VIEWER_FIRST_POLL_GRACE_MS;
258+
while (!viewsPolled.has(uuid)) {
259+
if (Date.now() >= deadline) {
260+
throw new Error(
261+
`Viewer never connected for viewUUID ${uuid} (no poll within ${VIEWER_FIRST_POLL_GRACE_MS / 1000}s). ` +
262+
`The iframe likely failed to mount — this happens when the conversation ` +
263+
`goes idle before the viewer finishes loading. Call display_pdf again to get a fresh viewUUID.`,
264+
);
265+
}
266+
await new Promise((r) => setTimeout(r, 100));
267+
}
268+
}
269+
235270
// =============================================================================
236271
// Pending save_as Requests (request-response bridge via client)
237272
// =============================================================================
@@ -278,6 +313,15 @@ const commandQueues = new Map<string, QueueEntry>();
278313
/** Waiters for long-poll: resolve callback wakes up a blocked poll_pdf_commands */
279314
const pollWaiters = new Map<string, () => void>();
280315

316+
/**
317+
* viewUUIDs that have been polled at least once. A view missing from this set
318+
* means the iframe never reached startPolling() — usually because it wasn't
319+
* mounted yet, or ontoolresult threw before the poll loop started. Used to
320+
* fail fast in get_screenshot/get_text instead of waiting the full 45s for
321+
* a viewer that was never there.
322+
*/
323+
const viewsPolled = new Set<string>();
324+
281325
/** Valid form field names per viewer UUID (populated during display_pdf) */
282326
const viewFieldNames = new Map<string, Set<string>>();
283327

@@ -323,6 +367,7 @@ function pruneStaleQueues(): void {
323367
commandQueues.delete(uuid);
324368
viewFieldNames.delete(uuid);
325369
viewFieldInfo.delete(uuid);
370+
viewsPolled.delete(uuid);
326371
stopFileWatch(uuid);
327372
}
328373
}
@@ -847,6 +892,10 @@ interface FormFieldInfo {
847892
y: number;
848893
width: number;
849894
height: number;
895+
/** Radio button export value (buttonValue) — distinguishes widgets that share a field name. */
896+
exportValue?: string;
897+
/** Dropdown/listbox option values, as seen in the widget's `options` array. */
898+
options?: string[];
850899
}
851900

852901
/**
@@ -899,6 +948,16 @@ async function extractFormFieldInfo(
899948
// Convert to model coords (top-left origin): modelY = pageHeight - pdfY - height
900949
const modelY = pageHeight - y2;
901950

951+
// Choice widgets (combo/listbox) carry `options` as
952+
// [{exportValue, displayValue}]. Expose export values — that's
953+
// what fill_form needs.
954+
let options: string[] | undefined;
955+
if (Array.isArray(ann.options) && ann.options.length > 0) {
956+
options = ann.options
957+
.map((o: { exportValue?: string }) => o?.exportValue)
958+
.filter((v: unknown): v is string => typeof v === "string");
959+
}
960+
902961
fields.push({
903962
name: fieldName,
904963
type: fieldType,
@@ -908,6 +967,12 @@ async function extractFormFieldInfo(
908967
width: Math.round(width),
909968
height: Math.round(height),
910969
...(ann.alternativeText ? { label: ann.alternativeText } : undefined),
970+
// Radio: buttonValue is the per-widget export value — the only
971+
// thing distinguishing three `size [Btn]` lines from each other.
972+
...(ann.radioButton && ann.buttonValue != null
973+
? { exportValue: String(ann.buttonValue) }
974+
: undefined),
975+
...(options?.length ? { options } : undefined),
911976
});
912977
}
913978
}
@@ -1317,6 +1382,14 @@ Set \`elicit_form_inputs\` to true to prompt the user to fill form fields before
13171382
y: z.number(),
13181383
width: z.number(),
13191384
height: z.number(),
1385+
exportValue: z
1386+
.string()
1387+
.optional()
1388+
.describe("Radio button value — pass this to fill_form"),
1389+
options: z
1390+
.array(z.string())
1391+
.optional()
1392+
.describe("Dropdown/listbox option values"),
13201393
}),
13211394
)
13221395
.optional()
@@ -1488,8 +1561,14 @@ URL: ${normalized}`,
14881561
for (const f of fields) {
14891562
const label = f.label ? ` "${f.label}"` : "";
14901563
const nameStr = f.name || "(unnamed)";
1564+
// Radio: =<exportValue> tells the model what value to pass.
1565+
// Dropdown: options:[...] lists valid choices.
1566+
const exportSuffix = f.exportValue ? `=${f.exportValue}` : "";
1567+
const optsSuffix = f.options
1568+
? ` options:[${f.options.join(", ")}]`
1569+
: "";
14911570
lines.push(
1492-
` ${nameStr}${label} [${f.type}] at (${f.x},${f.y}) ${f.width}×${f.height}`,
1571+
` ${nameStr}${exportSuffix}${label} [${f.type}] at (${f.x},${f.y}) ${f.width}×${f.height}${optsSuffix}`,
14931572
);
14941573
}
14951574
}
@@ -1973,6 +2052,7 @@ URL: ${normalized}`,
19732052

19742053
let pageData: PageDataEntry[];
19752054
try {
2055+
await ensureViewerIsPolling(uuid);
19762056
pageData = await waitForPageData(requestId, signal);
19772057
} catch (err) {
19782058
return {
@@ -2021,6 +2101,7 @@ URL: ${normalized}`,
20212101

20222102
let pageData: PageDataEntry[];
20232103
try {
2104+
await ensureViewerIsPolling(uuid);
20242105
pageData = await waitForPageData(requestId, signal);
20252106
} catch (err) {
20262107
return {
@@ -2105,6 +2186,7 @@ URL: ${normalized}`,
21052186
enqueueCommand(uuid, { type: "save_as", requestId });
21062187
let data: string;
21072188
try {
2189+
await ensureViewerIsPolling(uuid);
21082190
data = await waitForSaveData(requestId, signal);
21092191
} catch (err) {
21102192
return {
@@ -2356,7 +2438,7 @@ Example — add a signature image and a stamp, then screenshot to verify:
23562438

23572439
// Process commands sequentially, collecting all content parts
23582440
const allContent: ContentPart[] = [];
2359-
let hasError = false;
2441+
let failedAt = -1;
23602442

23612443
for (let i = 0; i < commandList.length; i++) {
23622444
const result = await processInteractCommand(
@@ -2365,15 +2447,27 @@ Example — add a signature image and a stamp, then screenshot to verify:
23652447
extra.signal,
23662448
);
23672449
if (result.isError) {
2368-
hasError = true;
2450+
// Error content first. Some hosts flatten isError results to
2451+
// content[0].text — if we push the error after prior successes,
2452+
// the user sees "Queued: Filled 7 fields" with isError:true and
2453+
// the actual failure is silently dropped.
2454+
allContent.unshift(...result.content);
2455+
failedAt = i;
2456+
break;
23692457
}
23702458
allContent.push(...result.content);
2371-
if (hasError) break; // Stop on first error
2459+
}
2460+
2461+
if (failedAt >= 0 && commandList.length > 1) {
2462+
allContent.unshift({
2463+
type: "text",
2464+
text: `Batch failed at step ${failedAt + 1}/${commandList.length} (${commandList[failedAt].action}):`,
2465+
});
23722466
}
23732467

23742468
return {
23752469
content: allContent,
2376-
...(hasError ? { isError: true } : {}),
2470+
...(failedAt >= 0 ? { isError: true } : {}),
23772471
};
23782472
},
23792473
);
@@ -2473,6 +2567,7 @@ Example — add a signature image and a stamp, then screenshot to verify:
24732567
_meta: { ui: { visibility: ["app"] } },
24742568
},
24752569
async ({ viewUUID: uuid }): Promise<CallToolResult> => {
2570+
viewsPolled.add(uuid);
24762571
// If commands are already queued, wait briefly to let more accumulate
24772572
if (commandQueues.has(uuid)) {
24782573
await new Promise((r) => setTimeout(r, POLL_BATCH_WAIT_MS));

0 commit comments

Comments
 (0)