Skip to content

Commit 627ed68

Browse files
authored
chore: DOM elements as inputs for in-page tools (#1791)
DOM elements are non-serializable and therefore cannot be directly sent between the inspected page and the MCP server. JSONSchema also has no native type for DOM elements. If an in-page tool expects a DOM element as an input parameter, it should specify this in its input schema by adding `'x-mcp-type': 'HTMLElement'` to the object it expects to be a DOM element. The MCP server internally refers to DOM elements by a UID (UIDs are assigned when generating a page snapshot which is based on the page's accessibility tree). This change provides the mapping between DOM element and UID in both directions: 1) The tool's input schema is rewritten internally, adding a required UID attribute to objects with `'x-mcp-type': 'HTMLElement'`. This allows the MCP server to call the in-page tool with UIDs where the tool expects DOM elements. 2) In the page context, the UIDs are replaced with the corresponding DOM elements, before the actual in-page tool is called. This means that the in-page tool receives DOM elements as parameters where it expects them.
1 parent cae3f2c commit 627ed68

File tree

5 files changed

+436
-3
lines changed

5 files changed

+436
-3
lines changed

src/McpResponse.ts

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import type {
1919
Page,
2020
ResourceType,
2121
TextContent,
22+
JSONSchema7Definition,
2223
} from './third_party/index.js';
2324
import type {ToolGroup, ToolDefinition} from './tools/inPage.js';
2425
import {handleDialog} from './tools/pages.js';
@@ -41,6 +42,57 @@ interface TraceInsightData {
4142
insightName: InsightName;
4243
}
4344

45+
/**
 * Rewrites a JSON schema in place so that every (sub-)schema tagged with
 * `'x-mcp-type': 'HTMLElement'` describes an object whose only declared, and
 * required, property is a string `uid`.
 *
 * The schema is walked recursively through `properties`, `items` (both the
 * single-schema and the tuple form) and the `anyOf` / `allOf` / `oneOf`
 * combinators. Other keywords are left untouched.
 *
 * Boolean schemas are ignored. Malformed input (a `null`/`undefined` schema or
 * sub-schema, a non-object `properties`, a non-array combinator) is skipped
 * instead of raising a `TypeError`, so that a single broken tool schema cannot
 * abort the rewrite of the remaining ones.
 *
 * @param schema - The schema to rewrite. It is mutated; nothing is returned.
 */
export function replaceHtmlElementsWithUids(schema: JSONSchema7Definition) {
  // Covers boolean schemas as well as null/undefined and other non-object
  // values, none of which can contain sub-schemas.
  if (typeof schema !== 'object' || schema === null) {
    return;
  }

  const isHtmlElement = Object.entries(schema).some(
    ([key, value]) => key === 'x-mcp-type' && value === 'HTMLElement',
  );
  if (isHtmlElement) {
    // Any previously declared properties are discarded: the element is
    // addressed exclusively through its UID.
    schema.properties = {uid: {type: 'string'}};
    schema.required = ['uid'];
  }

  if (typeof schema.properties === 'object' && schema.properties !== null) {
    for (const propertySchema of Object.values(schema.properties)) {
      replaceHtmlElementsWithUids(propertySchema);
    }
  }

  if (Array.isArray(schema.items)) {
    // Tuple form: one schema per array position.
    for (const itemSchema of schema.items) {
      replaceHtmlElementsWithUids(itemSchema);
    }
  } else if (schema.items) {
    replaceHtmlElementsWithUids(schema.items);
  }

  for (const combinator of ['anyOf', 'allOf', 'oneOf'] as const) {
    const alternatives = schema[combinator];
    if (Array.isArray(alternatives)) {
      for (const alternative of alternatives) {
        replaceHtmlElementsWithUids(alternative);
      }
    }
  }
}
95+
4496
async function getToolGroup(
4597
page: McpPage,
4698
): Promise<ToolGroup<ToolDefinition> | undefined> {
@@ -91,6 +143,10 @@ async function getToolGroup(
91143
}, 0);
92144
});
93145
});
146+
147+
for (const tool of toolGroup?.tools ?? []) {
148+
replaceHtmlElementsWithUids(tool.inputSchema);
149+
}
94150
return toolGroup;
95151
}
96152

src/third_party/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ export {default as puppeteer} from 'puppeteer-core';
4141
export type * from 'puppeteer-core';
4242
export {PipeTransport} from 'puppeteer-core/internal/node/PipeTransport.js';
4343
export type {CdpPage} from 'puppeteer-core/internal/cdp/Page.js';
44-
export type {JSONSchema7} from 'json-schema';
44+
export type {JSONSchema7, JSONSchema7Definition} from 'json-schema';
4545
export {
4646
resolveDefaultUserDataDir,
4747
detectBrowserPlatform,

src/tools/inPage.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,12 @@
44
* SPDX-License-Identifier: Apache-2.0
55
*/
66

7-
import {zod, ajv, type JSONSchema7} from '../third_party/index.js';
7+
import {
8+
zod,
9+
ajv,
10+
type JSONSchema7,
11+
type ElementHandle,
12+
} from '../third_party/index.js';
813

914
import {ToolCategory} from './categories.js';
1015
import {definePageTool} from './ToolDefinition.js';
@@ -87,6 +92,22 @@ export const executeInPageTool = definePageTool({
8792
}
8893
}
8994

95+
// Creates array of ElementHandles from the UIDs in the params.
96+
// We do not replace the uids with the ElementHandles yet, because
97+
// the `evaluate` function only turns them into DOM elements if they
98+
// are passed as non-nested arguments.
99+
const handles: ElementHandle[] = [];
100+
for (const value of Object.values(params)) {
101+
if (
102+
value instanceof Object &&
103+
'uid' in value &&
104+
typeof value.uid === 'string' &&
105+
Object.keys(value).length === 1
106+
) {
107+
handles.push(await request.page.getElementByUid(value.uid));
108+
}
109+
}
110+
90111
const toolGroup = request.page.getInPageTools();
91112
const tool = toolGroup?.tools.find(t => t.name === toolName);
92113
if (!tool) {
@@ -102,7 +123,19 @@ export const executeInPageTool = definePageTool({
102123
}
103124

104125
const result = await request.page.pptrPage.evaluate(
105-
async (name, args) => {
126+
async (name, args, ...elements) => {
127+
// Replace the UIDs with DOM elements.
128+
for (const [key, value] of Object.entries(args)) {
129+
if (
130+
value instanceof Object &&
131+
'uid' in value &&
132+
typeof value.uid === 'string' &&
133+
Object.keys(value).length === 1
134+
) {
135+
args[key] = elements.shift();
136+
}
137+
}
138+
106139
if (!window.__dtmcp?.executeTool) {
107140
throw new Error('No tools found on the page');
108141
}
@@ -114,6 +147,7 @@ export const executeInPageTool = definePageTool({
114147
},
115148
toolName,
116149
params,
150+
...handles,
117151
);
118152
response.appendResponseLine(JSON.stringify(result, null, 2));
119153
},

0 commit comments

Comments
 (0)