Skip to content

Commit 4c95d8c

Browse files
SrinivasanTarget, saikrishna321, claude
committed
fix: update appium-mcp tool calls and fix vscode webview buttons
Align tool calls with latest appium-mcp API:
- Serialize capabilities as JSON string for appium_session_management
- Replace removed tools (appium_clear_element, appium_long_press, appium_get_window_rect) with their new equivalents
- Clean up stale tool references in exclusion lists

Fix vscode extension webview buttons not responding to clicks by replacing inline onclick handlers (blocked by CSP) with addEventListener and resolving a duplicate const declaration that crashed the script.

Co-Authored-By: Sai Krishna <saikrishna321@yahoo.com>
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 30a819d commit 4c95d8c

9 files changed

Lines changed: 45 additions & 33 deletions

File tree

src/agent/loop.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,7 +1197,8 @@ async function executeMetaTool(
11971197
for (const fb of lpFallbackStrategies) {
11981198
try {
11991199
const uuid = await findElement(mcp, fb.s as any, lpSelector);
1200-
const lpResult = await mcp.callTool('appium_long_press', {
1200+
const lpResult = await mcp.callTool('appium_gesture', {
1201+
action: 'long_press',
12011202
elementUUID: uuid,
12021203
duration: lpDuration,
12031204
});
@@ -1359,7 +1360,7 @@ async function executeMetaTool(
13591360

13601361
// Clear existing text if we have an active element
13611362
if (activeUuid) {
1362-
await mcp.callTool('appium_clear_element', { elementUUID: activeUuid }).catch(() => {});
1363+
await mcp.callTool('appium_set_value', { elementUUID: activeUuid, text: '' }).catch(() => {});
13631364
}
13641365

13651366
// Always use W3C Actions — works on local and cloud, Android and iOS

src/device/session.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ export async function createPlatformSession(
4949

5050
const args: Record<string, unknown> = { platform };
5151

52-
// Add platform-specific capabilities
52+
// Add platform-specific capabilities (serialized as JSON string for appium-mcp)
5353
if (platform === 'android') {
5454
// extraCaps wins over config defaults (e.g. parallel workers override mjpeg/system ports)
5555
const caps = { ...buildAndroidCapabilities(config), ...extraCaps };
5656
if (Object.keys(caps).length > 0) {
57-
args.capabilities = caps;
57+
args.capabilities = JSON.stringify(caps);
5858
}
5959
} else if (platform === 'ios') {
6060
// For iOS, appium-mcp handles most capabilities internally (WDA setup, device selection).
@@ -64,7 +64,7 @@ export async function createPlatformSession(
6464
...extraCaps,
6565
};
6666
if (Object.keys(iosCaps).length > 0) {
67-
args.capabilities = iosCaps;
67+
args.capabilities = JSON.stringify(iosCaps);
6868
}
6969
}
7070

@@ -154,7 +154,7 @@ async function createLambdaTestSession(
154154
const args: Record<string, unknown> = {
155155
platform,
156156
remoteServerUrl: hubUrl,
157-
capabilities,
157+
capabilities: JSON.stringify(capabilities),
158158
};
159159

160160
try {
@@ -218,7 +218,7 @@ async function detectScreenSize(mcp: MCPClient, platform: Platform): Promise<voi
218218
// skip them here and let getScreenSizeForStark correct them at runtime using
219219
// the actual screenshot for comparison.
220220
try {
221-
const result = await mcp.callTool('appium_get_window_rect', {});
221+
const result = await mcp.callTool('appium_get_window_size', {});
222222
const text = extractText(result);
223223
try {
224224
const obj = JSON.parse(text);

src/flow/run-yaml-flow.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,7 +411,7 @@ async function flowTypeText(
411411
if (!target) {
412412
const visionUuid = await findByVision(mcp, 'text input field, search box, or editable area');
413413
if (visionUuid && !isAIElement(visionUuid)) {
414-
await mcp.callTool('appium_clear_element', { elementUUID: visionUuid }).catch(() => {});
414+
await mcp.callTool('appium_set_value', { elementUUID: visionUuid, text: '' }).catch(() => {});
415415
const setResult = await mcp.callTool('appium_set_value', { elementUUID: visionUuid, text });
416416
const setText =
417417
setResult.content
@@ -444,7 +444,7 @@ async function flowTypeText(
444444
return { success: false, message: 'Could not resolve editable element' };
445445
}
446446
await mcp.callTool('appium_gesture', { action: 'tap', elementUUID: uuid });
447-
await mcp.callTool('appium_clear_element', { elementUUID: uuid }).catch(() => {});
447+
await mcp.callTool('appium_set_value', { elementUUID: uuid, text: '' }).catch(() => {});
448448
const setResult = await mcp.callTool('appium_set_value', {
449449
...(Config.CLOUD_PROVIDER ? { w3cActions: true } : { elementUUID: uuid }),
450450
text,

src/mcp/client.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function resolveAppiumMcp(): { command: string; args: string[] } {
3030
const appiumMcp = resolveAppiumMcp();
3131

3232
/** Tools that produce verbose output we don't want to log */
33-
const QUIET_TOOLS = new Set(['appium_get_page_source', 'appium_screenshot', 'appium_list_apps']);
33+
const QUIET_TOOLS = new Set(['appium_get_page_source', 'appium_screenshot']);
3434

3535
const mcpDebug = process.env.MCP_DEBUG === '1' || process.env.MCP_DEBUG === 'true';
3636

src/mcp/session-client.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import type { MCPClient, MCPToolResult, MCPToolInfo } from './types.js';
1919
const PRE_SESSION_TOOLS = new Set([
2020
'appium_session_management',
2121
'select_device',
22-
'delete_all_sessions',
2322
]);
2423

2524
export class SessionScopedMCPClient implements MCPClient {

src/mcp/tool-converter.ts

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,8 @@ export const EXCLUDED_MCP_TOOLS = new Set([
5555
/** Additional tools to exclude in vision mode — DOM-based tools that distract the agent */
5656
export const VISION_MODE_EXCLUDED_TOOLS = new Set([
5757
'appium_find_element',
58-
'appium_find_elements',
5958
'appium_get_page_source',
6059
'appium_get_text',
61-
'appium_get_attribute',
60+
'appium_get_element_attribute',
6261
'appium_get_active_element',
63-
'appium_clear_element',
64-
'appium_scroll_to_element',
6562
]);

src/recording/replayer.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ async function executeReplayAction(
273273
const uuid = await findElementWithFallback(mcp, screenElements, elementId, coords);
274274
if (uuid) {
275275
await mcp.callTool('appium_gesture', { action: 'tap', elementUUID: uuid });
276-
await mcp.callTool('appium_clear_element', { elementUUID: uuid }).catch(() => {});
276+
await mcp.callTool('appium_set_value', { elementUUID: uuid, text: '' }).catch(() => {});
277277
await mcp.callTool('appium_set_value', { elementUUID: uuid, text });
278278
return { success: true, message: `Typed "${text}"` };
279279
}
@@ -310,7 +310,7 @@ async function executeReplayAction(
310310
}
311311

312312
await mcp.callTool('appium_gesture', { action: 'tap', elementUUID: typeUuid });
313-
await mcp.callTool('appium_clear_element', { elementUUID: typeUuid }).catch(() => {});
313+
await mcp.callTool('appium_set_value', { elementUUID: typeUuid, text: '' }).catch(() => {});
314314
await mcp.callTool('appium_set_value', { elementUUID: typeUuid, text });
315315
return { success: true, message: `Typed "${text}"` };
316316
}

src/vision/window-size.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,8 @@ export async function getScreenSizeForStark(
227227
// return physical pixel dimensions (e.g. "Width: 1440, Height: 3120"), making it
228228
// ambiguous. appium_get_window_rect is more reliable.
229229
const windowRectTools = isIos
230-
? (['appium_get_window_rect', 'get_window_rect'] as const)
231-
: (['appium_get_window_rect', 'appium_get_window_size', 'get_window_rect'] as const);
230+
? (['appium_get_window_size'] as const)
231+
: (['appium_get_window_size'] as const);
232232

233233
for (const name of windowRectTools) {
234234
try {

vscode-extension/src/webview/device-panel.ts

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -615,35 +615,35 @@ export class DevicePanel {
615615
<div class="hitl-prompt" id="hitlPrompt"></div>
616616
<div class="hitl-input-row">
617617
<input type="text" id="hitlInput" placeholder="Enter response...">
618-
<button class="btn-run" onclick="submitHitl()">Send</button>
618+
<button class="btn-run" id="hitlSendBtn">Send</button>
619619
</div>
620620
</div>
621621
622-
<div id="rawLogToggle" onclick="toggleRawLog()" style="cursor:pointer;font-size:10px;color:var(--vscode-descriptionForeground);padding:2px 10px;border-top:1px solid var(--vscode-panel-border);user-select:none;">▸ Logs</div>
622+
<div id="rawLogToggle" style="cursor:pointer;font-size:10px;color:var(--vscode-descriptionForeground);padding:2px 10px;border-top:1px solid var(--vscode-panel-border);user-select:none;">▸ Logs</div>
623623
<div id="debugLog" style="display:none;font-size:10px;color:var(--vscode-descriptionForeground);padding:3px 10px;max-height:120px;overflow-y:auto;font-family:monospace;"></div>
624624
625625
<div class="input-area">
626626
<div style="display:flex;gap:6px;align-items:center;">
627627
<div id="modeToggle" style="display:flex;border-radius:4px;overflow:hidden;border:1px solid var(--vscode-input-border);font-size:11px;cursor:pointer;">
628-
<span id="modeGoal" onclick="setMode('goal')" style="padding:2px 8px;background:var(--vscode-button-background);color:var(--vscode-button-foreground);">Goal</span>
629-
<span id="modePlayground" onclick="setMode('playground')" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);">Playground</span>
628+
<span id="modeGoal" style="padding:2px 8px;background:var(--vscode-button-background);color:var(--vscode-button-foreground);cursor:pointer;">Goal</span>
629+
<span id="modePlayground" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);cursor:pointer;">Playground</span>
630630
</div>
631631
<div id="platformToggle" style="display:flex;border-radius:4px;overflow:hidden;border:1px solid var(--vscode-input-border);font-size:11px;cursor:pointer;">
632-
<span id="platAndroid" onclick="setPlatform('android')" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);">Android</span>
633-
<span id="platIos" onclick="setPlatform('ios')" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);">iOS</span>
632+
<span id="platAndroid" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);cursor:pointer;">Android</span>
633+
<span id="platIos" style="padding:2px 8px;background:var(--vscode-input-background);color:var(--vscode-input-foreground);cursor:pointer;">iOS</span>
634634
</div>
635635
<span id="modeHint" style="font-size:10px;color:var(--vscode-descriptionForeground);flex:1;">AI agent executes your goal</span>
636636
<span id="playgroundActions" style="display:none;">
637-
<button class="action-btn" onclick="sendSlash('/yaml')">YAML</button>
638-
<button class="action-btn" onclick="exportFlow()">Export</button>
639-
<button class="action-btn" onclick="sendSlash('/undo')">Undo</button>
640-
<button class="action-btn" onclick="sendSlash('/clear')">Clear</button>
637+
<button class="action-btn" id="actionYaml">YAML</button>
638+
<button class="action-btn" id="actionExport">Export</button>
639+
<button class="action-btn" id="actionUndo">Undo</button>
640+
<button class="action-btn" id="actionClear">Clear</button>
641641
</span>
642642
</div>
643643
<div style="display:flex;gap:6px;">
644644
<input type="text" id="goalInput" placeholder="Enter a goal... e.g. &quot;Open Settings and enable Wi-Fi&quot;" />
645-
<button class="btn-run" id="runBtn" onclick="handleRun()">Run</button>
646-
<button class="btn-stop" id="stopBtn" onclick="stopExecution()" style="display:none">Stop</button>
645+
<button class="btn-run" id="runBtn">Run</button>
646+
<button class="btn-stop" id="stopBtn" style="display:none">Stop</button>
647647
</div>
648648
</div>
649649
</div>
@@ -677,8 +677,25 @@ export class DevicePanel {
677677
const platIos = document.getElementById("platIos");
678678
const platformToggle = document.getElementById("platformToggle");
679679
const multiLiveGrid = document.getElementById("multiLiveGrid");
680+
const rawLogToggle = document.getElementById("rawLogToggle");
681+
const debugLog = document.getElementById("debugLog");
682+
const hitlSendBtn = document.getElementById("hitlSendBtn");
680683
let selectedPlatform = ""; // "" = use settings default
681684
685+
// ── Click event listeners (CSP blocks inline onclick handlers) ──
686+
document.getElementById("modeGoal").addEventListener("click", function() { setMode("goal"); });
687+
document.getElementById("modePlayground").addEventListener("click", function() { setMode("playground"); });
688+
platAndroid.addEventListener("click", function() { setPlatform("android"); });
689+
platIos.addEventListener("click", function() { setPlatform("ios"); });
690+
runBtn.addEventListener("click", function() { handleRun(); });
691+
stopBtn.addEventListener("click", function() { stopExecution(); });
692+
hitlSendBtn.addEventListener("click", function() { submitHitl(); });
693+
rawLogToggle.addEventListener("click", function() { toggleRawLog(); });
694+
document.getElementById("actionYaml").addEventListener("click", function() { sendSlash("/yaml"); });
695+
document.getElementById("actionExport").addEventListener("click", function() { exportFlow(); });
696+
document.getElementById("actionUndo").addEventListener("click", function() { sendSlash("/undo"); });
697+
document.getElementById("actionClear").addEventListener("click", function() { sendSlash("/clear"); });
698+
682699
// ── Multi-device state ──────────────────────────────────
683700
let panelMode = "single"; // "single" | "multi"
684701
let liveFrames = {}; // deviceName → { element, imgEl, state }
@@ -1123,8 +1140,6 @@ export class DevicePanel {
11231140
}
11241141
}
11251142
1126-
const debugLog = document.getElementById("debugLog");
1127-
const rawLogToggle = document.getElementById("rawLogToggle");
11281143
var rawLogOpen = false;
11291144
function toggleRawLog() {
11301145
rawLogOpen = !rawLogOpen;

0 commit comments

Comments
 (0)