Skip to content

Commit 0e81e18

Browse files
committed
Handle short drag paths safely in CUA templates.
Normalize drag paths before calling Kernel drag APIs so that zero-point or one-point model outputs do not cause the entire batch to fail with an HTTP 400. Single-point paths are padded with a second point, and drags with no valid points become no-ops, in both the TypeScript and Python templates.

Made-with: Cursor
1 parent 3f99e7f commit 0e81e18

2 files changed

Lines changed: 65 additions & 6 deletions

File tree

pkg/templates/python/openai-computer-use/computers/kernel_computer.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,33 @@ def _normalize_button(button) -> str:
121121
return str(button)
122122

123123

124+
def _normalize_drag_path(path: Any) -> List[List[int]]:
125+
points: List[List[int]] = []
126+
if isinstance(path, list):
127+
for point in path:
128+
if not isinstance(point, dict):
129+
continue
130+
x = point.get("x")
131+
y = point.get("y")
132+
if (
133+
isinstance(x, (int, float))
134+
and not isinstance(x, bool)
135+
and isinstance(y, (int, float))
136+
and not isinstance(y, bool)
137+
):
138+
points.append([int(x), int(y)])
139+
if not points:
140+
return []
141+
if len(points) == 1:
142+
x, y = points[0]
143+
return [[x, y], [x + 1, y]]
144+
return points
145+
146+
147+
def _drag_noop_action() -> Dict[str, Any]:
148+
return {"type": "sleep", "sleep": {"duration_ms": 1}}
149+
150+
124151
def _translate_cua_action(action: Dict[str, Any]) -> Dict[str, Any]:
125152
action_type = action.get("type", "")
126153
if action_type == "click":
@@ -185,7 +212,9 @@ def _translate_cua_action(action: Dict[str, Any]) -> Dict[str, Any]:
185212
elif action_type == "move":
186213
return {"type": "move_mouse", "move_mouse": {"x": action.get("x", 0), "y": action.get("y", 0)}}
187214
elif action_type == "drag":
188-
path = [[p["x"], p["y"]] for p in action.get("path", [])]
215+
path = _normalize_drag_path(action.get("path", []))
216+
if len(path) < 2:
217+
return _drag_noop_action()
189218
return {"type": "drag_mouse", "drag_mouse": {"path": path}}
190219
elif action_type == "wait":
191220
return {"type": "sleep", "sleep": {"duration_ms": action.get("ms", 1000)}}
@@ -546,8 +575,11 @@ def drag(self, path: List[Dict[str, int]]) -> None:
546575
op = _describe_action("drag", {"path": path})
547576

548577
def _do() -> None:
549-
p = [[pt["x"], pt["y"]] for pt in path]
550-
self.client.browsers.computer.drag_mouse(self.session_id, path=p)
578+
normalized_path = _normalize_drag_path(path)
579+
if len(normalized_path) < 2:
580+
time.sleep(0.001)
581+
return
582+
self.client.browsers.computer.drag_mouse(self.session_id, path=normalized_path)
551583

552584
self._trace_backend(op, _do)
553585

pkg/templates/typescript/openai-computer-use/lib/kernel-computer.ts

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,28 @@ function normalizeButton(button?: string | number): string {
165165
return button;
166166
}
167167

168+
/**
 * Converts a CUA drag path into `[x, y]` integer pairs.
 *
 * Entries whose `x` or `y` is not a finite number are dropped, and the
 * remaining coordinates are truncated toward zero.
 *
 * @param path - Drag points as `{ x, y }` objects. `undefined`, `null`, or any
 *   non-array runtime value is treated as an empty path.
 * @returns `[]` when no valid point remains; a two-point path when exactly one
 *   valid point remains (the point plus a copy shifted one pixel to the
 *   right); otherwise every valid point in its original order.
 */
function normalizeDragPath(path: Array<{ x: number; y: number }> | undefined): number[][] {
  // The static type promises an array, but the value may come from untyped
  // input; calling `.filter` on a non-array would throw.
  if (!Array.isArray(path)) return [];

  const points = path
    .filter(
      (point): point is { x: number; y: number } =>
        typeof point?.x === 'number' &&
        Number.isFinite(point.x) &&
        typeof point?.y === 'number' &&
        Number.isFinite(point.y),
    )
    .map((point): [number, number] => [Math.trunc(point.x), Math.trunc(point.y)]);

  const [first, second] = points;
  if (first === undefined) return [];
  if (second === undefined) {
    // Pad a single point to two points so callers that require a
    // multi-point path still receive one.
    const [x, y] = first;
    return [
      [x, y],
      [x + 1, y],
    ];
  }
  return points;
}
185+
186+
/** Builds a 1 ms `sleep` batch action, used as a stand-in for a drag that cannot be performed. */
function dragNoopAction(): BatchAction {
  const noop: BatchAction = { type: 'sleep', sleep: { duration_ms: 1 } };
  return noop;
}
189+
168190
function translateCuaAction(action: CuaAction): BatchAction {
169191
switch (action.type) {
170192
case 'click': {
@@ -220,7 +242,8 @@ function translateCuaAction(action: CuaAction): BatchAction {
220242
case 'move':
221243
return { type: 'move_mouse', move_mouse: { x: action.x ?? 0, y: action.y ?? 0 } };
222244
case 'drag': {
223-
const path = (action.path ?? []).map((p) => [p.x, p.y]);
245+
const path = normalizeDragPath(action.path);
246+
if (path.length < 2) return dragNoopAction();
224247
return { type: 'drag_mouse', drag_mouse: { path } };
225248
}
226249
case 'wait':
@@ -483,8 +506,12 @@ export class KernelComputer {
483506
async drag(path: Array<{ x: number; y: number }>): Promise<void> {
484507
const op = describeAction('drag', { path });
485508
await this.traceCall(op, async () => {
486-
const p = path.map((pt) => [pt.x, pt.y]);
487-
await this.client.browsers.computer.dragMouse(this.sessionId, { path: p });
509+
const normalizedPath = normalizeDragPath(path);
510+
if (normalizedPath.length < 2) {
511+
await new Promise((resolve) => setTimeout(resolve, 1));
512+
return;
513+
}
514+
await this.client.browsers.computer.dragMouse(this.sessionId, { path: normalizedPath });
488515
});
489516
}
490517

0 commit comments

Comments
 (0)