diff --git a/docs/tools.md b/docs/tools.md index 9e71e42c2c..8086af2326 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -199,7 +199,7 @@ When a [`ComputerTool`][agents.tool.ComputerTool] is present, `tool_choice="comp This distinction matters when `ComputerTool` is backed by a [`ComputerProvider`][agents.tool.ComputerProvider] factory. The GA `computer` payload does not need `environment` or dimensions at serialization time, so unresolved factories are fine. Preview-compatible serialization still needs a resolved `Computer` or `AsyncComputer` instance so the SDK can send `environment`, `display_width`, and `display_height`. -At runtime, both paths still use the same local harness. Preview responses emit `computer_call` items with a single `action`; `gpt-5.4` can emit batched `actions[]`, and the SDK executes them in order before producing a `computer_call_output` screenshot item. See `examples/tools/computer_use.py` for a runnable Playwright-based harness. +At runtime, both paths still use the same local harness. Preview responses emit `computer_call` items with a single `action`; `gpt-5.4` can emit batched `actions[]`, and the SDK executes them in order before producing a `computer_call_output` screenshot item. Mouse actions can also carry an optional `keys` list for held modifiers such as `Shift` or `Ctrl`. See `examples/tools/computer_use.py` for a runnable Playwright-based harness. ```python from agents import Agent, ApplyPatchTool, ShellTool @@ -211,14 +211,14 @@ class NoopComputer(AsyncComputer): environment = "browser" dimensions = (1024, 768) async def screenshot(self): return "" - async def click(self, x, y, button): ... - async def double_click(self, x, y): ... - async def scroll(self, x, y, scroll_x, scroll_y): ... + async def click(self, x, y, button, *, keys=None): ... + async def double_click(self, x, y, *, keys=None): ... + async def scroll(self, x, y, scroll_x, scroll_y, *, keys=None): ... async def type(self, text): ... async def wait(self): ... 
- async def move(self, x, y): ... + async def move(self, x, y, *, keys=None): ... async def keypress(self, keys): ... - async def drag(self, path): ... + async def drag(self, path, *, keys=None): ... class NoopEditor(ApplyPatchEditor): diff --git a/examples/tools/computer_use.py b/examples/tools/computer_use.py index 1935ec1ecb..4e3f32ef30 100644 --- a/examples/tools/computer_use.py +++ b/examples/tools/computer_use.py @@ -5,6 +5,8 @@ import asyncio import base64 import sys +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager from typing import Any, Literal, Union from playwright.async_api import Browser, Page, Playwright, async_playwright @@ -118,21 +120,50 @@ async def screenshot(self) -> str: png_bytes = await self.page.screenshot(full_page=False) return base64.b64encode(png_bytes).decode("utf-8") - async def click(self, x: int, y: int, button: Button = "left") -> None: + def _normalize_keys(self, keys: list[str] | None) -> list[str]: + if not keys: + return [] + return [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys] + + @asynccontextmanager + async def _hold_keys(self, keys: list[str] | None) -> AsyncIterator[None]: + mapped_keys = self._normalize_keys(keys) + try: + for key in mapped_keys: + await self.page.keyboard.down(key) + yield + finally: + for key in reversed(mapped_keys): + await self.page.keyboard.up(key) + + async def click( + self, x: int, y: int, button: Button = "left", *, keys: list[str] | None = None + ) -> None: playwright_button: Literal["left", "middle", "right"] = "left" # Playwright only supports left, middle, right buttons if button in ("left", "right", "middle"): playwright_button = button # type: ignore - await self.page.mouse.click(x, y, button=playwright_button) + async with self._hold_keys(keys): + await self.page.mouse.click(x, y, button=playwright_button) - async def double_click(self, x: int, y: int) -> None: - await self.page.mouse.dblclick(x, y) + async def double_click(self, x: 
int, y: int, *, keys: list[str] | None = None) -> None: + async with self._hold_keys(keys): + await self.page.mouse.dblclick(x, y) - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - await self.page.mouse.move(x, y) - await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") + async def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: + async with self._hold_keys(keys): + await self.page.mouse.move(x, y) + await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") async def type(self, text: str) -> None: await self.page.keyboard.type(text) @@ -140,24 +171,26 @@ async def type(self, text: str) -> None: async def wait(self) -> None: await asyncio.sleep(1) - async def move(self, x: int, y: int) -> None: - await self.page.mouse.move(x, y) + async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + async with self._hold_keys(keys): + await self.page.mouse.move(x, y) async def keypress(self, keys: list[str]) -> None: - mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys] + mapped_keys = self._normalize_keys(keys) for key in mapped_keys: await self.page.keyboard.down(key) for key in reversed(mapped_keys): await self.page.keyboard.up(key) - async def drag(self, path: list[tuple[int, int]]) -> None: + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: if not path: return - await self.page.mouse.move(path[0][0], path[0][1]) - await self.page.mouse.down() - for px, py in path[1:]: - await self.page.mouse.move(px, py) - await self.page.mouse.up() + async with self._hold_keys(keys): + await self.page.mouse.move(path[0][0], path[0][1]) + await self.page.mouse.down() + for px, py in path[1:]: + await self.page.mouse.move(px, py) + await self.page.mouse.up() async def run_agent( diff --git a/src/agents/computer.py b/src/agents/computer.py index dca2f155b7..c3f49e4f5b 
100644 --- a/src/agents/computer.py +++ b/src/agents/computer.py @@ -24,15 +24,23 @@ def screenshot(self) -> str: pass @abc.abstractmethod - def click(self, x: int, y: int, button: Button) -> None: + def click(self, x: int, y: int, button: Button, *, keys: list[str] | None = None) -> None: pass @abc.abstractmethod - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass @abc.abstractmethod - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass @abc.abstractmethod @@ -44,7 +52,7 @@ def wait(self) -> None: pass @abc.abstractmethod - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass @abc.abstractmethod @@ -52,7 +60,7 @@ def keypress(self, keys: list[str]) -> None: pass @abc.abstractmethod - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass @@ -75,15 +83,23 @@ async def screenshot(self) -> str: pass @abc.abstractmethod - async def click(self, x: int, y: int, button: Button) -> None: + async def click(self, x: int, y: int, button: Button, *, keys: list[str] | None = None) -> None: pass @abc.abstractmethod - async def double_click(self, x: int, y: int) -> None: + async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass @abc.abstractmethod - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + async def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass @abc.abstractmethod @@ -95,7 +111,7 @@ async def wait(self) -> None: pass @abc.abstractmethod - async def move(self, x: int, y: int) -> None: + async def move(self, x: int, y: int, *, keys: 
list[str] | None = None) -> None: pass @abc.abstractmethod @@ -103,5 +119,5 @@ async def keypress(self, keys: list[str]) -> None: pass @abc.abstractmethod - async def drag(self, path: list[tuple[int, int]]) -> None: + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py index 005a0b163f..eb35601abf 100644 --- a/src/agents/run_internal/tool_actions.py +++ b/src/agents/run_internal/tool_actions.py @@ -185,17 +185,32 @@ async def _execute_action_and_capture( ) -> str: """Execute computer actions (sync or async drivers) and return the final screenshot.""" - async def maybe_call(method_name: str, *args: Any) -> Any: + async def maybe_call(method_name: str, *args: Any, **kwargs: Any) -> Any: method = getattr(computer, method_name, None) if method is None or not callable(method): raise ModelBehaviorError(f"Computer driver missing method {method_name}") - result = method(*args) + supported_kwargs = cls._supported_keyword_arguments(method) + filtered_kwargs = {key: value for key, value in kwargs.items() if value is not None} + unsupported_kwargs = [ + key + for key in filtered_kwargs + if key not in supported_kwargs and None not in supported_kwargs + ] + if unsupported_kwargs: + unsupported = ", ".join(sorted(unsupported_kwargs)) + raise ModelBehaviorError( + "Computer driver method " + f"{method_name!r} does not accept keyword argument(s) {unsupported}. " + "Update the driver to support modifier keys for computer actions." 
+ ) + result = method(*args, **filtered_kwargs) return await result if inspect.isawaitable(result) else result last_action_was_screenshot = False last_screenshot_result: Any = None for action in cls._iter_actions(tool_call): action_type = get_mapping_or_attr(action, "type") + action_keys = cls._normalize_modifier_keys(get_mapping_or_attr(action, "keys")) last_action_was_screenshot = False if action_type == "click": await maybe_call( @@ -203,12 +218,14 @@ async def maybe_call(method_name: str, *args: Any) -> Any: get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), get_mapping_or_attr(action, "button"), + keys=action_keys, ) elif action_type == "double_click": await maybe_call( "double_click", get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), + keys=action_keys, ) elif action_type == "drag": path = get_mapping_or_attr(action, "path") or [] @@ -221,6 +238,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: ) for point in path ], + keys=action_keys, ) elif action_type == "keypress": await maybe_call("keypress", get_mapping_or_attr(action, "keys")) @@ -229,6 +247,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: "move", get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), + keys=action_keys, ) elif action_type == "screenshot": last_screenshot_result = await maybe_call("screenshot") @@ -240,6 +259,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: get_mapping_or_attr(action, "y"), get_mapping_or_attr(action, "scroll_x"), get_mapping_or_attr(action, "scroll_y"), + keys=action_keys, ) elif action_type == "type": await maybe_call("type", get_mapping_or_attr(action, "text")) @@ -285,6 +305,31 @@ def _serialize_action_payload(action: Any) -> Any: return dataclasses.asdict(action) return action + @staticmethod + def _normalize_modifier_keys(keys: Any) -> list[str] | None: + if not keys: + return None + return cast(list[str], keys) + + @staticmethod + def _supported_keyword_arguments(method: 
Any) -> set[str | None]: + signature = inspect.signature(method) + supported: set[str | None] = { + parameter.name + for parameter in signature.parameters.values() + if parameter.kind + in { + inspect.Parameter.KEYWORD_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + } + } + if any( + parameter.kind == inspect.Parameter.VAR_KEYWORD + for parameter in signature.parameters.values() + ): + supported.add(None) + return supported + class LocalShellAction: """Execute local shell commands via the LocalShellTool with lifecycle hooks.""" diff --git a/tests/test_agent_runner.py b/tests/test_agent_runner.py index 0db1032c88..70d299c550 100644 --- a/tests/test_agent_runner.py +++ b/tests/test_agent_runner.py @@ -3891,22 +3891,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py index bb6823942d..8feb457dd6 100644 --- a/tests/test_computer_action.py +++ b/tests/test_computer_action.py @@ -5,7 +5,8 @@ hooks and returns the expected ToolCallOutputItem.""" import json -from typing import Any, cast +from collections.abc import 
def _action_with_keys(factory: Callable[..., T], **kwargs: Any) -> T:
    """Build an action object by calling ``factory`` with arbitrary keyword arguments.

    The factory is cast to ``Any`` before the call so a static type checker
    does not validate the keyword arguments against the factory's declared
    signature (presumably because the action models used in these tests do not
    declare ``keys`` -- confirm against the installed SDK types). The result is
    cast back to the factory's return type.
    """
    untyped_factory = cast(Any, factory)
    action = untyped_factory(**kwargs)
    return cast(T, action)
int, scroll_x: int, scroll_y: int, *, keys: list[str] | None = None + ) -> None: + self._log_mouse_action("scroll", x, y, scroll_x, scroll_y, keys=keys) def type(self, text: str) -> None: self.calls.append(("type", (text,))) @@ -111,14 +125,14 @@ def type(self, text: str) -> None: def wait(self) -> None: self.calls.append(("wait", ())) - def move(self, x: int, y: int) -> None: - self.calls.append(("move", (x, y))) + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("move", x, y, keys=keys) def keypress(self, keys: list[str]) -> None: self.calls.append(("keypress", (keys,))) - def drag(self, path: list[tuple[int, int]]) -> None: - self.calls.append(("drag", (tuple(path),))) + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: + self._log_mouse_action("drag", tuple(path), keys=keys) class LoggingAsyncComputer(AsyncComputer): @@ -140,14 +154,20 @@ async def screenshot(self) -> str: self.calls.append(("screenshot", ())) return self._screenshot_return - async def click(self, x: int, y: int, button: str) -> None: - self.calls.append(("click", (x, y, button))) + def _log_mouse_action(self, name: str, *args: Any, keys: list[str] | None = None) -> None: + payload = args if keys is None else (*args, keys) + self.calls.append((name, payload)) + + async def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("click", x, y, button, keys=keys) - async def double_click(self, x: int, y: int) -> None: - self.calls.append(("double_click", (x, y))) + async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("double_click", x, y, keys=keys) - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - self.calls.append(("scroll", (x, y, scroll_x, scroll_y))) + async def scroll( + self, x: int, y: int, scroll_x: int, scroll_y: int, *, keys: list[str] | None = None + ) -> 
None: + self._log_mouse_action("scroll", x, y, scroll_x, scroll_y, keys=keys) async def type(self, text: str) -> None: self.calls.append(("type", (text,))) @@ -155,14 +175,14 @@ async def type(self, text: str) -> None: async def wait(self) -> None: self.calls.append(("wait", ())) - async def move(self, x: int, y: int) -> None: - self.calls.append(("move", (x, y))) + async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("move", x, y, keys=keys) async def keypress(self, keys: list[str]) -> None: self.calls.append(("keypress", (keys,))) - async def drag(self, path: list[tuple[int, int]]) -> None: - self.calls.append(("drag", (tuple(path),))) + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: + self._log_mouse_action("drag", tuple(path), keys=keys) @pytest.mark.asyncio @@ -296,6 +316,76 @@ async def test_get_screenshot_reuses_terminal_batched_screenshot() -> None: assert screenshot_output == "captured" +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_sync_driver() -> None: + computer = LoggingComputer(screenshot_return="with_keys") + tool_call = ResponseComputerToolCall( + id="c5", + type="computer_call", + action=_action_with_keys( + ActionClick, type="click", x=4, y=8, button="left", keys=["shift", "ctrl"] + ), + call_id="c5", + pending_safety_checks=[], + status="completed", + ) + + screenshot_output = await ComputerAction._execute_action_and_capture(computer, tool_call) + + assert computer.calls == [ + ("click", (4, 8, "left", ["shift", "ctrl"])), + ("screenshot", ()), + ] + assert screenshot_output == "with_keys" + + +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_async_driver() -> None: + computer = LoggingAsyncComputer(screenshot_return="async_keys") + tool_call = ResponseComputerToolCall( + id="c6", + type="computer_call", + action=_action_with_keys( + ActionScroll, type="scroll", x=7, y=9, scroll_x=3, 
scroll_y=-2, keys=["alt"] + ), + call_id="c6", + pending_safety_checks=[], + status="completed", + ) + + screenshot_output = await ComputerAction._execute_action_and_capture(computer, tool_call) + + assert computer.calls == [ + ("scroll", (7, 9, 3, -2, ["alt"])), + ("screenshot", ()), + ] + assert screenshot_output == "async_keys" + + +@pytest.mark.asyncio +async def test_get_screenshot_raises_for_legacy_driver_missing_modifier_support() -> None: + class LegacyDriver: + def screenshot(self) -> str: + return "legacy" + + def click(self, x: int, y: int, button: str) -> None: + return None + + tool_call = ResponseComputerToolCall( + id="c7", + type="computer_call", + action=_action_with_keys( + ActionClick, type="click", x=1, y=1, button="left", keys=["shift"] + ), + call_id="c7", + pending_safety_checks=[], + status="completed", + ) + + with pytest.raises(ModelBehaviorError, match="does not accept keyword argument\\(s\\) keys"): + await ComputerAction._execute_action_and_capture(LegacyDriver(), tool_call) + + class LoggingRunHooks(RunHooks[Any]): """Capture on_tool_start and on_tool_end invocations.""" diff --git a/tests/test_computer_tool_lifecycle.py b/tests/test_computer_tool_lifecycle.py index cce8665b23..ecbcdf5207 100644 --- a/tests/test_computer_tool_lifecycle.py +++ b/tests/test_computer_tool_lifecycle.py @@ -34,13 +34,21 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "img" - def click(self, x: int, y: int, button: Button) -> None: + def click(self, x: int, y: int, button: Button, *, keys: list[str] | None = None) -> None: return None - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: return None - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: return None def type(self, text: str) -> None: @@ 
-49,13 +57,13 @@ def type(self, text: str) -> None: def wait(self) -> None: return None - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: return None def keypress(self, keys: list[str]) -> None: return None - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: return None diff --git a/tests/test_hitl_error_scenarios.py b/tests/test_hitl_error_scenarios.py index d0de312d69..16c062782e 100644 --- a/tests/test_hitl_error_scenarios.py +++ b/tests/test_hitl_error_scenarios.py @@ -102,13 +102,21 @@ def screenshot(self) -> str: self.calls.append("screenshot") return "img" - def click(self, _x: int, _y: int, _button: str) -> None: + def click(self, _x: int, _y: int, _button: str, *, keys: list[str] | None = None) -> None: self.calls.append("click") - def double_click(self, _x: int, _y: int) -> None: + def double_click(self, _x: int, _y: int, *, keys: list[str] | None = None) -> None: self.calls.append("double_click") - def scroll(self, _x: int, _y: int, _scroll_x: int, _scroll_y: int) -> None: + def scroll( + self, + _x: int, + _y: int, + _scroll_x: int, + _scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: self.calls.append("scroll") def type(self, _text: str) -> None: @@ -117,13 +125,13 @@ def type(self, _text: str) -> None: def wait(self) -> None: self.calls.append("wait") - def move(self, _x: int, _y: int) -> None: + def move(self, _x: int, _y: int, *, keys: list[str] | None = None) -> None: self.calls.append("move") def keypress(self, _keys: list[str]) -> None: self.calls.append("keypress") - def drag(self, _path: list[tuple[int, int]]) -> None: + def drag(self, _path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: self.calls.append("drag") diff --git a/tests/test_openai_responses.py b/tests/test_openai_responses.py index 929d5e7985..c561eaa585 100644 --- 
a/tests/test_openai_responses.py +++ b/tests/test_openai_responses.py @@ -983,22 +983,32 @@ class DummyComputer(AsyncComputer): async def screenshot(self) -> str: return "screenshot" - async def click(self, x: int, y: int, button: str) -> None: + async def click( + self, x: int, y: int, button: str, *, keys: list[str] | None = None + ) -> None: pass - async def double_click(self, x: int, y: int) -> None: + async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - async def drag(self, path: list[tuple[int, int]]) -> None: + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass async def keypress(self, keys: list[str]) -> None: pass - async def move(self, x: int, y: int) -> None: + async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + async def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass async def type(self, text: str) -> None: @@ -1054,22 +1064,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + 
keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: @@ -1126,22 +1144,30 @@ class DummyComputer(Computer): def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: @@ -1201,22 +1227,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) 
-> None: @@ -1276,22 +1310,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: @@ -1352,22 +1394,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: diff --git a/tests/test_openai_responses_converter.py 
b/tests/test_openai_responses_converter.py index a461785ede..17ae29a444 100644 --- a/tests/test_openai_responses_converter.py +++ b/tests/test_openai_responses_converter.py @@ -62,13 +62,21 @@ def dimensions(self): def screenshot(self) -> str: raise NotImplementedError - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: raise NotImplementedError - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: raise NotImplementedError - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: raise NotImplementedError def type(self, text: str) -> None: @@ -77,13 +85,13 @@ def type(self, text: str) -> None: def wait(self) -> None: raise NotImplementedError - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: raise NotImplementedError def keypress(self, keys: list[str]) -> None: raise NotImplementedError - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: raise NotImplementedError diff --git a/tests/test_run_state.py b/tests/test_run_state.py index 56cd61fab2..2a1fb4b626 100644 --- a/tests/test_run_state.py +++ b/tests/test_run_state.py @@ -2810,22 +2810,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: 
list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: @@ -3608,22 +3616,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: @@ -3685,22 +3701,30 @@ def dimensions(self) -> tuple[int, int]: def screenshot(self) -> str: return "screenshot" - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: pass - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: 
pass def keypress(self, keys: list[str]) -> None: pass - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: pass - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: pass def type(self, text: str) -> None: diff --git a/tests/test_run_step_processing.py b/tests/test_run_step_processing.py index 2682ba647d..2fafb4345d 100644 --- a/tests/test_run_step_processing.py +++ b/tests/test_run_step_processing.py @@ -445,13 +445,21 @@ def dimensions(self): def screenshot(self) -> str: return "" # pragma: no cover - def click(self, x: int, y: int, button: str) -> None: + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: return None # pragma: no cover - def double_click(self, x: int, y: int) -> None: + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: return None # pragma: no cover - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: return None # pragma: no cover def type(self, text: str) -> None: @@ -460,13 +468,13 @@ def type(self, text: str) -> None: def wait(self) -> None: return None # pragma: no cover - def move(self, x: int, y: int) -> None: + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: return None # pragma: no cover def keypress(self, keys: list[str]) -> None: return None # pragma: no cover - def drag(self, path: list[tuple[int, int]]) -> None: + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: return None # pragma: no cover