From a588bf780043c1b5564f37b40702a85d90338ff6 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Mon, 13 Apr 2026 10:27:49 +0900 Subject: [PATCH 1/2] fix: #2873 preserve computer driver compatibility for modifier keys --- examples/tools/computer_use.py | 65 ++++++-- src/agents/computer.py | 34 +++- src/agents/run_internal/tool_actions.py | 70 ++++++++- tests/test_computer_action.py | 198 +++++++++++++++++++++--- 4 files changed, 324 insertions(+), 43 deletions(-) diff --git a/examples/tools/computer_use.py b/examples/tools/computer_use.py index b974dbfe16..0f076bba96 100644 --- a/examples/tools/computer_use.py +++ b/examples/tools/computer_use.py @@ -5,6 +5,8 @@ import asyncio import base64 import sys +from collections.abc import AsyncIterator +from contextlib import asynccontextmanager from typing import Any, Literal from playwright.async_api import Browser, Page, Playwright, async_playwright @@ -118,21 +120,50 @@ async def screenshot(self) -> str: png_bytes = await self.page.screenshot(full_page=False) return base64.b64encode(png_bytes).decode("utf-8") - async def click(self, x: int, y: int, button: Button = "left") -> None: + def _normalize_keys(self, keys: list[str] | None) -> list[str]: + if not keys: + return [] + return [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys] + + @asynccontextmanager + async def _hold_keys(self, keys: list[str] | None) -> AsyncIterator[None]: + mapped_keys = self._normalize_keys(keys) + try: + for key in mapped_keys: + await self.page.keyboard.down(key) + yield + finally: + for key in reversed(mapped_keys): + await self.page.keyboard.up(key) + + async def click( + self, x: int, y: int, button: Button = "left", *, keys: list[str] | None = None + ) -> None: playwright_button: Literal["left", "middle", "right"] = "left" # Playwright only supports left, middle, right buttons if button in ("left", "right", "middle"): playwright_button = button # type: ignore - await self.page.mouse.click(x, y, button=playwright_button) + async with self._hold_keys(keys): + await self.page.mouse.click(x, y, button=playwright_button) - async def double_click(self, x: int, y: int) -> None: - await self.page.mouse.dblclick(x, y) + async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + async with self._hold_keys(keys): + await self.page.mouse.dblclick(x, y) - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - await self.page.mouse.move(x, y) - await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") + async def scroll( + self, + x: int, + y: int, + scroll_x: int, + scroll_y: int, + *, + keys: list[str] | None = None, + ) -> None: + async with self._hold_keys(keys): + await self.page.mouse.move(x, y) + await self.page.evaluate(f"window.scrollBy({scroll_x}, {scroll_y})") async def type(self, text: str) -> None: await self.page.keyboard.type(text) @@ -140,24 +171,26 @@ async def type(self, text: str) -> None: async def wait(self) -> None: await asyncio.sleep(1) - async def move(self, x: int, y: int) -> None: - await self.page.mouse.move(x, y) + async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + async with self._hold_keys(keys): + await self.page.mouse.move(x, y) async def keypress(self, keys: list[str]) -> None: - mapped_keys = [CUA_KEY_TO_PLAYWRIGHT_KEY.get(key.lower(), key) for key in keys] + mapped_keys = self._normalize_keys(keys) for key in mapped_keys: await self.page.keyboard.down(key) for key in reversed(mapped_keys): await self.page.keyboard.up(key) - async def drag(self, path: list[tuple[int, int]]) -> None: + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: if not path: return - await self.page.mouse.move(path[0][0], path[0][1]) - await self.page.mouse.down() - for px, py in path[1:]: - await self.page.mouse.move(px, py) - await self.page.mouse.up() + async with self._hold_keys(keys): + await self.page.mouse.move(path[0][0], path[0][1]) + await self.page.mouse.down() + for px, py in path[1:]: + await self.page.mouse.move(px, py) + await self.page.mouse.up() async def run_agent( diff --git a/src/agents/computer.py b/src/agents/computer.py index dca2f155b7..14373b830e 100644 --- a/src/agents/computer.py +++ b/src/agents/computer.py @@ -6,8 +6,12 @@ class Computer(abc.ABC): - """A computer implemented with sync operations. The Computer interface abstracts the - operations needed to control a computer or browser.""" + """A computer implemented with sync operations. + + Subclasses provide the local runtime behind `ComputerTool`. Mouse action methods may + also accept a keyword-only `keys` argument to receive held modifier keys when the + driver supports them. + """ @property def environment(self) -> Environment | None: @@ -21,44 +25,57 @@ def dimensions(self) -> tuple[int, int] | None: @abc.abstractmethod def screenshot(self) -> str: + """Return a base64-encoded PNG screenshot of the current display.""" pass @abc.abstractmethod def click(self, x: int, y: int, button: Button) -> None: + """Click `button` at the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod def double_click(self, x: int, y: int) -> None: + """Double-click at the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at `(x, y)` by `(scroll_x, scroll_y)` units.""" pass @abc.abstractmethod def type(self, text: str) -> None: + """Type `text` into the currently focused target.""" pass @abc.abstractmethod def wait(self) -> None: + """Wait until the computer is ready for the next action.""" pass @abc.abstractmethod def move(self, x: int, y: int) -> None: + """Move the mouse cursor to the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod def keypress(self, keys: list[str]) -> None: + """Press the provided keys, such as `["ctrl", "c"]`.""" pass @abc.abstractmethod def drag(self, path: list[tuple[int, int]]) -> None: + """Click-and-drag the mouse along the given sequence of `(x, y)` waypoints.""" pass class AsyncComputer(abc.ABC): - """A computer implemented with async operations. The Computer interface abstracts the - operations needed to control a computer or browser.""" + """A computer implemented with async operations. + + Subclasses provide the local runtime behind `ComputerTool`. Mouse action methods may + also accept a keyword-only `keys` argument to receive held modifier keys when the + driver supports them. + """ @property def environment(self) -> Environment | None: @@ -72,36 +89,45 @@ def dimensions(self) -> tuple[int, int] | None: @abc.abstractmethod async def screenshot(self) -> str: + """Return a base64-encoded PNG screenshot of the current display.""" pass @abc.abstractmethod async def click(self, x: int, y: int, button: Button) -> None: + """Click `button` at the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod async def double_click(self, x: int, y: int) -> None: + """Double-click at the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: + """Scroll at `(x, y)` by `(scroll_x, scroll_y)` units.""" pass @abc.abstractmethod async def type(self, text: str) -> None: + """Type `text` into the currently focused target.""" pass @abc.abstractmethod async def wait(self) -> None: + """Wait until the computer is ready for the next action.""" pass @abc.abstractmethod async def move(self, x: int, y: int) -> None: + """Move the mouse cursor to the given `(x, y)` screen coordinates.""" pass @abc.abstractmethod async def keypress(self, keys: list[str]) -> None: + """Press the provided keys, such as `["ctrl", "c"]`.""" pass @abc.abstractmethod async def drag(self, path: list[tuple[int, int]]) -> None: + """Click-and-drag the mouse along the given sequence of `(x, y)` waypoints.""" pass diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py index 7efbaf496d..f6fe58eb5b 100644 --- a/src/agents/run_internal/tool_actions.py +++ b/src/agents/run_internal/tool_actions.py @@ -189,17 +189,23 @@ async def _execute_action_and_capture( ) -> str: """Execute computer actions (sync or async drivers) and return the final screenshot.""" - async def maybe_call(method_name: str, *args: Any) -> Any: + async def maybe_call(method_name: str, *args: Any, **kwargs: Any) -> Any: method = getattr(computer, method_name, None) if method is None or not callable(method): raise ModelBehaviorError(f"Computer driver missing method {method_name}") - result = method(*args) + filtered_kwargs = cls._filter_supported_kwargs( + method_name=method_name, + method=method, + kwargs=kwargs, + ) + result = method(*args, **filtered_kwargs) return await result if inspect.isawaitable(result) else result last_action_was_screenshot = False last_screenshot_result: Any = None for action in cls._iter_actions(tool_call): action_type = get_mapping_or_attr(action, "type") + action_keys = cls._normalize_modifier_keys(get_mapping_or_attr(action, "keys")) last_action_was_screenshot = False if action_type == "click": await maybe_call( @@ -207,12 +213,14 @@ async def maybe_call(method_name: str, *args: Any) -> Any: get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), get_mapping_or_attr(action, "button"), + keys=action_keys, ) elif action_type == "double_click": await maybe_call( "double_click", get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), + keys=action_keys, ) elif action_type == "drag": path = get_mapping_or_attr(action, "path") or [] @@ -225,6 +233,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: ) for point in path ], + keys=action_keys, ) elif action_type == "keypress": await maybe_call("keypress", get_mapping_or_attr(action, "keys")) @@ -233,6 +242,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: "move", get_mapping_or_attr(action, "x"), get_mapping_or_attr(action, "y"), + keys=action_keys, ) elif action_type == "screenshot": last_screenshot_result = await maybe_call("screenshot") @@ -244,6 +254,7 @@ async def maybe_call(method_name: str, *args: Any) -> Any: get_mapping_or_attr(action, "y"), get_mapping_or_attr(action, "scroll_x"), get_mapping_or_attr(action, "scroll_y"), + keys=action_keys, ) elif action_type == "type": await maybe_call("type", get_mapping_or_attr(action, "text")) @@ -289,6 +300,61 @@ def _serialize_action_payload(action: Any) -> Any: return dataclasses.asdict(action) return action + @staticmethod + def _normalize_modifier_keys(keys: Any) -> list[str] | None: + if not keys: + return None + return cast(list[str], keys) + + @classmethod + def _filter_supported_kwargs( + cls, + *, + method_name: str, + method: Any, + kwargs: dict[str, Any], + ) -> dict[str, Any]: + filtered_kwargs = {key: value for key, value in kwargs.items() if value is not None} + if not filtered_kwargs: + return {} + + supported_kwargs = cls._supported_keyword_arguments(method) + unsupported_kwargs = [ + key + for key in filtered_kwargs + if key not in supported_kwargs and None not in supported_kwargs + ] + if unsupported_kwargs: + logger.warning( + "Computer driver method %r does not accept keyword argument(s) %s; " + "dropping them and continuing.", + method_name, + ", ".join(sorted(unsupported_kwargs)), + ) + for key in unsupported_kwargs: + filtered_kwargs.pop(key, None) + + return filtered_kwargs + + @staticmethod + def _supported_keyword_arguments(method: Any) -> set[str | None]: + signature = inspect.signature(method) + supported: set[str | None] = { + parameter.name + for parameter in signature.parameters.values() + if parameter.kind + in { + inspect.Parameter.KEYWORD_ONLY, + inspect.Parameter.POSITIONAL_OR_KEYWORD, + } + } + if any( + parameter.kind == inspect.Parameter.VAR_KEYWORD + for parameter in signature.parameters.values() + ): + supported.add(None) + return supported + class LocalShellAction: """Execute local shell commands via the LocalShellTool with lifecycle hooks.""" diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py index dd69e87537..d3c21c3d64 100644 --- a/tests/test_computer_action.py +++ b/tests/test_computer_action.py @@ -5,7 +5,9 @@ hooks and returns the expected ToolCallOutputItem.""" import json -from typing import Any, cast +import logging +from collections.abc import Callable +from typing import Any, TypeVar, cast import pytest from openai.types.responses.computer_action import ( @@ -50,6 +52,8 @@ from .test_responses import get_text_message from .testing_processor import SPAN_PROCESSOR_TESTING +T = TypeVar("T") + def _get_function_span(tool_name: str) -> dict[str, Any]: for span in SPAN_PROCESSOR_TESTING.get_ordered_spans(including_empty=True): @@ -77,6 +81,10 @@ def _get_agent_span(agent_name: str) -> dict[str, Any]: raise AssertionError(f"Agent span for '{agent_name}' not found") +def _action_with_keys(factory: Callable[..., T], **kwargs: Any) -> T: + return cast(T, cast(Any, factory)(**kwargs)) + + class LoggingComputer(Computer): """A `Computer` implementation that logs calls to its methods for verification in tests.""" @@ -96,14 +104,20 @@ def screenshot(self) -> str: self.calls.append(("screenshot", ())) return self._screenshot_return - def click(self, x: int, y: int, button: str) -> None: - self.calls.append(("click", (x, y, button))) + def _log_mouse_action(self, name: str, *args: Any, keys: list[str] | None = None) -> None: + payload = args if keys is None else (*args, keys) + self.calls.append((name, payload)) - def double_click(self, x: int, y: int) -> None: - self.calls.append(("double_click", (x, y))) + def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("click", x, y, button, keys=keys) - def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - self.calls.append(("scroll", (x, y, scroll_x, scroll_y))) + def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("double_click", x, y, keys=keys) + + def scroll( + self, x: int, y: int, scroll_x: int, scroll_y: int, *, keys: list[str] | None = None + ) -> None: + self._log_mouse_action("scroll", x, y, scroll_x, scroll_y, keys=keys) def type(self, text: str) -> None: self.calls.append(("type", (text,))) @@ -111,14 +125,14 @@ def type(self, text: str) -> None: def wait(self) -> None: self.calls.append(("wait", ())) - def move(self, x: int, y: int) -> None: - self.calls.append(("move", (x, y))) + def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("move", x, y, keys=keys) def keypress(self, keys: list[str]) -> None: self.calls.append(("keypress", (keys,))) - def drag(self, path: list[tuple[int, int]]) -> None: - self.calls.append(("drag", (tuple(path),))) + def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: + self._log_mouse_action("drag", tuple(path), keys=keys) class LoggingAsyncComputer(AsyncComputer): @@ -140,14 +154,20 @@ async def screenshot(self) -> str: self.calls.append(("screenshot", ())) return self._screenshot_return - async def click(self, x: int, y: int, button: str) -> None: - self.calls.append(("click", (x, y, button))) + def _log_mouse_action(self, name: str, *args: Any, keys: list[str] | None = None) -> None: + payload = args if keys is None else (*args, keys) + self.calls.append((name, payload)) - async def double_click(self, x: int, y: int) -> None: - self.calls.append(("double_click", (x, y))) + async def click(self, x: int, y: int, button: str, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("click", x, y, button, keys=keys) - async def scroll(self, x: int, y: int, scroll_x: int, scroll_y: int) -> None: - self.calls.append(("scroll", (x, y, scroll_x, scroll_y))) + async def double_click(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("double_click", x, y, keys=keys) + + async def scroll( + self, x: int, y: int, scroll_x: int, scroll_y: int, *, keys: list[str] | None = None + ) -> None: + self._log_mouse_action("scroll", x, y, scroll_x, scroll_y, keys=keys) async def type(self, text: str) -> None: self.calls.append(("type", (text,))) @@ -155,14 +175,14 @@ async def type(self, text: str) -> None: async def wait(self) -> None: self.calls.append(("wait", ())) - async def move(self, x: int, y: int) -> None: - self.calls.append(("move", (x, y))) + async def move(self, x: int, y: int, *, keys: list[str] | None = None) -> None: + self._log_mouse_action("move", x, y, keys=keys) async def keypress(self, keys: list[str]) -> None: self.calls.append(("keypress", (keys,))) - async def drag(self, path: list[tuple[int, int]]) -> None: - self.calls.append(("drag", (tuple(path),))) + async def drag(self, path: list[tuple[int, int]], *, keys: list[str] | None = None) -> None: + self._log_mouse_action("drag", tuple(path), keys=keys) @pytest.mark.asyncio @@ -296,6 +316,142 @@ async def test_get_screenshot_reuses_terminal_batched_screenshot() -> None: assert screenshot_output == "captured" +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_sync_driver() -> None: + computer = LoggingComputer(screenshot_return="with_keys") + tool_call = ResponseComputerToolCall( + id="c5", + type="computer_call", + action=_action_with_keys( + ActionClick, type="click", x=4, y=8, button="left", keys=["shift", "ctrl"] + ), + call_id="c5", + pending_safety_checks=[], + status="completed", + ) + + screenshot_output = await ComputerAction._execute_action_and_capture(computer, tool_call) + + assert computer.calls == [ + ("click", (4, 8, "left", ["shift", "ctrl"])), + ("screenshot", ()), + ] + assert screenshot_output == "with_keys" + + +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_async_driver() -> None: + computer = LoggingAsyncComputer(screenshot_return="async_keys") + tool_call = ResponseComputerToolCall( + id="c6", + type="computer_call", + action=_action_with_keys( + ActionScroll, type="scroll", x=7, y=9, scroll_x=3, scroll_y=-2, keys=["alt"] + ), + call_id="c6", + pending_safety_checks=[], + status="completed", + ) + + screenshot_output = await ComputerAction._execute_action_and_capture(computer, tool_call) + + assert computer.calls == [ + ("scroll", (7, 9, 3, -2, ["alt"])), + ("screenshot", ()), + ] + assert screenshot_output == "async_keys" + + +@pytest.mark.asyncio +async def test_get_screenshot_drops_modifier_keys_for_legacy_driver_with_warning( + caplog: pytest.LogCaptureFixture, +) -> None: + class LegacyDriver: + def __init__(self) -> None: + self.calls: list[tuple[str, tuple[Any, ...]]] = [] + + def screenshot(self) -> str: + self.calls.append(("screenshot", ())) + return "legacy" + + def click(self, x: int, y: int, button: str) -> None: + self.calls.append(("click", (x, y, button))) + + tool_call = ResponseComputerToolCall( + id="c7", + type="computer_call", + action=_action_with_keys( + ActionClick, type="click", x=1, y=1, button="left", keys=["shift"] + ), + call_id="c7", + pending_safety_checks=[], + status="completed", + ) + + driver = LegacyDriver() + with caplog.at_level(logging.WARNING, logger="openai.agents"): + screenshot_output = await ComputerAction._execute_action_and_capture(driver, tool_call) + + assert driver.calls == [("click", (1, 1, "left")), ("screenshot", ())] + assert screenshot_output == "legacy" + assert "does not accept keyword argument(s) keys" in caplog.text + + +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_kwargs_driver() -> None: + class KwargsDriver: + def __init__(self) -> None: + self.calls: list[tuple[str, tuple[Any, ...], dict[str, Any]]] = [] + + def screenshot(self) -> str: + self.calls.append(("screenshot", (), {})) + return "kwargs" + + def move(self, x: int, y: int, **kwargs: Any) -> None: + self.calls.append(("move", (x, y), kwargs)) + + tool_call = ResponseComputerToolCall( + id="c8", + type="computer_call", + action=_action_with_keys(ActionMove, type="move", x=10, y=12, keys=["meta"]), + call_id="c8", + pending_safety_checks=[], + status="completed", + ) + + driver = KwargsDriver() + screenshot_output = await ComputerAction._execute_action_and_capture(driver, tool_call) + + assert driver.calls == [ + ("move", (10, 12), {"keys": ["meta"]}), + ("screenshot", (), {}), + ] + assert screenshot_output == "kwargs" + + +@pytest.mark.asyncio +async def test_get_screenshot_preserves_modifier_keys_for_batched_actions() -> None: + computer = LoggingComputer(screenshot_return="batched_keys") + tool_call = ResponseComputerToolCall( + id="c9", + type="computer_call", + actions=[ + _action_with_keys(BatchedClick, type="click", x=11, y=12, button="left", keys=["ctrl"]) + ], + call_id="c9", + pending_safety_checks=[], + status="completed", + ) + + screenshot_output = await ComputerAction._execute_action_and_capture(computer, tool_call) + + assert computer.calls == [ + ("click", (11, 12, "left", ["ctrl"])), + ("screenshot", ()), + ] + assert screenshot_output == "batched_keys" + + class LoggingRunHooks(RunHooks[Any]): """Capture on_tool_start and on_tool_end invocations.""" From 83a0e588bac5761ed6e7ce07abfc1d546ca388c4 Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Thu, 16 Apr 2026 03:30:09 +0900 Subject: [PATCH 2/2] fix review comments --- src/agents/run_internal/tool_actions.py | 5 ++- tests/test_computer_action.py | 52 +++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/agents/run_internal/tool_actions.py b/src/agents/run_internal/tool_actions.py index f6fe58eb5b..3ef1ced8f4 100644 --- a/src/agents/run_internal/tool_actions.py +++ b/src/agents/run_internal/tool_actions.py @@ -338,7 +338,10 @@ def _filter_supported_kwargs( @staticmethod def _supported_keyword_arguments(method: Any) -> set[str | None]: - signature = inspect.signature(method) + try: + signature = inspect.signature(method) + except (TypeError, ValueError): + return set() supported: set[str | None] = { parameter.name for parameter in signature.parameters.values() diff --git a/tests/test_computer_action.py b/tests/test_computer_action.py index d3c21c3d64..3aa908c66c 100644 --- a/tests/test_computer_action.py +++ b/tests/test_computer_action.py @@ -397,6 +397,50 @@ def click(self, x: int, y: int, button: str) -> None: assert "does not accept keyword argument(s) keys" in caplog.text +@pytest.mark.asyncio +async def test_get_screenshot_drops_modifier_keys_for_non_introspectable_driver_with_warning( + caplog: pytest.LogCaptureFixture, +) -> None: + class NonIntrospectableClick: + def __init__(self, calls: list[tuple[str, tuple[Any, ...]]]) -> None: + self._calls = calls + + @property + def __signature__(self) -> Any: + raise ValueError("signature unavailable") + + def __call__(self, x: int, y: int, button: str) -> None: + self._calls.append(("click", (x, y, button))) + + class NonIntrospectableDriver: + def __init__(self) -> None: + self.calls: list[tuple[str, tuple[Any, ...]]] = [] + self.click = NonIntrospectableClick(self.calls) + + def screenshot(self) -> str: + self.calls.append(("screenshot", ())) + return "non_introspectable" + + tool_call = ResponseComputerToolCall( + id="c8", + type="computer_call", + action=_action_with_keys( + ActionClick, type="click", x=2, y=5, button="left", keys=["shift"] + ), + call_id="c8", + pending_safety_checks=[], + status="completed", + ) + + driver = NonIntrospectableDriver() + with caplog.at_level(logging.WARNING, logger="openai.agents"): + screenshot_output = await ComputerAction._execute_action_and_capture(driver, tool_call) + + assert driver.calls == [("click", (2, 5, "left")), ("screenshot", ())] + assert screenshot_output == "non_introspectable" + assert "does not accept keyword argument(s) keys" in caplog.text + + @pytest.mark.asyncio async def test_get_screenshot_preserves_modifier_keys_for_kwargs_driver() -> None: class KwargsDriver: @@ -411,10 +455,10 @@ def move(self, x: int, y: int, **kwargs: Any) -> None: self.calls.append(("move", (x, y), kwargs)) tool_call = ResponseComputerToolCall( - id="c8", + id="c9", type="computer_call", action=_action_with_keys(ActionMove, type="move", x=10, y=12, keys=["meta"]), - call_id="c8", + call_id="c9", pending_safety_checks=[], status="completed", ) @@ -433,12 +477,12 @@ def move(self, x: int, y: int, **kwargs: Any) -> None: async def test_get_screenshot_preserves_modifier_keys_for_batched_actions() -> None: computer = LoggingComputer(screenshot_return="batched_keys") tool_call = ResponseComputerToolCall( - id="c9", + id="c10", type="computer_call", actions=[ _action_with_keys(BatchedClick, type="click", x=11, y=12, button="left", keys=["ctrl"]) ], - call_id="c9", + call_id="c10", pending_safety_checks=[], status="completed", )