Skip to content

Commit 9632197

Browse files
Merge pull request #277 from askui/feat/agent-os-getfile-getfilelist-clear-virtual-displays
feat: add GetFile, GetFileList, ClearVirtualDisplays agent OS commands
2 parents 9b6de63 + 4445a4c commit 9632197

17 files changed

Lines changed: 883 additions & 97 deletions

pdm.lock

Lines changed: 23 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ authors = [
55
{name = "askui GmbH", email = "info@askui.com"},
66
]
77
dependencies = [
8-
"askui-agent-os>=26.1.1",
8+
'askui-agent-os>=26.4.1; sys_platform == "darwin"',
9+
'askui-agent-os>=26.5.1; sys_platform != "darwin"',
910
"anthropic>=0.86.0",
1011
"fastapi>=0.115.12",
1112
"fastmcp>=2.3.0",

src/askui/computer_agent.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,9 @@ class ComputerAgent(Agent):
5656
tools (AgentToolbox | None, optional): Custom toolbox instance. If `None`, a default one will be created with `AskUiControllerClient`.
5757
settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack.
5858
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method.
59-
act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method.
59+
act_tools (list[Tool] | None, optional): Additional tools to make available for
60+
the `act()` method for every call. Same tools can instead be passed per call
61+
via `act(..., tools=[...])` (see example below).
6062
6163
Example:
6264
```python
@@ -67,6 +69,26 @@ class ComputerAgent(Agent):
6769
agent.type("Hello World")
6870
agent.act("Open settings menu")
6971
```
72+
73+
Example (optional tools for `act()`):
74+
Register tools from `askui.tools.store` (or your own `Tool` implementations)
75+
either on the agent so they apply to all `act()` calls, or only for one call.
76+
77+
```python
78+
from askui import ComputerAgent
79+
from askui.tools.store.computer import ComputerSaveScreenshotTool
80+
81+
with ComputerAgent(
82+
act_tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")]
83+
) as agent:
84+
agent.act("Take a screenshot and save it as demo/demo.png")
85+
86+
with ComputerAgent() as agent:
87+
agent.act(
88+
"Take a screenshot and save it as demo/demo.png",
89+
tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")],
90+
)
91+
```
7092
"""
7193

7294
@telemetry.record_call(

src/askui/tools/agent_os.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,3 +676,46 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None:
676676
window_id (int): The ID of the window to set as active.
677677
"""
678678
raise NotImplementedError
679+
680+
def get_file_names(self, absolute_directory_path: str) -> list[str]:
    """
    Return the names of the files in a directory on the automation target
    (desktop Agent OS).

    Args:
        absolute_directory_path (str): Absolute path of the directory to list
            on the target system.

    Returns:
        list[str]: The file names found in that directory.

    Raises:
        NotImplementedError: If the implementation does not support this operation.
    """
    # Default behavior for implementations that do not provide this operation.
    raise NotImplementedError
695+
696+
def get_file(self, path: str) -> Image.Image | str:
    """
    Fetch a file from the automation target (desktop Agent OS).

    A payload recognized as a binary image comes back as `PIL.Image.Image`;
    any other payload comes back as UTF-8 text when it is decodable.

    Args:
        path (str): Path of the file on the target system.

    Returns:
        Image.Image | str: The decoded contents of the file.

    Raises:
        NotImplementedError: If the implementation does not support this operation.
    """
    # Default behavior for implementations that do not provide this operation.
    raise NotImplementedError
713+
714+
def remove_virtual_displays(self) -> None:
    """
    Drop every virtual display from the controller so that only real
    displays remain.

    Raises:
        NotImplementedError: If the implementation does not support this operation.
    """
    # Default behavior for implementations that do not provide this operation.
    raise NotImplementedError

src/askui/tools/askui/askui_controller.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import base64
12
import logging
23
import pathlib
34
import subprocess
@@ -43,6 +44,8 @@
4344
DeleteRenderObjectCommand,
4445
GetActiveProcessCommand,
4546
GetActiveWindowCommand,
47+
GetFileCommand,
48+
GetFileNamesCommand,
4649
GetMousePositionCommand,
4750
GetSystemInfoCommand,
4851
Guid,
@@ -51,6 +54,7 @@
5154
Location,
5255
Message,
5356
Parameter3,
57+
RemoveVirtualDisplaysCommand,
5458
RenderImage,
5559
RenderObjectId,
5660
RenderObjectStyle,
@@ -66,10 +70,13 @@
6670
GetActiveProcessResponseModel,
6771
GetActiveWindowResponse,
6872
GetActiveWindowResponseModel,
73+
GetFileNamesResponse,
74+
GetFileResponse,
6975
GetSystemInfoResponse,
7076
GetSystemInfoResponseModel,
7177
)
7278
from askui.utils.annotated_image import AnnotatedImage
79+
from askui.utils.image_utils import base64_to_image
7380

7481
from ..utils import process_exists, wait_for_port
7582
from .exceptions import (
@@ -217,6 +224,12 @@ def connect(self) -> None:
217224
self._start_session()
218225
self._start_execution()
219226
self.set_display(self._display)
227+
if self._settings.clean_virtual_displays:
228+
logger.info(
229+
"clean_virtual_displays is enabled. Removing all virtual displays ... "
230+
)
231+
self.remove_virtual_displays()
232+
logger.info("Virtual displays removed.")
220233

221234
def _get_stub(self) -> controller_v1.ControllerAPIStub:
222235
assert isinstance(self._stub, controller_v1.ControllerAPIStub), (
@@ -1294,3 +1307,109 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None:
12941307
_window_id = Parameter3(root=window_id)
12951308
command = SetActiveWindowCommand(parameters=[_process_id, _window_id])
12961309
self._send_command(command)
1310+
1311+
def get_file_names(self, absolute_directory_path: str) -> list[str]:
    """
    List the file names inside an absolute directory on the device under
    automation.

    Sends a `GetFileNamesCommand` to the controller, validates the reply and
    reports both the call and its result to the reporter.

    Args:
        absolute_directory_path (str): The absolute directory path whose file
            names should be listed.

    Returns:
        list[str]: The file names returned by the controller.

    Raises:
        DesktopAgentOsError: If the reply has an unexpected type, carries an
            error, or contains neither an error nor a response.
    """
    assert isinstance(self._stub, controller_v1.ControllerAPIStub), (
        "Stub is not initialized"
    )
    # Build the call label once; it is reused for the request and result messages.
    call_label = f"get_file_names({absolute_directory_path})"
    self._reporter.add_message("AgentOS", call_label)

    request = GetFileNamesCommand(parameters=[absolute_directory_path])
    reply = self._send_command(request).message.command

    if not isinstance(reply, GetFileNamesResponse):
        error_text = f"unexpected response type: {reply}"
        raise DesktopAgentOsError(error_text)
    if res_error := reply.error:
        raise DesktopAgentOsError(res_error)
    if reply.error is not None:
        # Falsy but non-None errors (e.g. an empty string) are still errors.
        raise DesktopAgentOsError(reply.error)
    payload = reply.response
    if payload is None:
        error_text = f"{type(reply).__name__} is missing both error and response"
        raise DesktopAgentOsError(error_text)

    self._reporter.add_message("AgentOS", f"{call_label} -> {payload}")
    return payload.fileNames
1343+
1344+
def get_file(self, path: str) -> Image.Image | str:
    """
    Read a file from the device under automation and decode its contents.

    The controller delivers the file as a Base64-encoded string. The decoded
    bytes are returned as `PIL.Image.Image` when they can be opened as an
    image (PNG, JPEG, BMP, GIF, WebP, TIFF, ...), or as `str` when they
    decode cleanly as UTF-8 text.

    Args:
        path (str): The file path to read on the device under automation.

    Returns:
        Image.Image | str: The decoded file contents.

    Raises:
        DesktopAgentOsError: If the file cannot be read or the response is invalid.
    """
    assert isinstance(self._stub, controller_v1.ControllerAPIStub), (
        "Stub is not initialized"
    )
    # Build the call label once; it is reused for the request and result messages.
    call_label = f"get_file({path})"
    self._reporter.add_message("AgentOS", call_label)

    reply = self._send_command(GetFileCommand(parameters=[path])).message.command

    if not isinstance(reply, GetFileResponse):
        error_text = f"unexpected response type: {reply}"
        raise DesktopAgentOsError(error_text)
    if reply.error is not None:
        raise DesktopAgentOsError(reply.error)
    if reply.response is None:
        error_text = f"{type(reply).__name__} is missing both error and response"
        raise DesktopAgentOsError(error_text)

    contents = self._decode_file_payload(reply.response.file.content)

    # Text result: report only its length, not the contents themselves.
    if not isinstance(contents, Image.Image):
        summary = f"text ({len(contents)} chars)"
        self._reporter.add_message("AgentOS", f"{call_label} -> {summary}")
        return contents

    # Image result: report format and dimensions and attach the image itself.
    summary = f"image ({contents.format}, {contents.size[0]}x{contents.size[1]})"
    self._reporter.add_message("AgentOS", f"{call_label} -> {summary}", contents)
    return contents
1388+
1389+
def remove_virtual_displays(self) -> None:
    """
    Ask the controller to remove all virtual displays, so that only real
    displays stay active.

    The call and its completion are both reported to the reporter.
    """
    assert isinstance(self._stub, controller_v1.ControllerAPIStub), (
        "Stub is not initialized"
    )
    reporter = self._reporter
    reporter.add_message("AgentOS", "remove_virtual_displays()")
    # The command takes no parameters; its reply is not inspected here.
    self._send_command(RemoveVirtualDisplaysCommand())
    reporter.add_message("AgentOS", "remove_virtual_displays() -> done")
1401+
1402+
@staticmethod
def _decode_file_payload(base64_data: str) -> Image.Image | str:
    """
    Decode a Base64 file payload into an image or UTF-8 text.

    The payload is first handed to `base64_to_image`; if that raises
    `ValueError`, the payload is Base64-decoded and interpreted as UTF-8
    text instead.

    Args:
        base64_data (str): The Base64-encoded file contents.

    Returns:
        Image.Image | str: The image, or the decoded text.

    Raises:
        DesktopAgentOsError: If the payload is not valid Base64, or if its
            bytes are neither a supported image nor UTF-8 text.
    """
    try:
        return base64_to_image(base64_data)
    except ValueError:
        # Presumably signals "not an image" (confirm against
        # `base64_to_image`); fall back to treating the payload as text.
        pass

    try:
        data = base64.b64decode(base64_data, validate=True)
    except ValueError as error:
        # `binascii.Error` (malformed Base64) subclasses `ValueError`; wrap it
        # so callers only ever see `DesktopAgentOsError` from this helper.
        message = "File contents are not valid Base64"
        raise DesktopAgentOsError(message) from error

    # Payloads containing a NUL byte are rejected as text even though NUL is
    # valid UTF-8.
    if b"\x00" not in data:
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8 either; fall through to the shared error below.
            pass

    message = "File contents are neither a supported image nor UTF-8 text"
    raise DesktopAgentOsError(message)

src/askui/tools/askui/askui_controller_client_settings.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,13 @@ class AskUiControllerClientSettings(BaseSettings):
2222
"Controller server. Defaults to True.",
2323
)
2424

25+
# Opt-in flag, disabled by default.
clean_virtual_displays: bool = Field(
    default=False,
    description=(
        # Trailing space keeps the two adjacent literals from running together
        # ("...is started.Default: False").
        "Whether to clean virtual displays after the controller is started. "
        "Default: False"
    ),
)
32+
2533

2634
__all__ = ["AskUiControllerClientSettings"]

src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Request_2501.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,10 @@
55
from __future__ import annotations
66

77
from enum import Enum
8-
from typing import Dict, List, Literal, Optional, Union
8+
from typing import Any, Dict, List, Literal, Optional, Union
99

10-
from pydantic import BaseModel, ConfigDict, Field, RootModel, confloat, conint, constr
10+
from pydantic import (BaseModel, ConfigDict, Field, RootModel, confloat,
11+
conint, constr)
1112

1213

1314
class ParameterEnum(Enum):
@@ -393,6 +394,20 @@ class LoadCharacterMapCommand(BaseModel):
393394
None, max_length=1, min_length=1
394395
)
395396

397+
# Command model named 'GetFileNames'.
# NOTE: documented with comments rather than a class docstring, since pydantic
# would pick a docstring up as the schema description.
class GetFileNamesCommand(BaseModel):
    name: Literal['GetFileNames'] = 'GetFileNames'
    # Required list holding exactly one string.
    parameters: list[str] = Field(..., min_length=1, max_length=1)
400+
401+
402+
# Command model named 'GetFile'.
# NOTE: documented with comments rather than a class docstring, since pydantic
# would pick a docstring up as the schema description.
class GetFileCommand(BaseModel):
    name: Literal['GetFile'] = 'GetFile'
    # Required list holding exactly one string.
    parameters: list[str] = Field(..., min_length=1, max_length=1)
405+
406+
407+
# Command model named 'RemoveVirtualDisplays'.
# NOTE: documented with comments rather than a class docstring, since pydantic
# would pick a docstring up as the schema description.
class RemoveVirtualDisplaysCommand(BaseModel):
    name: Literal['RemoveVirtualDisplays'] = 'RemoveVirtualDisplays'
    # The list defaults to empty and max_length=0 forbids any entries.
    parameters: List[str] = Field(max_length=0, default=[])
410+
396411
Command =Union[
397412
GetSystemInfoCommand,
398413
GetMousePositionCommand,
@@ -412,6 +427,9 @@ class LoadCharacterMapCommand(BaseModel):
412427
SetActiveProcessCommand,
413428
GetActiveWindowCommand,
414429
SetActiveWindowCommand,
430+
GetFileNamesCommand,
431+
GetFileCommand,
432+
RemoveVirtualDisplaysCommand,
415433
]
416434

417435
class Message(BaseModel):

0 commit comments

Comments
 (0)