Skip to content
Open
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions mypy.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ plugins = pydantic.mypy,sqlalchemy.ext.mypy.plugin
exclude = (?x)(
^src/askui/models/ui_tars_ep/ui_tars_api\.py$
| ^src/askui/tools/askui/askui_ui_controller_grpc/.*$
| ^venv/.*$
| ^\.venv/.*$
)
mypy_path = src:tests
explicit_package_bases = true
Expand Down
5 changes: 4 additions & 1 deletion src/askui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""AskUI Python SDK"""

__version__ = "0.32.1"
__version__ = "0.33.0"

import logging
import os
Expand Down Expand Up @@ -45,6 +45,7 @@
from .models.types.response_schemas import ResponseSchema, ResponseSchemaBase
from .retry import ConfigurableRetry, Retry
from .tools import ModifierKey, PcKey
from .tools.askui import LocalAgentOsTargetComputer, RemoteAgentOsTargetComputer
from .utils.image_utils import ImageSource
from .utils.source_utils import InputSource

Expand All @@ -69,6 +70,8 @@
logging.getLogger(__name__).addHandler(logging.NullHandler())

__all__ = [
"RemoteAgentOsTargetComputer",
"LocalAgentOsTargetComputer",
"Agent",
"AutomationError",
"ComputerAgent",
Expand Down
85 changes: 79 additions & 6 deletions src/askui/computer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
create_computer_agent_prompt,
)
from askui.tools.computer import (
ComputerGetCurrentComputerTargetIdTool,
ComputerGetMousePositionTool,
ComputerGetSystemInfoTool,
ComputerKeyboardPressedTool,
ComputerKeyboardReleaseTool,
ComputerKeyboardTapTool,
ComputerListAgentOsTargetComputersTool,
ComputerListDisplaysTool,
ComputerMouseClickTool,
ComputerMouseHoldDownTool,
Expand All @@ -31,14 +33,15 @@
ComputerRetrieveActiveDisplayTool,
ComputerScreenshotTool,
ComputerSetActiveDisplayTool,
ComputerSwitchAgentOsTargetComputerTool,
ComputerTypeTool,
)
from askui.tools.exception_tool import ExceptionTool

from .reporting import CompositeReporter, Reporter
from .retry import Retry
from .tools import AgentToolbox, ComputerAgentOsFacade, ModifierKey, PcKey
from .tools.askui import AskUiControllerClient
from .tools.askui import AgentOsTargetComputer, AskUiControllerClient

logger = logging.getLogger(__name__)

Expand All @@ -50,15 +53,37 @@ class ComputerAgent(Agent):
This agent can perform various UI interactions like clicking, typing, scrolling, and more.
It uses computer vision models to locate UI elements and execute actions on them.

A single `ComputerAgent` can drive **one or more machines** through the
`agent_os_target_computers` argument. Each entry is an Agent OS target
computer (local subprocess or remote gRPC endpoint) identified by a stable
`computer_id`. At any moment one target is *active* and receives all
explicit calls (`click`, `type`, `keyboard`, ...). The active target can be
changed at runtime via
`agent.tools.os.switch_agent_os_target_computer(computer_id)` or scoped to a
block using `agent.tools.os.temporary_select(computer_id)`. The `act()`
model is also given list/switch/get-current tools so it can orchestrate
work across machines on its own (e.g. read something on one computer and
re-enter it on another).

Args:
display (int, optional): The display number to use for screen interactions. Defaults to `1`.
display (int, optional): The display number to use for screen interactions on the default local target. Ignored when `agent_os_target_computers` is provided. Defaults to `1`.
reporters (list[Reporter] | None, optional): List of reporter instances for logging and reporting. If `None`, an empty list is used.
tools (AgentToolbox | None, optional): Custom toolbox instance. If `None`, a default one will be created with `AskUiControllerClient`.
agent_os_target_computers (list[AgentOsTargetComputer] | None, optional):
Target computers the agent can route actions to. May mix one
`LocalAgentOsTargetComputer` (managing a controller subprocess on this
machine) with any number of `RemoteAgentOsTargetComputer`s pointing at
controllers already running on other machines. Constraints: at
least one target, at most one local, and remote `address`es plus
all `computer_id`s must be unique. The first entry becomes the
initial active target. Defaults to a single local target bound to
`display`.
settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack.
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for the `locate()` method.
act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method.

Example:
Single local machine (the default):

```python
from askui import ComputerAgent

Expand All @@ -67,35 +92,80 @@ class ComputerAgent(Agent):
agent.type("Hello World")
agent.act("Open settings menu")
```

Example:
Research on one machine and write up the findings on another. The
first target in the list is the active one; `temporary_select`
re-routes a block of explicit calls and restores the previous
active target on exit.

```python
from askui import ComputerAgent
from askui.tools.askui import LocalAgentOsTargetComputer, RemoteAgentOsTargetComputer

with ComputerAgent(
agent_os_target_computers=[
LocalAgentOsTargetComputer(computer_id="research-box"),
RemoteAgentOsTargetComputer(
address="192.168.1.42:26000",
description="Writer box with a text editor open",
computer_id="writer-box",
),
],
) as agent:
agent.act(
"On research-box, open a browser, google 'askui', and read "
"the top results to gather key facts about what AskUI is, "
"what it does, and notable features. Then switch to "
"writer-box and write a Markdown document titled "
"'AskUI Findings' summarizing those facts as a bulleted "
"list in the open text editor."
)
```

Example:
Register a remote machine at runtime:

```python
from askui import ComputerAgent

with ComputerAgent() as agent:
agent.tools.os.add_remote_agent_os_target_computer(
address="10.0.0.5:26000",
description="Build server",
)
agent.act("Kick off a release build on the build server")
```
"""

@telemetry.record_call(
exclude={
"reporters",
"tools",
"settings",
"act_tools",
"callbacks",
"truncation_strategy",
"agent_os_target_computers",
}
)
@validate_call(config=ConfigDict(arbitrary_types_allowed=True))
def __init__(
self,
display: Annotated[int, Field(ge=1)] = 1,
reporters: list[Reporter] | None = None,
tools: AgentToolbox | None = None,
agent_os_target_computers: list[AgentOsTargetComputer] | None = None,
settings: AgentSettings | None = None,
retry: Retry | None = None,
act_tools: list[Tool] | None = None,
callbacks: list[ConversationCallback] | None = None,
truncation_strategy: TruncationStrategy | None = None,
) -> None:
reporter = CompositeReporter(reporters=reporters)
self.tools = tools or AgentToolbox(
self.tools = AgentToolbox(
agent_os=AskUiControllerClient(
display=display,
reporter=reporter,
agent_os_target_computers=agent_os_target_computers,
)
)
super().__init__(
Expand Down Expand Up @@ -519,6 +589,9 @@ def get_default_tools() -> list[Tool]:
ComputerListDisplaysTool(),
ComputerRetrieveActiveDisplayTool(),
ComputerSetActiveDisplayTool(),
ComputerListAgentOsTargetComputersTool(),
ComputerSwitchAgentOsTargetComputerTool(),
ComputerGetCurrentComputerTargetIdTool(),
]


Expand Down
29 changes: 20 additions & 9 deletions src/askui/models/shared/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,10 +362,10 @@ def __init__(

@property
def agent_os(self) -> AgentOs | AndroidAgentOs:
"""Get the agent OS.
"""Get the AgentOS.

Returns:
AgentOs | AndroidAgentOs: The agent OS instance.
AgentOs | AndroidAgentOs: The AgentOS instance.
"""
if self._agent_os is None:
msg = (
Expand All @@ -381,7 +381,7 @@ def agent_os(self, agent_os: AgentOs | AndroidAgentOs) -> None:
self._agent_os = agent_os

def is_agent_os_initialized(self) -> bool:
"""Check if the agent OS is initialized."""
"""Check if the AgentOS is initialized."""
return self._agent_os is not None


Expand Down Expand Up @@ -471,10 +471,10 @@ def __init__(
self.add_agent_os(agent_os)

def add_agent_os(self, agent_os: AgentOs | AndroidAgentOs) -> None:
"""Add an agent OS to the collection.
"""Add an AgentOS to the collection.

Args:
agent_os (AgentOs | AndroidAgentOs): The agent OS instance to add.
agent_os (AgentOs | AndroidAgentOs): The AgentOS instance to add.
"""
self._agent_os_list.append(agent_os)

Expand Down Expand Up @@ -534,12 +534,23 @@ def reset_tools(self, tools: list[Tool] | None = None) -> None:
"""Reset the tools in the collection with new tools."""
self._tools = tools or []

def get_agent_os_by_tags(self, tags: list[str]) -> AgentOs | AndroidAgentOs:
"""Get an agent OS by tags."""
def get_agent_os_by_tags(
self, required_tags: list[str]
) -> AgentOs | AndroidAgentOs:
"""
Find the first registered AgentOS whose tags are a superset of
`required_tags`.

Every tag in `required_tags` must appear in the AgentOS's tags; the
AgentOS may declare additional tags beyond those.

Raises:
ValueError: when no registered AgentOS satisfies the required tags.
"""
for agent_os in self._agent_os_list:
if all(tag in agent_os.tags for tag in tags):
if all(required in agent_os.tags for required in required_tags):
return agent_os
msg = f"Agent OS with tags [{', '.join(tags)}] not found"
msg = f"No AgentOS satisfies required tags [{', '.join(required_tags)}]"
raise ValueError(msg)

def _initialize_tools(self) -> None:
Expand Down
65 changes: 65 additions & 0 deletions src/askui/tools/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import TYPE_CHECKING, Literal

from PIL import Image
from pydantic import BaseModel, ConfigDict, Field
from typing_extensions import Self

from askui.models.shared.tool_tags import ToolTags

if TYPE_CHECKING:
from askui.tools.askui.agent_os_target_computer import (
AgentOsTargetComputer,
RemoteAgentOsTargetComputer,
)
from askui.tools.askui.askui_ui_controller_grpc.generated import (
Controller_V1_pb2 as controller_v1_pbs,
)
Expand Down Expand Up @@ -676,3 +682,62 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None:
window_id (int): The ID of the window to set as active.
"""
raise NotImplementedError

def add_agent_os_target_computer(
    self, agent_os_target_computer: "AgentOsTargetComputer"
) -> "AgentOsTargetComputer":
    """Register an additional target computer.

    If this Agent OS is already connected, the newly registered target is
    meant to be connected as part of the registration.
    NOTE(review): that reading comes from the original one-line summary
    ("Auto-connects if connected"); confirm it in the concrete
    implementation.

    Args:
        agent_os_target_computer (AgentOsTargetComputer): The target computer
            to register.

    Returns:
        AgentOsTargetComputer: The registered target computer.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def add_remote_agent_os_target_computer(
    self,
    address: str,
    description: str,
) -> "RemoteAgentOsTargetComputer":
    """Register an additional remote target computer.

    Args:
        address (str): Address of the remote target, e.g. `"10.0.0.5:26000"`.
        description (str): Human-readable description of the remote machine,
            e.g. `"Build server"`.

    Returns:
        RemoteAgentOsTargetComputer: The registered remote target computer.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def reset_agent_os_target_computers(
    self,
    agent_os_target_computers: "list[AgentOsTargetComputer] | None" = None,
) -> None:
    """Disconnect (if connected) and replace the target computer list.

    Args:
        agent_os_target_computers (list[AgentOsTargetComputer] | None, optional):
            The target computers that replace the currently registered ones.
            Defaults to `None`.
            NOTE(review): what `None` resolves to is not defined here;
            confirm in the concrete implementation.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def list_agent_os_target_computers(self) -> "list[AgentOsTargetComputer]":
    """Return all registered target computers.

    Returns:
        list[AgentOsTargetComputer]: The registered target computers.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def get_current_computer_target_id(self, report: bool = True) -> str:
    """Return the `computer_id` of the currently active target computer.

    Args:
        report (bool, optional): Defaults to `True`.
            NOTE(review): presumably controls whether this call is
            reported; the stub does not show it, so confirm in the
            concrete implementation.

    Returns:
        str: The `computer_id` of the active target computer.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def switch_agent_os_target_computer(
    self, computer_id: str
) -> "AgentOsTargetComputer":
    """Switch the active target computer by its `computer_id`.

    Args:
        computer_id (str): Computer id of the target to make active.

    Returns:
        AgentOsTargetComputer: A target computer.
            NOTE(review): presumably the one that is active after the
            switch; confirm in the concrete implementation.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.
    """
    raise NotImplementedError

def temporary_select(self, computer_id: str) -> AbstractContextManager[Self]:
    """
    Temporarily switch the active target computer for the duration of a `with`
    block, then restore the previously-active target on exit (even if the
    block raises).

    Args:
        computer_id (str): Computer id of the target to activate inside the
            block.

    Returns:
        AbstractContextManager[Self]: Context manager that yields this
            `AgentOs` with the selected target active.

    Raises:
        NotImplementedError: Always, unless overridden by a subclass.

    Example:
        ```python
        with agent_os.temporary_select('Remote-Machine') as remote_machine:
            img = remote_machine.screenshot()
            img.save("remote_machine.png")
        # previous active target restored here
        ```
    """
    raise NotImplementedError
25 changes: 25 additions & 0 deletions src/askui/tools/android/agent_os.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import List, Literal

from PIL import Image
from typing_extensions import Self

from askui.tools.android.uiautomator_hierarchy import UIElementCollection

Expand Down Expand Up @@ -502,3 +504,26 @@ def get_ui_elements(self) -> UIElementCollection:
Gets the UI elements.
"""
raise NotImplementedError

def temporary_select(self, device_sn: str) -> AbstractContextManager[Self]:
Comment thread
programminx-askui marked this conversation as resolved.
"""
Temporarily switch the active device for the duration of a `with` block,
then restore the previously-active device on exit (even if the block
raises).

Args:
device_sn (str): Serial number of the device to activate inside the
block.

Returns:
AbstractContextManager[Self]: Context manager that yields this
`AndroidAgentOs` with `device_sn` active.

Example:
```python
with android_agent_os.temporary_select('table_phone') as table_phone:
table_phone.tap(100, 200)
# previous active device restored here
```
"""
raise NotImplementedError
Loading
Loading