Skip to content

Commit 40873be

Browse files
Merge remote-tracking branch 'origin/main' into SOLENG-360-refactor/askui-controller-multi-target
2 parents 55cdac0 + 9632197 commit 40873be

18 files changed

Lines changed: 898 additions & 112 deletions

pdm.lock

Lines changed: 23 additions & 27 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ authors = [
55
{name = "askui GmbH", email = "info@askui.com"},
66
]
77
dependencies = [
8-
"askui-agent-os>=26.1.1",
8+
'askui-agent-os>=26.4.1; sys_platform == "darwin"',
9+
'askui-agent-os>=26.5.1; sys_platform != "darwin"',
910
"anthropic>=0.86.0",
1011
"fastapi>=0.115.12",
1112
"fastmcp>=2.3.0",

src/askui/computer_agent.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ class ComputerAgent(Agent):
7979
`display`.
8080
settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack.
8181
retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method.
82-
act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method.
82+
act_tools (list[Tool] | None, optional): Additional tools to make available for
83+
the `act()` method on every call. The same tools can instead be passed per call
84+
via `act(..., tools=[...])` (see example below).
8385
8486
Example:
8587
Single local machine (the default):
@@ -136,6 +138,26 @@ class ComputerAgent(Agent):
136138
)
137139
agent.act("Kick off a release build on the build server")
138140
```
141+
142+
Example (optional tools for `act()`):
143+
Register tools from `askui.tools.store` (or your own `Tool` implementations)
144+
either on the agent so they apply to all `act()` calls, or only for one call.
145+
146+
```python
147+
from askui import ComputerAgent
148+
from askui.tools.store.computer import ComputerSaveScreenshotTool
149+
150+
with ComputerAgent(
151+
act_tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")]
152+
) as agent:
153+
agent.act("Take a screenshot and save it as demo/demo.png")
154+
155+
with ComputerAgent() as agent:
156+
agent.act(
157+
"Take a screenshot and save it as demo/demo.png",
158+
tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")],
159+
)
160+
```
139161
"""
140162

141163
@telemetry.record_call(
@@ -548,8 +570,8 @@ def cli(
548570
549571
with ComputerAgent() as agent:
550572
# Use for Windows
551-
agent.cli(r'start "" "C:\Program Files\VideoLAN\VLC\vlc.exe"') # Start in VLC non-blocking
552-
agent.cli(r'"C:\Program Files\VideoLAN\VLC\vlc.exe"') # Start in VLC blocking
573+
agent.cli(r'start "" "C:\\Program Files\\VideoLAN\\VLC\\vlc.exe"') # Start in VLC non-blocking
574+
agent.cli(r'"C:\\Program Files\\VideoLAN\\VLC\\vlc.exe"') # Start in VLC blocking
553575
554576
# Mac
555577
agent.cli("open -a chrome") # Open Chrome non-blocking for mac

src/askui/models/shared/conversation.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -371,18 +371,18 @@ def _add_message(self, message: MessageParam) -> None:
371371
Args:
372372
message: Message to add
373373
"""
374+
# Report to reporter
375+
self._reporter.add_message(
376+
self.current_speaker.name, message.model_dump(mode="json")
377+
)
378+
374379
if not self._truncation_strategy:
375380
logger.error("No truncation strategy, cannot add message")
376381
return
377382

378383
# Add to truncation strategy
379384
self._truncation_strategy.append_message(message)
380385

381-
# Report to reporter
382-
self._reporter.add_message(
383-
self.current_speaker.name, message.model_dump(mode="json")
384-
)
385-
386386
@tracer.start_as_current_span("_handle_continue_conversation")
387387
def _handle_continue_conversation(self, result: SpeakerResult) -> bool:
388388
"""Handle speaker result status and determine if loop should continue.

src/askui/models/shared/truncation_strategies.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
from askui.models.shared.token_counter import SimpleTokenCounter
2222
from askui.models.shared.tools import ToolCollection
2323
from askui.prompts.truncation import SUMMARIZE_INSTRUCTION_PROMPT
24+
from askui.reporting import Reporter
2425

2526
if TYPE_CHECKING:
2627
from askui.callbacks.conversation_callback import ConversationCallback
2728
from askui.models.shared.conversation import Conversation
28-
from askui.reporting import Reporter
2929

3030
logger = logging.getLogger(__name__)
3131

@@ -81,6 +81,7 @@ def _summarize_message_history(
8181
system: SystemPrompt | None = None,
8282
tools: ToolCollection | None = None,
8383
provider_options: dict[str, Any] | None = None,
84+
reporter: Reporter | None = None,
8485
) -> MessageParam:
8586
"""Ask the VLM to summarize the conversation history.
8687
@@ -99,6 +100,7 @@ def _summarize_message_history(
99100
Required for cache hits on the prefix.
100101
provider_options: Provider-specific options (e.g. ``betas``)
101102
used by the regular conversation calls.
103+
reporter: Optional reporter that receives errors raised during summarization.
102104
103105
Returns:
104106
The raw VLM response message.
@@ -121,13 +123,21 @@ def _summarize_message_history(
121123
)
122124
)
123125

124-
return vlm_provider.create_message(
125-
messages=messages_to_summarize,
126-
max_tokens=2048,
127-
system=system,
128-
tools=tools,
129-
provider_options=provider_options,
130-
)
126+
try:
127+
return vlm_provider.create_message(
128+
messages=messages_to_summarize,
129+
max_tokens=2048,
130+
system=system,
131+
tools=tools,
132+
provider_options=provider_options,
133+
)
134+
except Exception as e:
135+
# catch e.g. BadRequestError
136+
error_msg = f"Truncation Failed with error: {e}"
137+
logger.exception(error_msg)
138+
if reporter:
139+
reporter.add_message("TruncationStrategy", error_msg)
140+
raise
131141

132142

133143
def _extract_summary_text(response: MessageParam) -> str:
@@ -405,6 +415,7 @@ def truncate(self) -> None:
405415
system=system,
406416
tools=tools,
407417
provider_options=provider_options,
418+
reporter=self.reporter,
408419
)
409420
if self.reporter:
410421
self.reporter.add_message(
@@ -731,6 +742,7 @@ def truncate(self) -> None:
731742
system=system,
732743
tools=tools,
733744
provider_options=provider_options,
745+
reporter=self.reporter,
734746
)
735747
if self.reporter:
736748
self.reporter.add_message(

src/askui/tools/agent_os.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,3 +741,46 @@ def temporary_select(self, computer_id: str) -> AbstractContextManager[Self]:
741741
```
742742
"""
743743
raise NotImplementedError
744+
745+
def get_file_names(self, absolute_directory_path: str) -> list[str]:
746+
"""
747+
List file names in an absolute directory on the automation target
748+
(desktop Agent OS).
749+
750+
Args:
751+
absolute_directory_path (str): Absolute directory path on the target system.
752+
753+
Returns:
754+
list[str]: Names of files in that directory.
755+
756+
Raises:
757+
NotImplementedError: If the implementation does not support this operation.
758+
"""
759+
raise NotImplementedError
760+
761+
def get_file(self, path: str) -> Image.Image | str:
762+
"""
763+
Read a file from the automation target (desktop Agent OS).
764+
765+
Binary image payloads are returned as `PIL.Image.Image` when recognized;
766+
otherwise UTF-8 text when decodable.
767+
768+
Args:
769+
path (str): File path on the target system.
770+
771+
Returns:
772+
Image.Image | str: Decoded file contents.
773+
774+
Raises:
775+
NotImplementedError: If the implementation does not support this operation.
776+
"""
777+
raise NotImplementedError
778+
779+
def remove_virtual_displays(self) -> None:
780+
"""
781+
Remove virtual displays from the controller, leaving real displays only.
782+
783+
Raises:
784+
NotImplementedError: If the implementation does not support this operation.
785+
"""
786+
raise NotImplementedError

0 commit comments

Comments
 (0)