Skip to content

Commit 87c4902

Browse files
committed
init
0 parents  commit 87c4902

553 files changed

Lines changed: 158936 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/pytest.yml

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
name: Unittests
2+
3+
# Allow triggering the workflow manually (e.g., when dependencies change)
4+
on: [push, workflow_dispatch]
5+
6+
jobs:
7+
pytest-job:
8+
runs-on: ubuntu-latest
9+
timeout-minutes: 30
10+
11+
concurrency:
12+
group: ${{ github.workflow }}-${{ github.ref }}
13+
cancel-in-progress: true
14+
15+
steps:
16+
- uses: actions/checkout@v4
17+
18+
# Set up Python
19+
- uses: actions/setup-python@v5
20+
with:
21+
python-version: "3.11"
22+
23+
# Install AndroidWorld
24+
- name: Install AndroidWorld
25+
run: |
26+
mv pyproject.toml pyproject.toml.bak || true
27+
pip install -r requirements.txt
28+
python setup.py install
29+
30+
# Set PYTHONPATH
31+
- name: Set PYTHONPATH
32+
run: |
33+
echo "PYTHONPATH=$(pwd)" >> $GITHUB_ENV
34+
35+
# Set TMPDIR
36+
- name: Set TMPDIR
37+
run: |
38+
echo "TMPDIR=/tmp/" >> $GITHUB_ENV
39+
40+
# Install additional dependencies
41+
- name: Install additional dependencies
42+
run: |
43+
sudo apt-get update
44+
sudo apt-get install -y fonts-dejavu fonts-liberation ttf-mscorefonts-installer
45+
pip --version
46+
pip install pytest-xdist
47+
pip install -e .[dev]
48+
pip freeze
49+
50+
# Run tests (in parallel)
51+
- name: Run core tests
52+
env:
53+
PYTHONPATH: ${{ env.PYTHONPATH }}
54+
run: |
55+
pytest -vv -n auto --ignore=android_env
56+
57+
- name: Rename pyproject.toml
58+
run: |
59+
mv pyproject.toml.bak pyproject.toml || true

.gitignore

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Compiled python modules.
2+
*.pyc
3+
4+
# Byte-compiled
5+
__pycache__/
6+
.cache/
7+
8+
# Poetry, setuptools, PyPI distribution artifacts.
9+
/*.egg-info
10+
.eggs/
11+
build/
12+
dist/
13+
poetry.lock
14+
15+
# Tests
16+
.pytest_cache/
17+
18+
# Type checking
19+
.pytype/
20+
21+
# Other
22+
*.DS_Store
23+
24+
# PyCharm
25+
.idea

android_world/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright 2025 The android_world Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""android_world API."""
16+
17+
# A new PyPI release will be pushed every time `__version__` is increased.
18+
# When changing this, also update the CHANGELOG.md.
19+
__version__ = '0.1.0'
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Copyright 2025 The android_world Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Utilities for agents."""
16+
17+
import ast
18+
import json
19+
import re
20+
from typing import Any
21+
22+
23+
def extract_json(s: str) -> dict[str, Any] | None:
24+
"""Extracts JSON from string.
25+
26+
Tries conversion with ast and json modules.
27+
28+
Args:
29+
s: A string with a JSON in it. E.g., "{'hello': 'world'}" or from CoT:
30+
"let's think step-by-step, ..., {'hello': 'world'}".
31+
32+
Returns:
33+
JSON object.
34+
"""
35+
pattern = r'\{.*?\}'
36+
match = re.search(pattern, s)
37+
if match:
38+
try:
39+
return ast.literal_eval(match.group())
40+
except (SyntaxError, ValueError) as error:
41+
try:
42+
# Try conversion with json module.
43+
return json.loads(match.group())
44+
except (SyntaxError, ValueError) as error2:
45+
print(
46+
'Cannot extract JSON, skipping due to errors %s and %s',
47+
error,
48+
error2,
49+
)
50+
return None
51+
else:
52+
return None

android_world/agents/base_agent.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# Copyright 2025 The android_world Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Base agent."""
16+
17+
import abc # 用于定义抽象基类
18+
import dataclasses
19+
import logging
20+
import time
21+
from typing import Any
22+
23+
from android_world.env import interface
24+
25+
# Data class describing the outcome of one agent/environment interaction.
@dataclasses.dataclass
class AgentInteractionResult:
    """Outcome of one interaction between an agent and the environment.

    Attributes:
      done: True when the agent signals that the whole session is finished,
        i.e. this was the final interaction and the session will terminate.
      data: Environment and agent data produced by the interaction.
    """

    done: bool
    data: dict[str, Any]
38+
39+
# Base class for agents that interact with the environment; it provides the
# basic scaffolding for agent/environment interaction.
class EnvironmentInteractingAgent(abc.ABC):
    """Base class for an agent that directly interacts with and acts on the environment.

    This class provides flexibility in agent design, allowing developers to
    define custom action spaces and interaction methods without being confined
    to a specific approach.
    """

    def __init__(
        self,
        env: interface.AsyncEnv,
        name: str = '',
        transition_pause: float | None = 1.0,
    ):
        """Initializes the agent.

        Args:
          env: The environment.
          name: The agent name.
          transition_pause: The pause before grabbing the state. This is
            required because typically the agent is grabbing state immediately
            after an action and the screen is still changing. If `None` is
            provided, then it uses "auto" mode which dynamically adjusts the
            wait time based on environmental feedback.

        Raises:
          ValueError: If the transition pause is negative.
        """
        self._env = env
        self._name = name
        self._transition_pause = self._validate_transition_pause(
            transition_pause
        )
        # Maximum number of steps; unset until `set_max_steps` is called.
        self._max_steps: int | None = None

    @staticmethod
    def _validate_transition_pause(
        transition_pause: float | None,
    ) -> float | None:
        """Returns `transition_pause` unchanged, rejecting negative values.

        Shared by `__init__` and the `transition_pause` setter so both enforce
        the same contract; a negative value would otherwise only fail later
        inside `time.sleep`.

        Raises:
          ValueError: If the transition pause is negative.
        """
        if transition_pause is not None and transition_pause < 0:
            raise ValueError(
                f'transition_pause must be non-negative, got {transition_pause}'
            )
        return transition_pause

    # Accessors for the private `_transition_pause` and `_env` attributes.
    @property
    def transition_pause(self) -> float | None:
        """The pause in seconds before grabbing state, or None for auto mode."""
        return self._transition_pause

    @transition_pause.setter
    def transition_pause(self, transition_pause: float | None) -> None:
        """Sets the pause; raises ValueError if it is negative."""
        self._transition_pause = self._validate_transition_pause(
            transition_pause
        )

    @property
    def env(self) -> interface.AsyncEnv:
        """The environment this agent acts on."""
        return self._env

    @env.setter
    def env(self, env: interface.AsyncEnv) -> None:
        self._env = env

    def set_max_steps(self, max_steps: int) -> None:
        """Stores the maximum number of steps on the agent."""
        self._max_steps = max_steps

    def reset(self, go_home: bool = False) -> None:
        """Resets the agent by calling `env.reset`.

        Args:
          go_home: Forwarded unchanged to `env.reset`.
        """
        self.env.reset(go_home=go_home)

    def get_post_transition_state(self) -> interface.State:
        """Convenience function to get the agent state after the transition.

        Returns:
          The state returned by `env.get_state`. In auto mode
          (`transition_pause` is None) the environment is asked to wait for
          stabilization; otherwise the agent sleeps for `transition_pause`
          seconds and then fetches the state without waiting.
        """
        # Auto mode: let the environment decide when the screen is stable.
        if self._transition_pause is None:
            logging.info(
                'Waiting for screen to stabilize before grabbing state...'
            )
            start = time.time()
            state = self.env.get_state(wait_to_stabilize=True)
            logging.info('Fetched after %.1f seconds.', time.time() - start)
            return state

        # Fixed pause: announce the wait before sleeping so the log reflects
        # what is about to happen, then fetch the state.
        logging.info(
            'Pausing %2.1f seconds before grabbing state.',
            self._transition_pause,
        )
        time.sleep(self._transition_pause)
        return self.env.get_state(wait_to_stabilize=False)

    # Abstract: subclasses must implement a single step on the environment.
    @abc.abstractmethod
    def step(self, goal: str) -> AgentInteractionResult:
        """Performs a step of the agent on the environment.

        Args:
          goal: The goal.

        Returns:
          Done and agent & observation data.
        """

    @property
    def name(self) -> str:
        """The agent name."""
        return self._name

    @name.setter
    def name(self, name: str) -> None:
        self._name = name

0 commit comments

Comments
 (0)