Add CI workflow, pre-commit hooks, dev requirements and offline tests.

Layout note: line breaks and indentation were reconstructed from a
whitespace-collapsed copy of this patch; the blob hashes on the `index`
lines predate the edits below and are informational only.

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..125171df
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,37 @@
+name: CI
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dev requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-dev.txt
+
+      # Linter flags mirror .pre-commit-config.yaml so CI and local hooks agree.
+      - name: Run linters
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          python -m black --check .
+          python -m isort --check-only --profile black .
+          python -m flake8 --max-line-length=88 --extend-ignore=E203,W503 .
+
+      - name: Run tests
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          python -m pytest -q
diff --git a/.gitignore b/.gitignore
index cf1f613d..f35edaab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,4 +161,7 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 logs/
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+# Local env file for secrets
+.env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..7e0e7f67
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 25.11.0
+    hooks:
+      - id: black
+        language_version: python3.11
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        # Keep isort's import wrapping compatible with black.
+        args: ["--profile", "black"]
+  # Upstream flake8 hook repository; the old pre-commit mirror is deprecated.
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.0
+    hooks:
+      - id: flake8
+        args: ["--max-line-length=88", "--extend-ignore=E203,W503"]
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 00000000..33e66caa
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,7 @@
+pytest
+pillow
+# Linter pins match the revs in .pre-commit-config.yaml.
+black==25.11.0
+flake8==7.1.0
+isort==5.12.0
+pre-commit
diff --git a/tests/test_code_agent.py b/tests/test_code_agent.py
new file mode 100644
index 00000000..8ff20825
--- /dev/null
+++ b/tests/test_code_agent.py
@@ -0,0 +1,41 @@
+"""Unit tests for the code-agent helpers, run against a stub controller."""
+
+from gui_agents.s3.agents.code_agent import execute_code, extract_code_block
+
+
+class DummyEnvController:
+    """Stub environment controller: returns canned results, executes nothing."""
+
+    def run_python_script(self, code):
+        # emulate running python code
+        if "print(" in code:
+            return {"status": "success", "output": "printed", "returncode": 0}
+        return {"status": "success", "output": "ok", "returncode": 0}
+
+    def run_bash_script(self, code, timeout=30):
+        # echo the script back so a test can see which code was dispatched
+        return {"status": "success", "output": code, "returncode": 0}
+
+
+def test_extract_code_block():
+    """A fenced block yields its language tag and its body."""
+    s = "Some text ```python\nprint(1)\n``` more"
+    t, code = extract_code_block(s)
+    assert t == "python"
+    assert "print(1)" in code
+
+
+def test_execute_code_python():
+    """A python snippet gives a successful result that carries an output."""
+    controller = DummyEnvController()
+    res = execute_code("python", "print(1)", controller)
+    assert res["status"] == "success"
+    assert "output" in res
+
+
+def test_execute_code_bash():
+    """A bash snippet comes back from the echoing stub unchanged."""
+    controller = DummyEnvController()
+    res = execute_code("bash", "echo hi", controller)
+    assert res["status"] == "success"
+    assert res["output"] == "echo hi"
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
new file mode 100644
index 00000000..598fb624
--- /dev/null
+++ b/tests/test_smoke.py
@@ -0,0 +1,158 @@
+"""Smoke tests: import and drive the agent stack with heavy deps stubbed out."""
+
+import importlib
+import io
+import sys
+
+from PIL import Image
+
+
+# ---- Insert lightweight dummy modules to avoid heavy external deps at import time ----
+class DummyPytesseractModule:
+    """Stand-in for the ``pytesseract`` module; OCR always finds nothing."""
+
+    Output = type("Output", (), {})()
+
+    @staticmethod
+    def image_to_data(image, output_type=None):
+        # Return minimal dict expected by grounding.get_ocr_elements
+        return {
+            "text": [],
+            "left": [],
+            "top": [],
+            "width": [],
+            "height": [],
+            "block_num": [],
+        }
+
+
+# setdefault keeps a real module if one is already in sys.modules.
+sys.modules.setdefault("pytesseract", DummyPytesseractModule)
+
+
+class DummyPyAutoGUI:
+    """Stand-in for ``pyautogui``: fixed 100x100 screen, input calls are no-ops."""
+
+    def size(self):
+        return (100, 100)
+
+    def screenshot(self):
+        return Image.new("RGB", (100, 100))
+
+    def press(self, *args, **kwargs):
+        pass
+
+    def click(self, *args, **kwargs):
+        pass
+
+    def hotkey(self, *args, **kwargs):
+        pass
+
+
+sys.modules.setdefault("pyautogui", DummyPyAutoGUI())
+
+# ---- Monkeypatch LMMAgent to avoid external LLM calls ----
+import gui_agents.s3.core.mllm as mllm  # noqa: E402
+
+
+class FakeLMMAgent:
+    """Offline LMMAgent replacement that always answers ``agent.wait(1.333)``."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        self.messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": self.system_prompt}],
+            }
+        ]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        # image_content is accepted but not stored
+        self.messages.append(
+            {
+                "role": role or "user",
+                "content": [{"type": "text", "text": text_content}],
+            }
+        )
+
+    def get_response(self, *args, **kwargs):
+        # Return a response that contains a single valid action: agent.wait
+        return "thinking```python\nagent.wait(1.333)\n```"
+
+
+# Patch mllm itself, then the LMMAgent attribute of each agent module.
+mllm.LMMAgent = FakeLMMAgent
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+
+_code_agent.LMMAgent = FakeLMMAgent
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_grounding.LMMAgent = FakeLMMAgent
+
+
+def _create_screenshot_bytes():
+    """Return PNG bytes of a solid-colour 100x100 image."""
+    img = Image.new("RGB", (100, 100), color=(73, 109, 137))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+
+
+def test_agent_smoke_flow():
+    """One predict() call turns the fake reply into a ``time.sleep`` action."""
+    from gui_agents.s3.agents.agent_s import AgentS3
+    from gui_agents.s3.agents.grounding import OSWorldACI
+
+    screenshot = _create_screenshot_bytes()
+
+    grounding = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding={
+            "engine_type": "mock",
+            "grounding_width": 100,
+            "grounding_height": 100,
+        },
+        width=100,
+        height=100,
+    )
+
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=grounding,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(
+        instruction="Wait a bit", observation={"screenshot": screenshot}
+    )
+
+    assert isinstance(actions, list) and len(actions) > 0
+    assert "time.sleep" in actions[0]
+
+
+def test_cli_help_runs_ok():
+    """``--help`` must not make the CLI exit with a non-zero status."""
+    # ensure cli module can be imported with dummy pyautogui in sys.modules
+    cli = importlib.import_module("gui_agents.s3.cli_app")
+
+    # Running help should exit with code 0
+    import sys as _sys
+
+    prev_argv = _sys.argv.copy()
+    try:
+        _sys.argv = ["agent_s", "--help"]
+        try:
+            cli.main()
+        except SystemExit as e:
+            assert e.code == 0
+    finally:
+        _sys.argv = prev_argv
diff --git a/tests/test_utils_formatters.py b/tests/test_utils_formatters.py
new file mode 100644
index 00000000..1b5fffa2
--- /dev/null
+++ b/tests/test_utils_formatters.py
@@ -0,0 +1,21 @@
+"""Tests for the response-parsing helpers in ``common_utils``."""
+
+from gui_agents.s3.utils.common_utils import (
+    extract_agent_functions,
+    parse_code_from_string,
+)
+
+
+def test_parse_code_from_string_normal():
+    """Code inside a fenced python block is returned."""
+    s = "Intro ```python\nagent.wait(1)\n``` end"
+    code = parse_code_from_string(s)
+    assert "agent.wait" in code
+
+
+def test_extract_agent_functions():
+    """Both ``agent.*`` calls in a two-statement string are found."""
+    code = "agent.wait(1); agent.click('ok')"
+    funcs = extract_agent_functions(code)
+    assert any("agent.wait" in f for f in funcs)
+    assert any("agent.click" in f for f in funcs)
diff --git a/tests/test_worker.py b/tests/test_worker.py
new file mode 100644
index 00000000..fef27abb
--- /dev/null
+++ b/tests/test_worker.py
@@ -0,0 +1,92 @@
+"""Worker-level test: one AgentS3.predict() step against a fake LMMAgent."""
+
+import io
+
+from PIL import Image
+
+# NOTE(review): presumably relies on the pytesseract/pyautogui stubs installed by
+# tests/test_smoke.py when those packages are absent; confirm, or move the stubs
+# into a shared conftest.py.
+from gui_agents.s3.core import mllm as mllm_mod
+
+
+# Monkeypatch LMMAgent used in Worker via module replacement
+class FakeLMMAgent:
+    """Offline LMMAgent replacement that always answers ``agent.wait(0.5)``."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        self.messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": self.system_prompt}],
+            }
+        ]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        self.messages.append(
+            {
+                "role": role or "user",
+                "content": [{"type": "text", "text": text_content}],
+            }
+        )
+
+    def get_response(self, *args, **kwargs):
+        return "thinking```python\nagent.wait(0.5)\n```"
+
+
+mllm_mod.LMMAgent = FakeLMMAgent
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+
+_code_agent.LMMAgent = FakeLMMAgent
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_grounding.LMMAgent = FakeLMMAgent
+
+
+def _create_screenshot():
+    """Return PNG bytes of a solid-colour 100x100 image."""
+    img = Image.new("RGB", (100, 100), color=(73, 109, 137))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+
+
+def test_worker_generate_next_action():
+    """The fake reply must yield exactly one wait/sleep action."""
+    # Imported here, after LMMAgent is patched, as tests/test_smoke.py does.
+    from gui_agents.s3.agents.agent_s import AgentS3
+    from gui_agents.s3.agents.grounding import OSWorldACI
+
+    screenshot = _create_screenshot()
+    grounding = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding={
+            "engine_type": "mock",
+            "grounding_width": 100,
+            "grounding_height": 100,
+        },
+        width=100,
+        height=100,
+    )
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=grounding,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(
+        instruction="Wait small", observation={"screenshot": screenshot}
+    )
+
+    assert isinstance(actions, list)
+    assert len(actions) == 1
+    assert "time.sleep" in actions[0] or "wait" in actions[0]