diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 00000000..125171df
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,44 @@
+# Lint and test pushes and pull requests that target main/master.
+name: CI
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+# The job only reads the repository, so restrict the token to read access.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # NOTE(review): only the dev tooling is installed here; the tests import
+      # gui_agents, so confirm its runtime dependencies are available as well.
+      - name: Install dev requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-dev.txt
+
+      - name: Run linters
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        # flake8 gets the same options as the pre-commit hook so both agree.
+        run: |
+          python -m black --check .
+          python -m isort --check-only .
+          python -m flake8 --max-line-length=88 --extend-ignore=E203,W503 .
+
+      - name: Run tests
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          python -m pytest -q
diff --git a/.gitignore b/.gitignore
index cf1f613d..f35edaab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,4 +161,7 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
logs/
-.DS_Store
\ No newline at end of file
+.DS_Store
+
+# Local env file for secrets
+.env
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..7e0e7f67
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,23 @@
+# Git hooks managed by pre-commit: formatters, linter and whitespace fixers.
+repos:
+  - repo: https://github.com/psf/black
+    rev: 25.11.0
+    hooks:
+      - id: black
+        language_version: python3.11
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+  # flake8 publishes its pre-commit hook from its own upstream repository.
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.0
+    hooks:
+      - id: flake8
+        # 88 is black's default line length; E203/W503 conflict with black.
+        args: ["--max-line-length=88", "--extend-ignore=E203,W503"]
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 00000000..33e66caa
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,8 @@
+pytest
+pillow
+# Linters are pinned to the revisions listed in .pre-commit-config.yaml so
+# that local hooks and CI format and lint identically.
+black==25.11.0
+flake8==7.1.0
+isort==5.12.0
+pre-commit
diff --git a/tests/test_code_agent.py b/tests/test_code_agent.py
new file mode 100644
index 00000000..8ff20825
--- /dev/null
+++ b/tests/test_code_agent.py
@@ -0,0 +1,39 @@
+from gui_agents.s3.agents.code_agent import execute_code, extract_code_block
+
+
+class DummyEnvController:
+    """Stand-in environment controller that returns canned results."""
+
+    def run_python_script(self, code):
+        """Fake a Python run; output differs when the code calls print."""
+        if "print(" in code:
+            return {"status": "success", "output": "printed", "returncode": 0}
+        return {"status": "success", "output": "ok", "returncode": 0}
+
+    def run_bash_script(self, code, timeout=30):
+        """Fake a bash run by returning the script text as output."""
+        return {"status": "success", "output": code, "returncode": 0}
+
+
+def test_extract_code_block():
+    """A fenced python block yields its language tag and its body."""
+    s = "Some text ```python\nprint(1)\n``` more"
+    t, code = extract_code_block(s)
+    assert t == "python"
+    assert "print(1)" in code
+
+
+def test_execute_code_python():
+    """Executing a python snippet reports success and carries an output."""
+    controller = DummyEnvController()
+    res = execute_code("python", "print(1)", controller)
+    assert res["status"] == "success"
+    assert "output" in res
+
+
+def test_execute_code_bash():
+    """Executing a bash snippet returns the controller's output unchanged."""
+    controller = DummyEnvController()
+    res = execute_code("bash", "echo hi", controller)
+    assert res["status"] == "success"
+    assert res["output"] == "echo hi"
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
new file mode 100644
index 00000000..598fb624
--- /dev/null
+++ b/tests/test_smoke.py
@@ -0,0 +1,159 @@
+import importlib
+import io
+import sys
+
+from PIL import Image
+
+
+# ---- Insert lightweight dummy modules to avoid heavy external deps at import time ----
+class DummyPytesseractModule:
+    """Minimal stand-in registered under the name ``pytesseract``."""
+
+    # NOTE(review): Output has no attributes, so e.g. Output.DICT would
+    # raise AttributeError -- confirm the tested paths never read one.
+    Output = type("Output", (), {})()
+
+    @staticmethod
+    def image_to_data(image, output_type=None):
+        # Return minimal dict expected by grounding.get_ocr_elements
+        return {
+            "text": [],
+            "left": [],
+            "top": [],
+            "width": [],
+            "height": [],
+            "block_num": [],
+        }
+
+
+# setdefault leaves an already-imported pytesseract untouched.
+sys.modules.setdefault("pytesseract", DummyPytesseractModule)
+
+
+class DummyPyAutoGUI:
+    """Stand-in pyautogui: 100x100 screen, input calls are no-ops."""
+
+    def size(self):
+        return (100, 100)
+
+    def screenshot(self):
+        return Image.new("RGB", (100, 100))
+
+    def press(self, *args, **kwargs):
+        pass
+
+    def click(self, *args, **kwargs):
+        pass
+
+    def hotkey(self, *args, **kwargs):
+        pass
+
+
+sys.modules.setdefault("pyautogui", DummyPyAutoGUI())
+
+# ---- Monkeypatch LMMAgent to avoid external LLM calls ----
+import gui_agents.s3.core.mllm as mllm  # noqa: E402
+
+
+class FakeLMMAgent:
+    """Offline replacement for LMMAgent that always answers agent.wait."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        self.messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": self.system_prompt}],
+            }
+        ]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        self.messages.append(
+            {
+                "role": role or "user",
+                "content": [{"type": "text", "text": text_content}],
+            }
+        )
+
+    def get_response(self, *args, **kwargs):
+        # Return a response that contains a single valid action: agent.wait
+        return "thinking```python\nagent.wait(1.333)\n```"
+
+
+# Patch mllm first, then import and patch each agent module in turn; the
+# order is kept in case those modules bind LMMAgent at import time.
+mllm.LMMAgent = FakeLMMAgent
+
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+
+_code_agent.LMMAgent = FakeLMMAgent
+
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_grounding.LMMAgent = FakeLMMAgent
+
+
+def _create_screenshot_bytes():
+    """Return a solid-colour 100x100 PNG encoded as bytes."""
+    img = Image.new("RGB", (100, 100), color=(73, 109, 137))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+
+
+def test_agent_smoke_flow():
+    """One predict() step with the fake model yields a sleep action."""
+    from gui_agents.s3.agents.agent_s import AgentS3
+    from gui_agents.s3.agents.grounding import OSWorldACI
+
+    screenshot = _create_screenshot_bytes()
+
+    grounding = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding={
+            "engine_type": "mock",
+            "grounding_width": 100,
+            "grounding_height": 100,
+        },
+        width=100,
+        height=100,
+    )
+
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=grounding,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(
+        instruction="Wait a bit", observation={"screenshot": screenshot}
+    )
+
+    assert isinstance(actions, list) and len(actions) > 0
+    assert "time.sleep" in actions[0]
+
+
+def test_cli_help_runs_ok():
+    """Importing the CLI and invoking --help must not exit non-zero."""
+    # ensure cli module can be imported with dummy pyautogui in sys.modules
+    cli = importlib.import_module("gui_agents.s3.cli_app")
+
+    # Running help should exit with code 0
+    prev_argv = sys.argv.copy()
+    try:
+        sys.argv = ["agent_s", "--help"]
+        try:
+            cli.main()
+        except SystemExit as e:
+            assert e.code == 0
+    finally:
+        # Restore argv so later tests see the real command line.
+        sys.argv = prev_argv
diff --git a/tests/test_utils_formatters.py b/tests/test_utils_formatters.py
new file mode 100644
index 00000000..1b5fffa2
--- /dev/null
+++ b/tests/test_utils_formatters.py
@@ -0,0 +1,18 @@
+# Import the module rather than two long names so the import fits on one
+# line and black and isort cannot disagree about how to wrap it.
+from gui_agents.s3.utils import common_utils
+
+
+def test_parse_code_from_string_normal():
+    """The fenced python block is extracted from the surrounding text."""
+    s = "Intro ```python\nagent.wait(1)\n``` end"
+    code = common_utils.parse_code_from_string(s)
+    assert "agent.wait" in code
+
+
+def test_extract_agent_functions():
+    """Both agent calls in a one-line snippet are reported."""
+    code = "agent.wait(1); agent.click('ok')"
+    funcs = common_utils.extract_agent_functions(code)
+    assert any("agent.wait" in f for f in funcs)
+    assert any("agent.click" in f for f in funcs)
diff --git a/tests/test_worker.py b/tests/test_worker.py
new file mode 100644
index 00000000..fef27abb
--- /dev/null
+++ b/tests/test_worker.py
@@ -0,0 +1,90 @@
+import io
+
+from PIL import Image
+
+from gui_agents.s3.agents.agent_s import AgentS3
+from gui_agents.s3.agents.grounding import OSWorldACI
+from gui_agents.s3.core import mllm as mllm_mod
+
+
+# Monkeypatch LMMAgent used in Worker via module replacement
+# NOTE(review): unlike test_smoke.py, nothing here stubs pytesseract or
+# pyautogui before the gui_agents imports above -- confirm they are
+# importable when this file runs on its own.
+class FakeLMMAgent:
+    """Offline replacement for LMMAgent that always answers agent.wait."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        self.messages = [
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": self.system_prompt}],
+            }
+        ]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        self.messages.append(
+            {
+                "role": role or "user",
+                "content": [{"type": "text", "text": text_content}],
+            }
+        )
+
+    def get_response(self, *args, **kwargs):
+        return "thinking```python\nagent.wait(0.5)\n```"
+
+
+mllm_mod.LMMAgent = FakeLMMAgent
+
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+
+_code_agent.LMMAgent = FakeLMMAgent
+
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_grounding.LMMAgent = FakeLMMAgent
+
+
+def _create_screenshot():
+    """Return a solid-colour 100x100 PNG encoded as bytes."""
+    img = Image.new("RGB", (100, 100), color=(73, 109, 137))
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return buf.getvalue()
+
+
+def test_worker_generate_next_action():
+    """One predict() step with the fake model yields exactly one action."""
+    screenshot = _create_screenshot()
+    grounding = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding={
+            "engine_type": "mock",
+            "grounding_width": 100,
+            "grounding_height": 100,
+        },
+        width=100,
+        height=100,
+    )
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=grounding,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(
+        instruction="Wait small", observation={"screenshot": screenshot}
+    )
+
+    assert isinstance(actions, list)
+    assert len(actions) == 1
+    assert "time.sleep" in actions[0] or "wait" in actions[0]