simular-ai · Vasyl198 · Dec 24, 2025 · ScottBrenner · Jan 4, 2026 · coderabbitai
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,36 @@
+name: CI
+
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
-      - uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
-      - uses: actions/setup-python@v4
-        with:
-          python-version: "3.11"
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dev requirements
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-dev.txt
+
+      - name: Run linters
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          python -m black --check .
+          python -m isort --check-only .
+          python -m flake8 .
+
+      - name: Run tests
+        env:
+          PYTHONPATH: ${{ github.workspace }}
+        run: |
+          python -m pytest -q
diff --git a/.gitignore b/.gitignore
@@ -161,4 +161,7 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 logs/
-.DS_Store
+.DS_Store
+
+# Local env file for secrets
+.env
-.DS_Store
-
-# Local env file for secrets
-.env
+.DS_Store
-.DS_Store
-
-# Local env file for secrets
-.env
+.DS_Store
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+repos:
+  - repo: https://github.com/psf/black
+    rev: 25.11.0
+    hooks:
+      - id: black
+        language_version: python3.11
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+  - repo: https://github.com/PyCQA/flake8  # official hook repo per flake8 docs
+    rev: 7.1.0
+    hooks:
+      - id: flake8
+        args: ["--max-line-length=88", "--extend-ignore=E203,W503"]
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -0,0 +1,6 @@
+pytest
+pillow
+black
+flake8
+isort
+pre-commit
diff --git a/tests/test_code_agent.py b/tests/test_code_agent.py
@@ -0,0 +1,36 @@
+from gui_agents.s3.agents.code_agent import execute_code, extract_code_block
+
+
+class DummyEnvController:
+    """Test double returning canned success payloads; nothing is executed."""
+
+    def run_python_script(self, code):
+        # Only the reported output text depends on `code`.
+        reported = "printed" if "print(" in code else "ok"
+        return {"status": "success", "output": reported, "returncode": 0}
+
+    def run_bash_script(self, code, timeout=30):
+        # The script text itself is echoed back as the "output"; `timeout` is
+        # accepted but unused.
+        return {"status": "success", "output": code, "returncode": 0}
+
+
+def test_extract_code_block():
+    """The language tag and body are pulled from a fenced python block."""
+    lang, body = extract_code_block("Some text ```python\nprint(1)\n``` more")
+    assert lang == "python"
+    assert "print(1)" in body
+
+
+def test_execute_code_python():
+    """A python snippet run via the stub controller reports success."""
+    result = execute_code("python", "print(1)", DummyEnvController())
+    assert result["status"] == "success"
+    assert "output" in result
+
+
+def test_execute_code_bash():
+    """A bash snippet run via the stub controller echoes the script back."""
+    result = execute_code("bash", "echo hi", DummyEnvController())
+    assert result["status"] == "success"
+    assert result["output"] == "echo hi"
diff --git a/tests/test_smoke.py b/tests/test_smoke.py
@@ -0,0 +1,142 @@
+import importlib
+import io
+import sys
+
+from PIL import Image
+
+
+# ---- Insert lightweight dummy modules to avoid heavy external deps at import time ----
+class DummyPytesseractModule:
+    """Stand-in for pytesseract that always reports an empty OCR result."""
+
+    # Bare placeholder object with no attributes of its own.
+    # NOTE(review): lookups such as Output.DICT would raise AttributeError.
+    Output = type("Output", (), {})()
+
+    @staticmethod
+    def image_to_data(image, output_type=None):
+        # Minimal dict expected by grounding.get_ocr_elements: every column
+        # is present but empty. Both arguments are ignored.
+        columns = ("text", "left", "top", "width", "height", "block_num")
+        # The comprehension builds a separate empty list for each key.
+        return {column: [] for column in columns}
+
+
+sys.modules.setdefault("pytesseract", DummyPytesseractModule)  # no-op if present
+
+
+class DummyPyAutoGUI:
+    """Headless pyautogui stand-in: 100x100 screen, inert input actions."""
+
+    def size(self):
+        return (100, 100)
+
+    def screenshot(self):
+        # Blank RGB frame with the same dimensions that size() reports.
+        return Image.new("RGB", (100, 100))
+
+    def _ignore(self, *args, **kwargs):
+        """Accept any input action and do nothing."""
+
+    # press, click and hotkey all share the same no-op implementation.
+    press = click = hotkey = _ignore
+
+
+sys.modules.setdefault("pyautogui", DummyPyAutoGUI())  # no-op if already loaded
+
+# ---- Monkeypatch LMMAgent to avoid external LLM calls ----
+import gui_agents.s3.core.mllm as mllm  # noqa: E402
+
+
+class FakeLMMAgent:
+    """Offline LMMAgent stand-in that always answers with agent.wait(1.333)."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        # engine_params and engine are accepted but ignored.
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        system_text = {"type": "text", "text": self.system_prompt}
+        self.messages = [{"role": "system", "content": [system_text]}]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        # image_content is dropped; only the text part is recorded.
+        text_part = {"type": "text", "text": text_content}
+        self.messages.append({"role": role or "user", "content": [text_part]})
+
+    def get_response(self, *args, **kwargs):
+        # One valid action (agent.wait). The literal is split across lines to
+        # stay within the 88-column limit; the resulting string is unchanged.
+        return (
+            "<thoughts>thinking</thoughts>"
+            "<answer>```python\nagent.wait(1.333)\n```</answer>"
+        )
+
+
+mllm.LMMAgent = FakeLMMAgent  # patch the source module before the imports below
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_code_agent.LMMAgent = _grounding.LMMAgent = FakeLMMAgent  # rebind module names
+
+
+def _create_screenshot_bytes():
+    """Return PNG-encoded bytes of a solid-colour 100x100 RGB image."""
+    sink = io.BytesIO()
+    Image.new("RGB", (100, 100), color=(73, 109, 137)).save(sink, format="PNG")
+    return sink.getvalue()
+
+
+def test_agent_smoke_flow():
+    """One predict() step with the faked LLM yields a time.sleep action."""
+    from gui_agents.s3.agents.agent_s import AgentS3
+    from gui_agents.s3.agents.grounding import OSWorldACI
+
+    observation = {"screenshot": _create_screenshot_bytes()}
+    # Grounding dimensions are set to the same 100x100 used for width/height.
+    grounding_params = {
+        "engine_type": "mock",
+        "grounding_width": 100,
+        "grounding_height": 100,
+    }
+    aci = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding=grounding_params,
+        width=100,
+        height=100,
+    )
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=aci,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(instruction="Wait a bit", observation=observation)
+
+    # At least one action must come back, and the first one must sleep.
+    assert isinstance(actions, list) and len(actions) > 0
+    assert "time.sleep" in actions[0]
+
+
+def test_cli_help_runs_ok():
+    """`--help` must terminate the CLI via SystemExit with exit status 0."""
+    # Importing also checks cli_app loads with the dummy pyautogui registered.
+    cli = importlib.import_module("gui_agents.s3.cli_app")
+
+    saved_argv = sys.argv.copy()
+    sys.argv = ["agent_s", "--help"]
+    try:
+        cli.main()
+    except SystemExit as exc:
+        assert exc.code == 0
+    else:
+        # A normal return means --help was not honoured; fail loudly.
+        raise AssertionError("cli.main() returned without raising SystemExit")
+    finally:
+        sys.argv = saved_argv
diff --git a/tests/test_utils_formatters.py b/tests/test_utils_formatters.py
@@ -0,0 +1,15 @@
+from gui_agents.s3.utils.common_utils import (extract_agent_functions,
+                                              parse_code_from_string)
+
+
+def test_parse_code_from_string_normal():
+    """The fenced python body is recovered from the surrounding prose."""
+    body = parse_code_from_string("Intro ```python\nagent.wait(1)\n``` end")
+    assert "agent.wait" in body
+
+
+def test_extract_agent_functions():
+    """Both agent.* calls in a semicolon-joined line are reported."""
+    found = extract_agent_functions("agent.wait(1); agent.click('ok')")
+    for expected in ("agent.wait", "agent.click"):
+        assert any(expected in call for call in found)
diff --git a/tests/test_worker.py b/tests/test_worker.py
@@ -0,0 +1,79 @@
+import io
+
+from PIL import Image
+
+from gui_agents.s3.agents.agent_s import AgentS3
+from gui_agents.s3.agents.grounding import OSWorldACI
+from gui_agents.s3.core import mllm as mllm_mod
+
+
+# Monkeypatch LMMAgent used in Worker via module replacement
+class FakeLMMAgent:
+    """Offline LMMAgent stand-in that always answers with agent.wait(0.5)."""
+
+    def __init__(self, engine_params=None, system_prompt=None, engine=None):
+        # engine_params and engine are accepted but ignored.
+        self.messages = []
+        self.system_prompt = system_prompt or "You are a helpful assistant."
+
+    def reset(self):
+        system_text = {"type": "text", "text": self.system_prompt}
+        self.messages = [{"role": "system", "content": [system_text]}]
+
+    def add_system_prompt(self, prompt):
+        self.system_prompt = prompt
+
+    def add_message(self, text_content=None, image_content=None, role=None, **kwargs):
+        # image_content is dropped; only the text part is recorded.
+        text_part = {"type": "text", "text": text_content}
+        self.messages.append({"role": role or "user", "content": [text_part]})
+
+    def get_response(self, *args, **kwargs):
+        # Literal split across lines to stay within the 88-column lint limit.
+        return (
+            "<thoughts>thinking</thoughts>"
+            "<answer>```python\nagent.wait(0.5)\n```</answer>"
+        )
+
+
+mllm_mod.LMMAgent = FakeLMMAgent  # patch the source module first
+import gui_agents.s3.agents.code_agent as _code_agent  # noqa: E402
+import gui_agents.s3.agents.grounding as _grounding  # noqa: E402
+
+_code_agent.LMMAgent = _grounding.LMMAgent = FakeLMMAgent  # rebind module names
+
+
+def _create_screenshot():
+    """Return PNG-encoded bytes of a solid-colour 100x100 RGB image."""
+    sink = io.BytesIO()
+    Image.new("RGB", (100, 100), color=(73, 109, 137)).save(sink, format="PNG")
+    return sink.getvalue()
+
+
+def test_worker_generate_next_action():
+    """predict() with the faked LLM returns exactly one wait/sleep action."""
+    observation = {"screenshot": _create_screenshot()}
+    grounding_params = {
+        "engine_type": "mock",
+        "grounding_width": 100,
+        "grounding_height": 100,
+    }
+    aci = OSWorldACI(
+        env=None,
+        platform="linux",
+        engine_params_for_generation={"engine_type": "mock"},
+        engine_params_for_grounding=grounding_params,
+        width=100,
+        height=100,
+    )
+    agent = AgentS3(
+        worker_engine_params={"engine_type": "mock", "model": "gpt-4o"},
+        grounding_agent=aci,
+        platform="linux",
+    )
+
+    info, actions = agent.predict(instruction="Wait small", observation=observation)
+
+    assert isinstance(actions, list)
+    assert len(actions) == 1
+    assert any(token in actions[0] for token in ("time.sleep", "wait"))