Skip to content

Commit fb56d13

Browse files
committed
refactor(tests): migrate Python tests from custom runner to pytest
1 parent 3d06f33 commit fb56d13

2 files changed

Lines changed: 43 additions & 90 deletions

File tree

tests/adapters/hermes.test.ts

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,18 @@ describe("plugin manifest", () => {
4040
});
4141
});
4242

43-
// ── Hook behavioral tests (via Python subprocess) ───────
43+
// ── Hook behavioral tests (via pytest) ──────────────────
4444

4545
const describeBehavior = isWindows ? describe.skip : describe;
4646

4747
describeBehavior("hook behavior", () => {
48-
it("passes all Python hook tests", () => {
49-
const result = spawnSync("python3", [TEST_SCRIPT, "--json"], {
48+
it("passes all Python hook tests via pytest", () => {
49+
const result = spawnSync("pytest", [TEST_SCRIPT, "-x", "-q"], {
5050
encoding: "utf-8",
5151
timeout: 15_000,
5252
});
5353

5454
expect(result.error).toBeUndefined();
5555
expect(result.status).toBe(0);
56-
57-
const output = JSON.parse(result.stdout);
58-
expect(output.all_passed).toBe(true);
59-
60-
for (const test of output.results) {
61-
expect(test.passed).toBe(true);
62-
}
6356
});
6457
});

tests/adapters/hermes_test.py

Lines changed: 40 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,46 @@
22
Hermes plugin test suite for context-mode.
33
44
Tests the hook functions directly by importing the module.
5-
Usage: python3 tests/adapters/hermes_test.py [--json]
5+
Usage: pytest tests/adapters/hermes_test.py -v
66
"""
77

8-
import json
9-
import sys
108
import os
119
import tempfile
1210
import sqlite3
1311

14-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.hermes-plugin"))
12+
import pytest
13+
14+
15+
# ── Fixture ──────────────────────────────────────────────
16+
17+
18+
@pytest.fixture
def mod():
    """Load the hermes plugin module fresh, with HERMES_HOME in a temp dir.

    Yields the imported module. Teardown restores the prior HERMES_HOME
    value exactly (distinguishing "unset" from "set to empty string") and
    removes the temporary directory so repeated runs don't leak dirs.
    """
    import importlib.util
    import shutil

    # None means "was unset"; an empty string is a real, restorable value.
    old_home = os.environ.get("HERMES_HOME")
    test_home = tempfile.mkdtemp(prefix="hermes-test-")
    os.environ["HERMES_HOME"] = test_home

    spec = importlib.util.spec_from_file_location(
        "hermes_context_mode",
        os.path.join(os.path.dirname(__file__), "../../.hermes-plugin/__init__.py"),
    )
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    yield module

    # Teardown (runs even when the test fails, since this is a yield fixture).
    # Original used `if old_home:` — truthiness would drop an empty-string
    # HERMES_HOME instead of restoring it.
    if old_home is not None:
        os.environ["HERMES_HOME"] = old_home
    else:
        os.environ.pop("HERMES_HOME", None)
    # Clean up the mkdtemp directory the original leaked.
    shutil.rmtree(test_home, ignore_errors=True)
1540

1641

1742
# ── pre_tool_call ────────────────────────────────────────
1843

44+
1945
def test_pre_tool_call_allows_allowed_command(mod):
2046
"""pre_tool_call returns None for commands in the ALLOWED list."""
2147
result = mod.pre_tool_call(
@@ -64,6 +90,7 @@ def test_pre_tool_call_ignores_non_terminal(mod):
6490

6591
# ── pre_llm_call ─────────────────────────────────────────
6692

93+
6794
def test_pre_llm_call_injects_on_first_turn(mod):
6895
"""pre_llm_call returns context on first turn for a new session."""
6996
result = mod.pre_llm_call(
@@ -107,9 +134,9 @@ def test_pre_llm_call_skips_when_already_shown(mod):
107134

108135
# ── pre_llm_call: memory leak cap ───────────────────────
109136

137+
110138
def test_guidance_cap_prevents_memory_leak(mod):
111139
"""_GUIDANCE_CAP prevents unbounded growth by clearing SESSION_GUIDANCE_SHOWN."""
112-
# Fill SESSION_GUIDANCE_SHOWN beyond _GUIDANCE_CAP
113140
for i in range(mod._GUIDANCE_CAP + 5):
114141
sid = f"leak-test-{i}"
115142
mod.pre_llm_call(
@@ -118,8 +145,6 @@ def test_guidance_cap_prevents_memory_leak(mod):
118145
is_first_turn=True,
119146
)
120147

121-
# After exceeding cap, the dict should have been cleared and only
122-
# the most recent entry should exist
123148
assert len(mod.SESSION_GUIDANCE_SHOWN) <= mod._GUIDANCE_CAP + 1, (
124149
f"GUIDANCE_SHOWN grew to {len(mod.SESSION_GUIDANCE_SHOWN)} "
125150
f"(cap is {mod._GUIDANCE_CAP})"
@@ -128,6 +153,7 @@ def test_guidance_cap_prevents_memory_leak(mod):
128153

129154
# ── transform_tool_result ────────────────────────────────
130155

156+
131157
def test_transform_tool_result_small_output(mod):
132158
"""transform_tool_result returns None for small outputs (<3KB)."""
133159
result = mod.transform_tool_result(
@@ -137,7 +163,7 @@ def test_transform_tool_result_small_output(mod):
137163
session_id="test-session",
138164
task_id="test-task",
139165
)
140-
assert result is None, f"Expected None for small output"
166+
assert result is None, "Expected None for small output"
141167

142168

143169
def test_transform_tool_result_large_output(mod):
@@ -151,11 +177,12 @@ def test_transform_tool_result_large_output(mod):
151177
task_id="test-task",
152178
)
153179
assert result is not None, "Expected sandbox summary for large output"
154-
assert "<sandboxed_output" in result, f"Expected sandbox summary"
180+
assert "<sandboxed_output" in result, "Expected sandbox summary"
155181

156182

157183
# ── on_session_start ─────────────────────────────────────
158184

185+
159186
def test_on_session_start_clears_guidance(mod):
160187
"""on_session_start removes session_id from SESSION_GUIDANCE_SHOWN."""
161188
mod.pre_llm_call(
@@ -175,9 +202,9 @@ def test_on_session_start_clears_guidance(mod):
175202

176203
# ── on_session_end: metrics persistence ──────────────────
177204

205+
178206
def test_on_session_end_persists_metrics(mod):
179207
"""on_session_end writes session metrics to SQLite."""
180-
# Create a session and simulate tool calls that produce savings
181208
sid = "metrics-test-session"
182209
mod.on_session_start(session_id=sid, model="test-model", platform="test-platform")
183210

@@ -190,86 +217,19 @@ def test_on_session_end_persists_metrics(mod):
190217
task_id="metrics-task",
191218
)
192219

193-
# End the session
194220
mod.on_session_end(session_id=sid, completed=True, interrupted=False)
195221

196-
# Verify metrics were persisted to SQLite
197222
conn = sqlite3.connect(str(mod.METRICS_DB))
198223
try:
199224
row = conn.execute(
200225
"SELECT session_id, platform, model, tool_calls, bytes_saved, blocks "
201-
"FROM session_metrics WHERE session_id = ?", (sid,)
226+
"FROM session_metrics WHERE session_id = ?",
227+
(sid,),
202228
).fetchone()
203229
assert row is not None, f"Expected metrics row for {sid}"
204-
sid_db, platform, model, tool_calls, bytes_saved, blocks = row
230+
_, platform, model, tool_calls, bytes_saved, blocks = row
205231
assert platform == "test-platform"
206232
assert model == "test-model"
207233
assert bytes_saved > 0, f"Expected bytes_saved > 0, got {bytes_saved}"
208234
finally:
209235
conn.close()
210-
211-
212-
# ── Runner ──────────────────────────────────────────────
213-
214-
TEST_ENV = {"HERMES_HOME": tempfile.mkdtemp(prefix="hermes-test-")}
215-
216-
217-
def run_all():
218-
import importlib.util
219-
220-
old_home = os.environ.get("HERMES_HOME")
221-
os.environ["HERMES_HOME"] = TEST_ENV["HERMES_HOME"]
222-
223-
spec = importlib.util.spec_from_file_location(
224-
"hermes_context_mode",
225-
os.path.join(os.path.dirname(__file__), "../../.hermes-plugin/__init__.py"),
226-
)
227-
mod = importlib.util.module_from_spec(spec)
228-
spec.loader.exec_module(mod)
229-
230-
tests = [
231-
("allows_allowed_command", test_pre_tool_call_allows_allowed_command),
232-
("blocks_curl", test_pre_tool_call_blocks_curl),
233-
("blocks_wget", test_pre_tool_call_blocks_wget),
234-
("ignores_non_terminal", test_pre_tool_call_ignores_non_terminal),
235-
("injects_on_first_turn", test_pre_llm_call_injects_on_first_turn),
236-
("skips_on_subsequent_turns", test_pre_llm_call_skips_on_subsequent_turns),
237-
("skips_when_already_shown", test_pre_llm_call_skips_when_already_shown),
238-
("guidance_cap_prevents_memory_leak", test_guidance_cap_prevents_memory_leak),
239-
("small_output_passthrough", test_transform_tool_result_small_output),
240-
("large_output_sandboxed", test_transform_tool_result_large_output),
241-
("session_start_clears_guidance", test_on_session_start_clears_guidance),
242-
("session_end_persists_metrics", test_on_session_end_persists_metrics),
243-
]
244-
245-
results = []
246-
all_passed = True
247-
for name, fn in tests:
248-
try:
249-
fn(mod)
250-
results.append({"name": name, "passed": True})
251-
except Exception as e:
252-
results.append({"name": name, "passed": False, "error": str(e)})
253-
all_passed = False
254-
255-
if "--json" in sys.argv:
256-
print(json.dumps({"results": results, "all_passed": all_passed}, indent=2))
257-
else:
258-
for r in results:
259-
status = "\u2713" if r["passed"] else "\u2717"
260-
print(f" {status} {r['name']}")
261-
if not r["passed"]:
262-
print(f" {r['error']}")
263-
failed = sum(1 for r in results if not r["passed"])
264-
print(f"\n{'All passed!' if all_passed else f'{failed} failures'}")
265-
266-
if old_home:
267-
os.environ["HERMES_HOME"] = old_home
268-
else:
269-
os.environ.pop("HERMES_HOME", None)
270-
271-
return 0 if all_passed else 1
272-
273-
274-
if __name__ == "__main__":
275-
sys.exit(run_all())

0 commit comments

Comments
 (0)