Skip to content

Commit fb56d13

Browse files
committed
refactor(tests): migrate Python tests from custom runner to pytest
1 parent 3d06f33 commit fb56d13

2 files changed

Lines changed: 43 additions & 90 deletions

File tree

tests/adapters/hermes.test.ts

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,25 +40,18 @@ describe("plugin manifest", () => {
4040
});
4141
});
4242

43-
// ── Hook behavioral tests (via Python subprocess) ───────
43+
// ── Hook behavioral tests (via pytest) ──────────────────
4444

4545
const describeBehavior = isWindows ? describe.skip : describe;
4646

4747
describeBehavior("hook behavior", () => {
48-
it("passes all Python hook tests", () => {
49-
const result = spawnSync("python3", [TEST_SCRIPT, "--json"], {
48+
it("passes all Python hook tests via pytest", () => {
49+
const result = spawnSync("pytest", [TEST_SCRIPT, "-x", "-q"], {
5050
encoding: "utf-8",
5151
timeout: 15_000,
5252
});
5353

5454
expect(result.error).toBeUndefined();
5555
expect(result.status).toBe(0);
56-
57-
const output = JSON.parse(result.stdout);
58-
expect(output.all_passed).toBe(true);
59-
60-
for (const test of output.results) {
61-
expect(test.passed).toBe(true);
62-
}
6356
});
6457
});

tests/adapters/hermes_test.py

Lines changed: 40 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,46 @@
22
Hermes plugin test suite for context-mode.
33
44
Tests the hook functions directly by importing the module.
5-
Usage: python3 tests/adapters/hermes_test.py [--json]
5+
Usage: pytest tests/adapters/hermes_test.py -v
66
"""
77

8-
import json
9-
import sys
108
import os
119
import tempfile
1210
import sqlite3
1311

14-
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../.hermes-plugin"))
12+
import pytest
13+
14+
15+
# ── Fixture ──────────────────────────────────────────────
16+
17+
18+
@pytest.fixture
def mod():
    """Load the hermes plugin module fresh, with HERMES_HOME in a temp dir.

    Yields the imported module. Teardown restores the prior HERMES_HOME
    value exactly (distinguishing "unset" from "set to empty string") and
    removes the temporary directory so repeated runs don't leak dirs.
    """
    import importlib.util
    import shutil

    # None means "was unset"; an empty string is a real, restorable value.
    old_home = os.environ.get("HERMES_HOME")
    test_home = tempfile.mkdtemp(prefix="hermes-test-")
    os.environ["HERMES_HOME"] = test_home

    spec = importlib.util.spec_from_file_location(
        "hermes_context_mode",
        os.path.join(os.path.dirname(__file__), "../../.hermes-plugin/__init__.py"),
    )
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    yield module

    # Teardown (runs even when the test fails, since this is a yield fixture).
    # Original used `if old_home:` — truthiness would drop an empty-string
    # HERMES_HOME instead of restoring it.
    if old_home is not None:
        os.environ["HERMES_HOME"] = old_home
    else:
        os.environ.pop("HERMES_HOME", None)
    # Clean up the mkdtemp directory the original leaked.
    shutil.rmtree(test_home, ignore_errors=True)
1540

1641

1742
# ── pre_tool_call ────────────────────────────────────────
1843

44+
1945
def test_pre_tool_call_allows_allowed_command(mod):
2046
"""pre_tool_call returns None for commands in the ALLOWED list."""
2147
result = mod.pre_tool_call(
@@ -64,6 +90,7 @@ def test_pre_tool_call_ignores_non_terminal(mod):
6490

6591
# ── pre_llm_call ─────────────────────────────────────────
6692

93+
6794
def test_pre_llm_call_injects_on_first_turn(mod):
6895
"""pre_llm_call returns context on first turn for a new session."""
6996
result = mod.pre_llm_call(
@@ -107,9 +134,9 @@ def test_pre_llm_call_skips_when_already_shown(mod):
107134

108135
# ── pre_llm_call: memory leak cap ───────────────────────
109136

137+
110138
def test_guidance_cap_prevents_memory_leak(mod):
111139
"""_GUIDANCE_CAP prevents unbounded growth by clearing SESSION_GUIDANCE_SHOWN."""
112-
# Fill SESSION_GUIDANCE_SHOWN beyond _GUIDANCE_CAP
113140
for i in range(mod._GUIDANCE_CAP + 5):
114141
sid = f"leak-test-{i}"
115142
mod.pre_llm_call(
@@ -118,8 +145,6 @@ def test_guidance_cap_prevents_memory_leak(mod):
118145
is_first_turn=True,
119146
)
120147

121-
# After exceeding cap, the dict should have been cleared and only
122-
# the most recent entry should exist
123148
assert len(mod.SESSION_GUIDANCE_SHOWN) <= mod._GUIDANCE_CAP + 1, (
124149
f"GUIDANCE_SHOWN grew to {len(mod.SESSION_GUIDANCE_SHOWN)} "
125150
f"(cap is {mod._GUIDANCE_CAP})"
@@ -128,6 +153,7 @@ def test_guidance_cap_prevents_memory_leak(mod):
128153

129154
# ── transform_tool_result ────────────────────────────────
130155

156+
131157
def test_transform_tool_result_small_output(mod):
132158
"""transform_tool_result returns None for small outputs (<3KB)."""
133159
result = mod.transform_tool_result(
@@ -137,7 +163,7 @@ def test_transform_tool_result_small_output(mod):
137163
session_id="test-session",
138164
task_id="test-task",
139165
)
140-
assert result is None, f"Expected None for small output"
166+
assert result is None, "Expected None for small output"
141167

142168

143169
def test_transform_tool_result_large_output(mod):
@@ -151,11 +177,12 @@ def test_transform_tool_result_large_output(mod):
151177
task_id="test-task",
152178
)
153179
assert result is not None, "Expected sandbox summary for large output"
154-
assert "<sandboxed_output" in result, f"Expected sandbox summary"
180+
assert "<sandboxed_output" in result, "Expected sandbox summary"
155181

156182

157183
# ── on_session_start ─────────────────────────────────────
158184

185+
159186
def test_on_session_start_clears_guidance(mod):
160187
"""on_session_start removes session_id from SESSION_GUIDANCE_SHOWN."""
161188
mod.pre_llm_call(
@@ -175,9 +202,9 @@ def test_on_session_start_clears_guidance(mod):
175202

176203
# ── on_session_end: metrics persistence ──────────────────
177204

205+
178206
def test_on_session_end_persists_metrics(mod):
179207
"""on_session_end writes session metrics to SQLite."""
180-
# Create a session and simulate tool calls that produce savings
181208
sid = "metrics-test-session"
182209
mod.on_session_start(session_id=sid, model="test-model", platform="test-platform")
183210

@@ -190,86 +217,19 @@ def test_on_session_end_persists_metrics(mod):
190217
task_id="metrics-task",
191218
)
192219

193-
# End the session
194220
mod.on_session_end(session_id=sid, completed=True, interrupted=False)
195221

196-
# Verify metrics were persisted to SQLite
197222
conn = sqlite3.connect(str(mod.METRICS_DB))
198223
try:
199224
row = conn.execute(
200225
"SELECT session_id, platform, model, tool_calls, bytes_saved, blocks "
201-
"FROM session_metrics WHERE session_id = ?", (sid,)
226+
"FROM session_metrics WHERE session_id = ?",
227+
(sid,),
202228
).fetchone()
203229
assert row is not None, f"Expected metrics row for {sid}"
204-
sid_db, platform, model, tool_calls, bytes_saved, blocks = row
230+
_, platform, model, tool_calls, bytes_saved, blocks = row
205231
assert platform == "test-platform"
206232
assert model == "test-model"
207233
assert bytes_saved > 0, f"Expected bytes_saved > 0, got {bytes_saved}"
208234
finally:
209235
conn.close()
210-
211-
212-
# ── Runner ──────────────────────────────────────────────
213-
214-
TEST_ENV = {"HERMES_HOME": tempfile.mkdtemp(prefix="hermes-test-")}
215-
216-
217-
def run_all():
218-
import importlib.util
219-
220-
old_home = os.environ.get("HERMES_HOME")
221-
os.environ["HERMES_HOME"] = TEST_ENV["HERMES_HOME"]
222-
223-
spec = importlib.util.spec_from_file_location(
224-
"hermes_context_mode",
225-
os.path.join(os.path.dirname(__file__), "../../.hermes-plugin/__init__.py"),
226-
)
227-
mod = importlib.util.module_from_spec(spec)
228-
spec.loader.exec_module(mod)
229-
230-
tests = [
231-
("allows_allowed_command", test_pre_tool_call_allows_allowed_command),
232-
("blocks_curl", test_pre_tool_call_blocks_curl),
233-
("blocks_wget", test_pre_tool_call_blocks_wget),
234-
("ignores_non_terminal", test_pre_tool_call_ignores_non_terminal),
235-
("injects_on_first_turn", test_pre_llm_call_injects_on_first_turn),
236-
("skips_on_subsequent_turns", test_pre_llm_call_skips_on_subsequent_turns),
237-
("skips_when_already_shown", test_pre_llm_call_skips_when_already_shown),
238-
("guidance_cap_prevents_memory_leak", test_guidance_cap_prevents_memory_leak),
239-
("small_output_passthrough", test_transform_tool_result_small_output),
240-
("large_output_sandboxed", test_transform_tool_result_large_output),
241-
("session_start_clears_guidance", test_on_session_start_clears_guidance),
242-
("session_end_persists_metrics", test_on_session_end_persists_metrics),
243-
]
244-
245-
results = []
246-
all_passed = True
247-
for name, fn in tests:
248-
try:
249-
fn(mod)
250-
results.append({"name": name, "passed": True})
251-
except Exception as e:
252-
results.append({"name": name, "passed": False, "error": str(e)})
253-
all_passed = False
254-
255-
if "--json" in sys.argv:
256-
print(json.dumps({"results": results, "all_passed": all_passed}, indent=2))
257-
else:
258-
for r in results:
259-
status = "\u2713" if r["passed"] else "\u2717"
260-
print(f" {status} {r['name']}")
261-
if not r["passed"]:
262-
print(f" {r['error']}")
263-
failed = sum(1 for r in results if not r["passed"])
264-
print(f"\n{'All passed!' if all_passed else f'{failed} failures'}")
265-
266-
if old_home:
267-
os.environ["HERMES_HOME"] = old_home
268-
else:
269-
os.environ.pop("HERMES_HOME", None)
270-
271-
return 0 if all_passed else 1
272-
273-
274-
if __name__ == "__main__":
275-
sys.exit(run_all())

0 commit comments

Comments
 (0)