"""Hermes plugin test suite for context-mode.

Tests the hook functions directly by importing the module.

Usage: pytest tests/adapters/hermes_test.py -v
"""
77
8- import json
9- import sys
108import os
119import tempfile
1210import sqlite3
1311
14- sys .path .insert (0 , os .path .join (os .path .dirname (__file__ ), "../../.hermes-plugin" ))
12+ import pytest
13+
14+
15+ # ── Fixture ──────────────────────────────────────────────
16+
17+
@pytest.fixture
def mod():
    """Import the context-mode plugin in isolation with a scratch HERMES_HOME.

    Yields the freshly loaded plugin module (``.hermes-plugin/__init__.py``),
    pointed at a throwaway HERMES_HOME so tests never touch real user state.

    Fixes over the previous version:
      * teardown runs in ``finally`` so it executes even when a test fails
        mid-fixture;
      * ``is not None`` (instead of truthiness) so an originally-empty
        ``HERMES_HOME=""`` is restored rather than popped;
      * the ``mkdtemp()`` scratch directory is removed — the original
        leaked one directory per test invocation.
    """
    import importlib.util
    import shutil

    old_home = os.environ.get("HERMES_HOME")
    test_home = tempfile.mkdtemp(prefix="hermes-test-")
    os.environ["HERMES_HOME"] = test_home

    spec = importlib.util.spec_from_file_location(
        "hermes_context_mode",
        os.path.join(os.path.dirname(__file__), "../../.hermes-plugin/__init__.py"),
    )
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)

    try:
        yield module
    finally:
        if old_home is not None:
            os.environ["HERMES_HOME"] = old_home
        else:
            os.environ.pop("HERMES_HOME", None)
        shutil.rmtree(test_home, ignore_errors=True)
1540
1641
1742# ── pre_tool_call ────────────────────────────────────────
1843
44+
1945def test_pre_tool_call_allows_allowed_command (mod ):
2046 """pre_tool_call returns None for commands in the ALLOWED list."""
2147 result = mod .pre_tool_call (
@@ -64,6 +90,7 @@ def test_pre_tool_call_ignores_non_terminal(mod):
6490
6591# ── pre_llm_call ─────────────────────────────────────────
6692
93+
6794def test_pre_llm_call_injects_on_first_turn (mod ):
6895 """pre_llm_call returns context on first turn for a new session."""
6996 result = mod .pre_llm_call (
@@ -107,9 +134,9 @@ def test_pre_llm_call_skips_when_already_shown(mod):
107134
108135# ── pre_llm_call: memory leak cap ───────────────────────
109136
137+
110138def test_guidance_cap_prevents_memory_leak (mod ):
111139 """_GUIDANCE_CAP prevents unbounded growth by clearing SESSION_GUIDANCE_SHOWN."""
112- # Fill SESSION_GUIDANCE_SHOWN beyond _GUIDANCE_CAP
113140 for i in range (mod ._GUIDANCE_CAP + 5 ):
114141 sid = f"leak-test-{ i } "
115142 mod .pre_llm_call (
@@ -118,8 +145,6 @@ def test_guidance_cap_prevents_memory_leak(mod):
118145 is_first_turn = True ,
119146 )
120147
121- # After exceeding cap, the dict should have been cleared and only
122- # the most recent entry should exist
123148 assert len (mod .SESSION_GUIDANCE_SHOWN ) <= mod ._GUIDANCE_CAP + 1 , (
124149 f"GUIDANCE_SHOWN grew to { len (mod .SESSION_GUIDANCE_SHOWN )} "
125150 f"(cap is { mod ._GUIDANCE_CAP } )"
@@ -128,6 +153,7 @@ def test_guidance_cap_prevents_memory_leak(mod):
128153
129154# ── transform_tool_result ────────────────────────────────
130155
156+
131157def test_transform_tool_result_small_output (mod ):
132158 """transform_tool_result returns None for small outputs (<3KB)."""
133159 result = mod .transform_tool_result (
@@ -137,7 +163,7 @@ def test_transform_tool_result_small_output(mod):
137163 session_id = "test-session" ,
138164 task_id = "test-task" ,
139165 )
140- assert result is None , f "Expected None for small output"
166+ assert result is None , "Expected None for small output"
141167
142168
143169def test_transform_tool_result_large_output (mod ):
@@ -151,11 +177,12 @@ def test_transform_tool_result_large_output(mod):
151177 task_id = "test-task" ,
152178 )
153179 assert result is not None , "Expected sandbox summary for large output"
154- assert "<sandboxed_output" in result , f "Expected sandbox summary"
180+ assert "<sandboxed_output" in result , "Expected sandbox summary"
155181
156182
157183# ── on_session_start ─────────────────────────────────────
158184
185+
159186def test_on_session_start_clears_guidance (mod ):
160187 """on_session_start removes session_id from SESSION_GUIDANCE_SHOWN."""
161188 mod .pre_llm_call (
@@ -175,9 +202,9 @@ def test_on_session_start_clears_guidance(mod):
175202
176203# ── on_session_end: metrics persistence ──────────────────
177204
205+
178206def test_on_session_end_persists_metrics (mod ):
179207 """on_session_end writes session metrics to SQLite."""
180- # Create a session and simulate tool calls that produce savings
181208 sid = "metrics-test-session"
182209 mod .on_session_start (session_id = sid , model = "test-model" , platform = "test-platform" )
183210
@@ -190,86 +217,19 @@ def test_on_session_end_persists_metrics(mod):
190217 task_id = "metrics-task" ,
191218 )
192219
193- # End the session
194220 mod .on_session_end (session_id = sid , completed = True , interrupted = False )
195221
196- # Verify metrics were persisted to SQLite
197222 conn = sqlite3 .connect (str (mod .METRICS_DB ))
198223 try :
199224 row = conn .execute (
200225 "SELECT session_id, platform, model, tool_calls, bytes_saved, blocks "
201- "FROM session_metrics WHERE session_id = ?" , (sid ,)
226+ "FROM session_metrics WHERE session_id = ?" ,
227+ (sid ,),
202228 ).fetchone ()
203229 assert row is not None , f"Expected metrics row for { sid } "
204- sid_db , platform , model , tool_calls , bytes_saved , blocks = row
230+ _ , platform , model , tool_calls , bytes_saved , blocks = row
205231 assert platform == "test-platform"
206232 assert model == "test-model"
207233 assert bytes_saved > 0 , f"Expected bytes_saved > 0, got { bytes_saved } "
208234 finally :
209235 conn .close ()
210-
211-
212- # ── Runner ──────────────────────────────────────────────
213-
214- TEST_ENV = {"HERMES_HOME" : tempfile .mkdtemp (prefix = "hermes-test-" )}
215-
216-
217- def run_all ():
218- import importlib .util
219-
220- old_home = os .environ .get ("HERMES_HOME" )
221- os .environ ["HERMES_HOME" ] = TEST_ENV ["HERMES_HOME" ]
222-
223- spec = importlib .util .spec_from_file_location (
224- "hermes_context_mode" ,
225- os .path .join (os .path .dirname (__file__ ), "../../.hermes-plugin/__init__.py" ),
226- )
227- mod = importlib .util .module_from_spec (spec )
228- spec .loader .exec_module (mod )
229-
230- tests = [
231- ("allows_allowed_command" , test_pre_tool_call_allows_allowed_command ),
232- ("blocks_curl" , test_pre_tool_call_blocks_curl ),
233- ("blocks_wget" , test_pre_tool_call_blocks_wget ),
234- ("ignores_non_terminal" , test_pre_tool_call_ignores_non_terminal ),
235- ("injects_on_first_turn" , test_pre_llm_call_injects_on_first_turn ),
236- ("skips_on_subsequent_turns" , test_pre_llm_call_skips_on_subsequent_turns ),
237- ("skips_when_already_shown" , test_pre_llm_call_skips_when_already_shown ),
238- ("guidance_cap_prevents_memory_leak" , test_guidance_cap_prevents_memory_leak ),
239- ("small_output_passthrough" , test_transform_tool_result_small_output ),
240- ("large_output_sandboxed" , test_transform_tool_result_large_output ),
241- ("session_start_clears_guidance" , test_on_session_start_clears_guidance ),
242- ("session_end_persists_metrics" , test_on_session_end_persists_metrics ),
243- ]
244-
245- results = []
246- all_passed = True
247- for name , fn in tests :
248- try :
249- fn (mod )
250- results .append ({"name" : name , "passed" : True })
251- except Exception as e :
252- results .append ({"name" : name , "passed" : False , "error" : str (e )})
253- all_passed = False
254-
255- if "--json" in sys .argv :
256- print (json .dumps ({"results" : results , "all_passed" : all_passed }, indent = 2 ))
257- else :
258- for r in results :
259- status = "\u2713 " if r ["passed" ] else "\u2717 "
260- print (f" { status } { r ['name' ]} " )
261- if not r ["passed" ]:
262- print (f" { r ['error' ]} " )
263- failed = sum (1 for r in results if not r ["passed" ])
264- print (f"\n { 'All passed!' if all_passed else f'{ failed } failures' } " )
265-
266- if old_home :
267- os .environ ["HERMES_HOME" ] = old_home
268- else :
269- os .environ .pop ("HERMES_HOME" , None )
270-
271- return 0 if all_passed else 1
272-
273-
274- if __name__ == "__main__" :
275- sys .exit (run_all ())
0 commit comments