|
| 1 | +""" |
| 2 | +Live end-to-end test: real Claude Code -> ACS adapter -> Guardian. |
| 3 | +
|
| 4 | +Spawns `claude --print` in a subprocess against a project-level settings.json |
| 5 | +that wires in the adapter, exercises both the ALLOW and DENY paths, and asserts |
| 6 | +that Claude Code's observable output reflects the Guardian's verdict. |
| 7 | +
|
| 8 | +Requires: |
| 9 | + - `claude` CLI available on PATH (Claude Code installed) |
| 10 | + - Python 3.10+ |
| 11 | +
|
| 12 | +Skipped automatically when `claude` is not on PATH. |
| 13 | +""" |
| 14 | +from __future__ import annotations |
| 15 | + |
import json
import os
import shlex
import shutil
import socket
import subprocess
import sys
import tempfile
import time
import unittest
from pathlib import Path
| 26 | + |
| 27 | + |
# Directory holding this test module; the adapter and the example guardian
# are resolved relative to its parent directory.
HERE = Path(__file__).resolve().parent
ADAPTER_DIR = HERE.parent
ADAPTER = ADAPTER_DIR.joinpath("acs_adapter.py")
GUARDIAN = ADAPTER_DIR.joinpath("example_guardian.py")


# True only when a `claude` executable can be located on PATH.
CLAUDE_AVAILABLE = shutil.which("claude") is not None
| 35 | + |
| 36 | + |
def _free_port() -> int:
    """Return a port number the OS handed out for a loopback bind to port 0.

    The probing socket is closed before returning, so the port is only
    *likely* to still be free when the caller binds it.
    """
    probe = socket.socket()
    try:
        probe.bind(("127.0.0.1", 0))
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()
| 41 | + |
| 42 | + |
def _wait(host: str, port: int, timeout: float = 5.0) -> None:
    """Block until a TCP connection to ``host:port`` succeeds.

    Retries the connection (0.2 s connect timeout, 50 ms pause after each
    failure) until ``timeout`` seconds have elapsed.

    Raises:
        RuntimeError: if no connection could be established in time.
    """
    # Use the monotonic clock so wall-clock adjustments (NTP step, manual
    # change) can neither stretch nor cut short the deadline.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=0.2):
                return
        except OSError:
            time.sleep(0.05)
    raise RuntimeError(f"guardian not up at {host}:{port}")
| 52 | + |
| 53 | + |
@unittest.skipUnless(CLAUDE_AVAILABLE, "`claude` CLI not on PATH")
class LiveClaudeCodeRoundTrip(unittest.TestCase):
    """Run a real `claude --print` session with the adapter as PreToolUse hook.

    Class setup creates a throwaway project directory whose
    `.claude/settings.json` registers the adapter, and starts the example
    guardian on a loopback port. Each test sends one prompt and inspects
    Claude Code's stdout.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Create the temp project, write the hook settings, start the guardian."""
        cls.workdir = tempfile.mkdtemp(prefix="acs-live-cc-")
        cls.guardian_proc = None
        try:
            cls.port = _free_port()

            # Project-level settings.json wires the adapter into Claude
            # Code's PreToolUse hook. Using the project root .claude/ so we
            # don't touch the user's ~/.claude/settings.json.
            claude_dir = Path(cls.workdir) / ".claude"
            claude_dir.mkdir()
            # Quote both paths: the hook is a shell command line, and either
            # path may contain spaces. Use the interpreter running the tests
            # so the adapter and the guardian share the same Python.
            hook_command = (
                f"ACS_GUARDIAN_URL=http://127.0.0.1:{cls.port}/acs "
                f"{shlex.quote(sys.executable)} {shlex.quote(str(ADAPTER))}"
            )
            settings = {
                "hooks": {
                    "PreToolUse": [{
                        "matcher": "*",
                        "hooks": [{
                            "type": "command",
                            "command": hook_command,
                        }],
                    }],
                }
            }
            (claude_dir / "settings.json").write_text(
                json.dumps(settings, indent=2), encoding="utf-8"
            )

            # Discard the guardian's output. A PIPE that nobody reads can
            # fill up and block the child process.
            cls.guardian_proc = subprocess.Popen(
                [sys.executable, str(GUARDIAN), "--port", str(cls.port)],
                stderr=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
            )
            _wait("127.0.0.1", cls.port)
        except BaseException:
            # unittest does not call tearDownClass when setUpClass raises,
            # so release the process and the temp dir here before re-raising.
            cls.tearDownClass()
            raise

    @classmethod
    def tearDownClass(cls) -> None:
        """Stop the guardian (if it was started) and delete the temp project."""
        proc = getattr(cls, "guardian_proc", None)
        if proc is not None:
            proc.terminate()
            try:
                proc.wait(timeout=2.0)
            except subprocess.TimeoutExpired:
                proc.kill()
                proc.wait()  # reap the killed process
        shutil.rmtree(cls.workdir, ignore_errors=True)

    def _claude(self, prompt: str, timeout: float = 120.0) -> tuple[int, str]:
        """Invoke `claude --print` from the test workdir.

        Returns:
            ``(returncode, stdout)`` of the finished process.

        Raises:
            subprocess.TimeoutExpired: if the CLI runs longer than ``timeout``.
        """
        proc = subprocess.run(
            ["claude", "--print", "--permission-mode", "acceptEdits", prompt],
            cwd=self.workdir,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return proc.returncode, proc.stdout

    # ----- ALLOW path -----

    def test_benign_bash_runs(self) -> None:
        """Benign Bash is allowed: the echoed marker shows up in stdout."""
        marker = "ACS_LIVE_TEST_OK_MARKER"
        rc, stdout = self._claude(f"Run the shell command: echo {marker}")
        self.assertEqual(rc, 0, f"claude exited {rc}; stdout={stdout[:200]}")
        self.assertIn(marker, stdout,
                      f"benign command should have run; stdout={stdout[:300]}")

    # ----- DENY path -----

    def test_destructive_bash_blocked(self) -> None:
        """Destructive Bash is denied: Claude Code's output mentions the block."""
        # The example_guardian DESTRUCTIVE_BASH pattern matches 'rm -rf /...'
        # PreToolUse fires BEFORE the command runs, so the Guardian sees
        # the proposed command and denies it; the command never executes.
        prompt = (
            "Use the Bash tool with this exact command: "
            "rm -rf /tmp/acs-nonexistent-live-test-target"
        )
        rc, stdout = self._claude(prompt)
        self.assertEqual(rc, 0, f"claude exited {rc}; stdout={stdout[:200]}")
        # Claude Code's response should reference the block / the Guardian
        lo = stdout.lower()
        self.assertTrue(
            any(word in lo for word in ("block", "denied", "policy", "destructive")),
            f"deny should surface in Claude Code's response; stdout={stdout[:400]}",
        )
| 143 | + |
| 144 | + |
if __name__ == "__main__":
    # Direct execution: hand control to unittest's runner with verbosity 2.
    unittest.main(verbosity=2)
0 commit comments