|
| 1 | +"""Tests for secret detection in the security-guidance plugin (#398). |
| 2 | +
|
| 3 | +Covers ``plugins/security-guidance/secrets.py``: |
| 4 | + * regex detection of well-known credential formats (AWS, GitHub, Slack, |
| 5 | + Google, Stripe, npm, PEM private key, JWT, generic assignment), |
| 6 | + * the conservative Shannon-entropy backstop, |
| 7 | + * false-positive sanity (benign code + placeholder/example values), |
| 8 | + * end-to-end wiring through the plugin's warn-mode hook. |
| 9 | +
|
| 10 | +Token-shaped fixtures are ASSEMBLED FROM PARTS at runtime so neither the |
| 11 | +repo's secret scanners (GitGuardian on the PR) nor the I/O redactor sees a |
| 12 | +contiguous credential in this file. The detector runs on the concatenated |
| 13 | +runtime value, so detection still exercises the real regexes. |
| 14 | +""" |
| 15 | + |
| 16 | +import importlib.util |
| 17 | +import sys |
| 18 | +import types |
| 19 | +from pathlib import Path |
| 20 | + |
| 21 | +import pytest |
| 22 | + |
| 23 | + |
def _repo_root() -> Path:
    """Return the directory two levels above the folder holding this file.

    The loaders in this module treat that directory as the repository root
    and resolve ``plugins/security-guidance`` relative to it.
    """
    this_file = Path(__file__).resolve()
    # parents[0] is the containing folder, so parents[2] is two levels above it.
    return this_file.parents[2]
| 26 | + |
| 27 | + |
def _load_secrets():
    """Execute the plugin's ``secrets.py`` as a standalone module and return it.

    The file is loaded by path under a private module name, so it never
    collides with the standard-library ``secrets`` module. This helper does
    not register the result in ``sys.modules``; each call executes the file
    again and returns a fresh module object.
    """
    secrets_path = _repo_root().joinpath(
        "plugins", "security-guidance", "secrets.py"
    )
    module_spec = importlib.util.spec_from_file_location(
        "security_guidance_secrets_under_test",
        secrets_path,
    )
    module = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(module)
    return module
| 37 | + |
| 38 | + |
def _load_plugin_init():
    """Load the plugin's ``__init__.py`` as ``hermes_plugins.security_guidance``.

    A stub ``hermes_plugins`` parent module is installed in ``sys.modules``
    when one is not already present. The plugin module is then created with
    the plugin directory as its package search path, registered in
    ``sys.modules`` and executed. Every call re-executes ``__init__.py`` and
    replaces the registered module.
    """
    package_name = "hermes_plugins.security_guidance"
    package_dir = _repo_root() / "plugins" / "security-guidance"

    # Provide an empty parent package so the dotted name has something to hang off.
    if "hermes_plugins" not in sys.modules:
        parent_pkg = types.ModuleType("hermes_plugins")
        parent_pkg.__path__ = []
        sys.modules["hermes_plugins"] = parent_pkg

    module_spec = importlib.util.spec_from_file_location(
        package_name,
        package_dir / "__init__.py",
        submodule_search_locations=[str(package_dir)],
    )
    plugin = importlib.util.module_from_spec(module_spec)
    plugin.__package__ = package_name
    plugin.__path__ = [str(package_dir)]
    # Register before executing: presumably __init__.py imports its sibling
    # modules relatively, which needs the package in sys.modules - TODO confirm.
    sys.modules[package_name] = plugin
    module_spec.loader.exec_module(plugin)
    return plugin
| 57 | + |
| 58 | + |
# Fake credentials, joined from fragments at import time so that no contiguous
# token-shaped string appears in this file for a secret scanner to match.
_AWS_KEY = "".join(("AKIA", "QKZ7X2MNOP3RTUV9"))  # AKIA + 16 upper/digits
_GH_TOKEN = "_".join(("ghp", "b" * 36))  # ghp_ + 36 alnum
_SLACK = "-".join(("xoxb", "123456789012", "abcdefghijkl"))
_GOOGLE = "".join(("AIza", "Sy", "C" * 33))  # AIza + 35
_STRIPE = "".join(("sk", "_live_", "9" * 24))
_NPM = "_".join(("npm", "a" * 36))
_PEM = " ".join(("-----BEGIN", "RSA PRIVATE KEY-----"))
# Three dot-separated segments; the first two start with the "eyJ" prefix.
_JWT = ".".join(
    (
        "eyJ" + "hbGciOiJIUzI1NiJ9",
        "eyJ" + "zdWIiOiIxMjM0NTY3ODkwIn0",
        "SflKxwRJ_signature_part",
    )
)
_HIGH_ENTROPY = "kJ8x2Qm9Zp4Lw7Nv1Rb6Tc3Yd5Fg0Hh"  # 31 chars, all distinct
| 69 | + |
| 70 | + |
class TestRegexSecretDetection:
    """Each known credential format is reported under its expected rule name."""

    def setup_method(self):
        # Fresh copy of the detector module for every test.
        self.s = _load_secrets()

    def _names(self, content):
        """Return the set of rule names ``scan_secrets`` reports for *content*."""
        findings = self.s.scan_secrets("f.py", content)
        return set(name for name, _ in findings)

    def test_aws_access_key_detected(self):
        source = 'key = "' + _AWS_KEY + '"\n'
        assert "aws_access_key_id" in self._names(source)

    def test_pem_private_key_detected(self):
        source = _PEM + "\nMIIE...\n"
        assert "private_key_pem" in self._names(source)

    def test_slack_token_detected(self):
        source = 'tok = "' + _SLACK + '"\n'
        assert "slack_token" in self._names(source)

    def test_github_token_detected(self):
        source = 'gh = "' + _GH_TOKEN + '"\n'
        assert "github_token" in self._names(source)

    def test_google_api_key_detected(self):
        source = 'g = "' + _GOOGLE + '"\n'
        assert "google_api_key" in self._names(source)

    def test_stripe_key_detected(self):
        source = 'sk = "' + _STRIPE + '"\n'
        assert "stripe_secret_key" in self._names(source)

    def test_npm_token_detected(self):
        source = 'n = "' + _NPM + '"\n'
        assert "npm_token" in self._names(source)

    def test_jwt_detected(self):
        source = 'jwt = "' + _JWT + '"\n'
        assert "jwt_token" in self._names(source)

    def test_generic_api_key_assignment_detected(self):
        value = "Z" * 24
        found = self._names(f'api_key = "{value}"\n')
        assert "generic_secret_assignment" in found

    def test_prefix_key_with_filler_substring_still_detected(self):
        # A fixed-prefix key that merely contains a run of zeros must still be
        # reported: for prefix rules only EXAMPLE-style values count as
        # placeholders, so a real secret is never silently dropped.
        body = "00000000" + "c" * 28  # 36 chars after the ghp_ prefix
        tok = "_".join(("ghp", body))
        assert "github_token" in self._names('gh = "' + tok + '"\n')

    def test_each_rule_fires_once(self):
        # The same key on two lines must yield a single finding for its rule.
        content = 'a = "' + _AWS_KEY + '"\nb = "' + _AWS_KEY + '"\n'
        reported = [name for name, _ in self.s.scan_secrets("f.py", content)]
        assert reported.count("aws_access_key_id") == 1
| 117 | + |
| 118 | + |
class TestEntropyBackstop:
    """Behaviour of the Shannon-entropy fallback rule ``high_entropy_secret``."""

    def setup_method(self):
        self.s = _load_secrets()

    def _rule_names(self, content):
        """Return the set of rule names reported for *content*."""
        return {rule for rule, _ in self.s.scan_secrets("f.py", content)}

    def test_high_entropy_secret_assignment_flagged(self):
        # 'db_credential' is an entropy keyword but matches no known-format
        # rule, so only the entropy backstop can report this random value.
        source = 'db_credential = "' + _HIGH_ENTROPY + '"\n'
        assert "high_entropy_secret" in self._rule_names(source)

    def test_low_entropy_secret_named_value_not_flagged(self):
        # A long but repetitive value under a secret-like name is not flagged.
        source = 'password = "aaaaaaaaaaaaaaaaaaaaaaaa"\n'
        assert "high_entropy_secret" not in self._rule_names(source)

    def test_shannon_entropy_sanity(self):
        entropy = self.s.shannon_entropy
        assert entropy("") == 0.0
        assert entropy("aaaaaaaa") < 1.0
        assert entropy(_HIGH_ENTROPY) > 4.0

    def test_entropy_skipped_when_known_secret_already_found(self):
        # The AWS regex fires, so the entropy rule must not add a duplicate.
        source = 'secret = "' + _AWS_KEY + '"\n'
        assert "high_entropy_secret" not in self._rule_names(source)
| 143 | + |
| 144 | + |
class TestFalsePositiveSanity:
    """Inputs that must produce no findings at all."""

    def setup_method(self):
        self.s = _load_secrets()

    def test_benign_code_no_findings(self):
        # Ordinary code with an innocuous API_* constant.
        content = "def add(a, b):\n return a + b\n\nAPI_TIMEOUT = 30\n"
        assert self.s.scan_secrets("f.py", content) == []

    def test_placeholder_api_key_not_flagged(self):
        assert self.s.scan_secrets("f.py", 'api_key = "your-api-key-here"\n') == []

    def test_example_value_not_flagged(self):
        assert self.s.scan_secrets("f.py", 'token = "EXAMPLE_TOKEN_VALUE_1234567890"\n') == []

    def test_empty_content_no_findings(self):
        assert self.s.scan_secrets("f.py", "") == []

    def test_huge_content_skipped(self):
        # The oversized input must contain something detectable. With benign
        # filler alone, "== []" would pass even if no size guard existed.
        # The key sits after the filler, i.e. beyond the 256 KB mark.
        # NOTE(review): assumes scan_secrets ignores input past 256 KB
        # (skip or truncate) - confirm against secrets.py.
        filler = "x = 1\n" * 60000  # 360,000 chars, > 256 KB
        big = filler + f'key = "{_AWS_KEY}"\n'
        assert len(big) > 256 * 1024
        assert self.s.scan_secrets("f.py", big) == []
| 165 | + |
| 166 | + |
class TestHookIntegration:
    """End-to-end checks of the plugin hooks for ``write_file`` calls."""

    @staticmethod
    def _load_with_env(monkeypatch, *, block):
        """Set the plugin's environment switches, then load a fresh plugin module.

        ``SECURITY_GUIDANCE_BLOCK`` is set to ``"1"`` when *block* is true and
        removed otherwise; ``SECURITY_GUIDANCE_DISABLE`` is always removed.
        """
        if block:
            monkeypatch.setenv("SECURITY_GUIDANCE_BLOCK", "1")
        else:
            monkeypatch.delenv("SECURITY_GUIDANCE_BLOCK", raising=False)
        monkeypatch.delenv("SECURITY_GUIDANCE_DISABLE", raising=False)
        return _load_plugin_init()

    def test_write_file_with_aws_key_warns(self, monkeypatch):
        plugin = self._load_with_env(monkeypatch, block=False)
        call_args = {
            "path": "/tmp/config.py",
            "content": 'AWS = "' + _AWS_KEY + '"\n',
        }
        result = plugin._on_transform_tool_result(
            tool_name="write_file",
            args=call_args,
            result='{"success": true, "bytes_written": 40}',
        )
        # The hook returns a string result carrying the warning text.
        assert isinstance(result, str)
        assert "Security guidance" in result
        assert "credential" in result.lower()

    def test_clean_write_no_warning(self, monkeypatch):
        plugin = self._load_with_env(monkeypatch, block=False)
        call_args = {"path": "/tmp/ok.py", "content": "x = 1\n"}
        outcome = plugin._on_transform_tool_result(
            tool_name="write_file",
            args=call_args,
            result='{"success": true}',
        )
        # For clean content the hook returns None.
        assert outcome is None

    def test_block_mode_refuses_write_with_secret(self, monkeypatch):
        plugin = self._load_with_env(monkeypatch, block=True)
        call_args = {
            "path": "/tmp/config.py",
            "content": 'GH = "' + _GH_TOKEN + '"\n',
        }
        out = plugin._on_pre_tool_call(tool_name="write_file", args=call_args)
        assert isinstance(out, dict)
        assert out.get("action") == "block"
0 commit comments