PACT-Plugin/pact-plugin/hooks/git_commit_check.py at main · Synaptic-Labs-AI/PACT-Plugin · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
#!/usr/bin/env python3
"""
Location: pact-plugin/hooks/git_commit_check.py
Summary: PreToolUse hook that validates git commits for PACT protocol compliance.
Used by: Claude Code settings.json PreToolUse hook (matcher: Bash for git commit)

Enforces:
- SACROSANCT Rule 1: No credentials/secrets in committed files
- SACROSANCT Rule 2: No frontend credential exposure, backend proxy pattern
- .env file protection in .gitignore

Input: JSON from stdin with tool_input containing the command
Output: Exit code 2 to block, 0 to allow; errors to stderr
"""

from __future__ import annotations

# ─── stdlib first (used by _emit_load_failure_deny BEFORE wrapped imports) ─
import sys
import json
import re
from typing import NoReturn

# Pre-serialized JSON object {"suppressOutput": true}. main() prints this to
# stdout on every allow path immediately before exiting with status 0.
_SUPPRESS_OUTPUT = json.dumps({"suppressOutput": True})


def _emit_load_failure_deny(stage: str, error: BaseException) -> NoReturn:
    """Stdlib-only fail-closed deny for module-load failure. Mirrors the
    ``dispatch_gate`` / ``bootstrap_gate`` analogue.

    Without this, a raise from the cross-package imports below would crash the
    hook (exit 1), which the platform treats as a NON-blocking PreToolUse hook
    — the Bash tool would PROCEED and the commit-compliance gate (credential
    scanning included) would silently FAIL-OPEN. Emitting a deny + exit 2
    keeps the gate fail-CLOSED. hookEventName MUST be present.
    """
    print(json.dumps({
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "permissionDecision": "deny",
            "permissionDecisionReason": (
                f"PACT git_commit_check {stage} failure — blocking for safety. "
                f"{type(error).__name__}: {error}. Check hook installation "
                "and shared module availability."
            ),
        }
    }))
    print(
        f"Hook load error (git_commit_check / {stage}): {error}",
        file=sys.stderr,
    )
    sys.exit(2)


# ─── fail-closed wrapper on cross-package imports ──────────────────────────
try:
    from shared.error_output import hook_error_json
    from shared.git_helpers import run_git
except BaseException as _module_load_error:  # noqa: BLE001 — fail-closed catch-all
    _emit_load_failure_deny("module imports", _module_load_error)


def get_staged_files():
    """Return the paths staged for commit, EXCLUDING deletions.

    `--diff-filter=d` excludes deletion-only stagings so security scans (which
    inspect staged content for secrets / .env paths) do not flag a user's
    `git rm --cached <file>` remediation. The deleted path has no staged
    content to scan and no new secret to leak — excluding at the source of
    truth keeps downstream checks (check_security, check_hardcoded_secrets,
    check_frontend_credentials, check_direct_api_calls) correct by default.

    `-z` makes git emit each path verbatim, terminated by NUL. Without it,
    git wraps "unusual" paths (non-ASCII bytes under the default
    core.quotePath, embedded quotes, backslashes, control characters) in
    double quotes with C-style escapes. A quoted path ends in `"`, so every
    downstream `endswith('.env')` / `endswith('.py')` test would miss it and
    `git show :<path>` could not resolve it — i.e. such files would skip the
    scans entirely.

    Returns:
        List of staged path strings as reported by git. Fail-open empty list
        on any subprocess failure (run_git returned None or a non-zero exit).
    """
    result = run_git(
        ["diff", "--name-only", "--cached", "--diff-filter=d", "-z"]
    )
    if result is None or result.returncode != 0:
        return []
    # NUL-terminated output leaves a trailing empty field; drop empties.
    return [path for path in result.stdout.split("\0") if path]


def get_staged_file_content(filename):
    """Fetch the staged (index) version of ``filename`` via ``git show :<path>``.

    Args:
        filename: Path of the staged file, as listed by git.

    Returns:
        The command's stdout on success. Fail-open empty string when run_git
        returns None or git exits non-zero.
    """
    shown = run_git(["show", f":{filename}"])
    succeeded = shown is not None and shown.returncode == 0
    return shown.stdout if succeeded else ""


def check_security(staged_files):
    """
    Run the basic security screen over the staged files.

    Two independent passes are made, and their findings are returned in this
    order:
      1. Path check - any staged path that looks like an environment file.
      2. Content check - code files whose staged content matches a pattern
         for logging/printing env vars, passwords, secrets, API keys or tokens.
         One finding is produced per (file, matching pattern) pair.

    Args:
        staged_files: List of staged file paths

    Returns:
        List of error messages for any violations found
    """
    # Pass 1: environment files identified purely by their path.
    env_file_errors = [
        (
            f"SACROSANCT VIOLATION: Attempting to commit environment file: {path}. "
            "If this is a template, rename to env.example (no leading dot) "
            "to commit as a template file."
        )
        for path in staged_files
        if path.endswith('.env') or '/.env' in path or path.startswith('.env')
    ]

    # Pass 2: sensitive values written to logs. Matching is case-insensitive.
    log_leak_patterns = (
        r'console\.log\s*\(.*process\.env',
        r'print\s*\(.*os\.environ',
        r'console\.log\s*\(.*password',
        r'print\s*\(.*password',
        r'console\.log\s*\(.*secret',
        r'print\s*\(.*secret',
        r'console\.log\s*\(.*api[_-]?key',
        r'print\s*\(.*api[_-]?key',
        r'console\.log\s*\(.*token',
        r'print\s*\(.*token',
    )
    scanned_suffixes = ('.js', '.ts', '.jsx', '.tsx', '.py', '.mjs', '.cjs')

    log_leak_errors = []
    for path in staged_files:
        if not path.endswith(scanned_suffixes):
            continue
        text = get_staged_file_content(path)
        log_leak_errors.extend(
            f"SECURITY: Potential secret exposure in log in {path}: "
            f"matches pattern '{regex}'"
            for regex in log_leak_patterns
            if re.search(regex, text, re.IGNORECASE)
        )

    return env_file_errors + log_leak_errors


def check_frontend_credentials(staged_files):
    """
    SACROSANCT Rule 2: Check for credential exposure in frontend code.

    Frontend environment variables with credential suffixes should not be used
    as they expose credentials in client-side bundles.

    A file is scanned when it has a frontend-only extension (.jsx, .tsx, .vue,
    .svelte), or when it is a .js/.ts file located under a frontend directory.
    Each pattern that matches contributes one error, so a single identifier
    may be reported by more than one pattern.

    Args:
        staged_files: List of staged file paths

    Returns:
        List of error messages for any violations found
    """
    errors = []

    # Patterns indicating credential usage in frontend env vars
    credential_patterns = [
        r'VITE_[A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)',
        r'REACT_APP_[A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)',
        r'NEXT_PUBLIC_[A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)',
        r'NUXT_PUBLIC_[A-Z_]*(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH)',
        r'process\.env\.(VITE_|REACT_APP_|NEXT_PUBLIC_|NUXT_PUBLIC_)[A-Z_]*(?:KEY|SECRET|TOKEN)',
        r'import\.meta\.env\.(VITE_)[A-Z_]*(?:KEY|SECRET|TOKEN)',
    ]

    # Frontend file extensions
    frontend_extensions = ('.jsx', '.tsx', '.vue', '.svelte')
    # Also check .js and .ts if they're in frontend directories
    frontend_dirs = {'src', 'components', 'pages', 'app', 'frontend', 'client', 'ui'}

    for f in staged_files:
        is_frontend_ext = f.endswith(frontend_extensions)
        is_frontend_dir = any(
            f'/{d}/' in f or f.startswith(f'{d}/') for d in frontend_dirs
        )

        # Check frontend-specific files or JS/TS in frontend directories
        should_check = is_frontend_ext or (
            f.endswith(('.js', '.ts')) and is_frontend_dir
        )
        if not should_check:
            continue

        content = get_staged_file_content(f)
        for pattern in credential_patterns:
            # re.search + group(0) yields the whole matched text. re.findall
            # would return only the capture group for the last two patterns
            # (e.g. "VITE_"), hiding the offending identifier from the report.
            match = re.search(pattern, content, re.IGNORECASE)
            if match:
                errors.append(
                    f"SACROSANCT VIOLATION: Frontend credential exposure in {f}. "
                    f"Found: {match.group(0)}. Credentials must NEVER be in frontend code. "
                    "Use backend proxy pattern instead."
                )

    return errors


def check_direct_api_calls(staged_files):
    """
    SACROSANCT Rule 2: Warn about potential direct API calls from frontend.

    Frontend code is expected to talk to backend endpoints rather than to
    external APIs directly (a direct call would need credentials in the
    frontend). A file is inspected only when it has a frontend extension,
    sits under a frontend directory, and is not under a backend directory.
    At most one warning is produced per file: the first pattern that matches.

    Args:
        staged_files: List of staged file paths

    Returns:
        List of warning messages (non-blocking)
    """
    def _is_under(path, dir_names):
        # True when any of dir_names appears as a directory component of
        # path, either at the very start or somewhere in the middle.
        return any(
            path.startswith(f'{name}/') or f'/{name}/' in path
            for name in dir_names
        )

    # (regex, human-readable label) pairs suggesting a direct external call
    call_signatures = (
        (r'fetch\s*\(\s*[\'"`]https?://api\.', 'fetch to external API'),
        (r'axios\.[a-z]+\s*\(\s*[\'"`]https?://api\.', 'axios to external API'),
        (r'fetch\s*\(\s*[\'"`]https?://[^/]*\.stripe\.com', 'direct Stripe API call'),
        (r'fetch\s*\(\s*[\'"`]https?://[^/]*\.openai\.com', 'direct OpenAI API call'),
        (r'fetch\s*\(\s*[\'"`]https?://[^/]*\.anthropic\.com', 'direct Anthropic API call'),
        (r'fetch\s*\(\s*[\'"`]https?://[^/]*\.github\.com/(?!repos/[^/]+/[^/]+$)', 'direct GitHub API call'),
        (r'fetch\s*\(\s*[\'"`]https?://[^/]*\.googleapis\.com', 'direct Google API call'),
    )

    client_suffixes = ('.jsx', '.tsx', '.vue', '.svelte', '.js', '.ts')
    client_dirs = ('src', 'components', 'pages', 'app', 'frontend', 'client', 'ui')
    # Paths under any of these are treated as backend code and skipped
    server_dirs = ('server', 'api', 'backend', 'lib', 'services', 'handlers')

    notices = []
    for path in staged_files:
        if not path.endswith(client_suffixes):
            continue
        if not _is_under(path, client_dirs):
            continue
        if _is_under(path, server_dirs):
            continue

        text = get_staged_file_content(path)
        first_hit = next(
            (
                label
                for regex, label in call_signatures
                if re.search(regex, text, re.IGNORECASE)
            ),
            None,
        )
        if first_hit is not None:
            notices.append(
                f"SACROSANCT Warning: Potential {first_hit} in {path}. "
                "Verify backend proxy pattern is used."
            )

    return notices


def check_env_file_in_gitignore():
    """
    Ask git whether `.env` is ignored, using `git check-ignore -q .env`.

    Delegating to git (instead of parsing .gitignore by hand) means the whole
    ignore chain is honoured - global excludes, .git/info/exclude, parent-dir
    .gitignores and the repo-root .gitignore - and `!.env` negations are not
    misread as protection.

    Outcome mapping:
      * run_git returned None (timeout or git binary missing) -> WARNING
      * exit 0   -> protected, no message
      * exit 1   -> VIOLATION (.env is not ignored)
      * exit 128 -> WARNING (reported as not in a git repo)
      * other    -> WARNING carrying the exit code

    Detection-mechanism failures are fail-open: they yield a message that
    contains "WARNING", which main() routes to its non-blocking warnings
    list. That is safe because check_security independently blocks staged
    .env files.

    Returns:
        Tuple of (is_protected, error_message or None)
    """
    probe = run_git(["check-ignore", "-q", ".env"])

    if probe is None:
        # run_git collapses TimeoutExpired and FileNotFoundError into None.
        # Both lead to the same remediation ("make sure git is installed and
        # functional"), hence one merged WARNING. See arch §8
        # (wording-merge decision).
        return False, (
            "SACROSANCT WARNING: 'git check-ignore' could not be invoked "
            "(timeout or git binary missing); cannot verify .env protection."
        )

    exit_code = probe.returncode
    if exit_code == 0:
        return True, None

    if exit_code == 1:
        message = (
            "SACROSANCT VIOLATION: .env is not ignored by git. "
            "Add '.env' to .gitignore (repo), ~/.config/git/ignore (global), "
            "or .git/info/exclude (per-repo private). "
            "If .env already appears in .gitignore, you may have tracked it "
            "previously; run 'git rm --cached .env' to untrack."
        )
    elif exit_code == 128:
        message = (
            "SACROSANCT WARNING: 'git check-ignore' reports not in a git repo "
            "(exit 128); cannot verify .env protection."
        )
    else:
        message = (
            f"SACROSANCT WARNING: 'git check-ignore' exited {exit_code}; "
            "cannot verify .env protection."
        )
    return False, message


def check_hardcoded_secrets(staged_files):
    """
    Scan staged code files for hardcoded secrets and API keys.

    Only files with one of the listed code extensions are read. Every
    signature is tried case-insensitively against the staged content; each
    signature that matches adds one error showing the first match, cut to
    30 characters plus '...' when longer.

    Args:
        staged_files: List of staged file paths

    Returns:
        List of error messages for any violations found
    """
    # (regex, description) pairs for values that look like embedded secrets
    signatures = (
        # API keys with common prefixes
        (r'["\']sk-[a-zA-Z0-9]{20,}["\']', 'OpenAI API key'),
        (r'["\']sk_live_[a-zA-Z0-9]{20,}["\']', 'Stripe live key'),
        (r'["\']sk_test_[a-zA-Z0-9]{20,}["\']', 'Stripe test key'),
        (r'["\']ghp_[a-zA-Z0-9]{36,}["\']', 'GitHub personal access token'),
        (r'["\']gho_[a-zA-Z0-9]{36,}["\']', 'GitHub OAuth token'),
        (r'["\']xox[baprs]-[a-zA-Z0-9-]{10,}["\']', 'Slack token'),
        # Anthropic API keys (start with sk-ant-api)
        (r'["\']sk-ant-api[a-zA-Z0-9_-]{20,}["\']', 'Anthropic API key'),
        # Google API keys (start with AIza)
        (r'["\']AIza[a-zA-Z0-9_-]{30,}["\']', 'Google API key'),
        # Twilio Account SID (starts with AC followed by 32 hex chars)
        (r'["\']AC[a-f0-9]{32}["\']', 'Twilio Account SID'),
        # AWS access key IDs (always start with AKIA for long-term keys)
        (r'["\']AKIA[0-9A-Z]{16}["\']', 'AWS access key ID'),
        # Private key headers (PEM format)
        (r'-----BEGIN\s+(?:RSA\s+|EC\s+|DSA\s+|OPENSSH\s+)?PRIVATE\s+KEY-----', 'Private key'),
        # JWT tokens (three base64url segments separated by dots)
        (r'["\']eyJ[a-zA-Z0-9_-]{10,}\.eyJ[a-zA-Z0-9_-]{10,}\.[a-zA-Z0-9_-]{10,}["\']', 'JWT token'),
        # Google Cloud service account key (JSON key file marker)
        (r'"type"\s*:\s*"service_account"', 'Google Cloud service account key'),
        # Azure connection strings
        (r'DefaultEndpointsProtocol=https?;AccountName=[^;]+;AccountKey=[^;]+', 'Azure Storage connection string'),
        (r'Server=tcp:[^;]+;.*Password=[^;]+', 'Azure SQL connection string'),
        # Generic patterns
        (r'api[_-]?key\s*[=:]\s*["\'][a-zA-Z0-9]{20,}["\']', 'API key assignment'),
        (r'secret[_-]?key\s*[=:]\s*["\'][a-zA-Z0-9]{20,}["\']', 'Secret key assignment'),
        (r'password\s*[=:]\s*["\'][^"\']{8,}["\']', 'Hardcoded password'),
    )
    scanned_suffixes = ('.js', '.ts', '.jsx', '.tsx', '.py', '.java', '.go', '.rs', '.rb')
    preview_limit = 30

    findings = []
    for path in staged_files:
        if not path.endswith(scanned_suffixes):
            continue

        text = get_staged_file_content(path)
        for regex, label in signatures:
            hit = re.search(regex, text, re.IGNORECASE)
            if hit is None:
                continue
            # Show only the first match, shortened for display.
            found = hit.group(0)
            if len(found) > preview_limit:
                shown = found[:preview_limit] + '...'
            else:
                shown = found
            findings.append(
                f"SACROSANCT VIOLATION: Potential {label} in {path}: {shown}"
            )

    return findings


def main():
    """Hook entry point: vet a pending Bash command before it runs.

    Reads the hook payload as JSON from stdin and looks at
    ``tool_input.command``. Commands that are not ``git commit`` and commits
    with nothing staged are allowed straight away. Otherwise every check is
    run against the staged files, warnings are reported on stderr without
    blocking, and any security error blocks the tool call.

    Exit codes:
        0 - allow (the suppress-output payload is printed to stdout), or an
            unexpected exception occurred inside the hook (fail-open).
        2 - block: at least one security error was found.
    """
    try:
        payload = json.load(sys.stdin)
        command = payload.get("tool_input", {}).get("command", "")

        # Anything that is not a git commit is none of this hook's business.
        if re.search(r'\bgit\s+commit\b', command) is None:
            print(_SUPPRESS_OUTPUT)
            sys.exit(0)

        staged = get_staged_files()

        # Nothing staged: allow, and let git report the problem itself.
        if not staged:
            print(_SUPPRESS_OUTPUT)
            sys.exit(0)

        # --- SACROSANCT Security Checks (blocking) ---
        blocking = [
            # Basic screen: env files, secrets written to logs
            *check_security(staged),
            # Rule 1: hardcoded secrets
            *check_hardcoded_secrets(staged),
            # Rule 2: frontend credential exposure
            *check_frontend_credentials(staged),
        ]

        # Rule 2: direct API calls from frontend (non-blocking)
        advisories = list(check_direct_api_calls(staged))

        # .env ignore protection: the message text decides the severity.
        _, env_message = check_env_file_in_gitignore()
        if env_message:
            bucket = blocking if "VIOLATION" in env_message else advisories
            bucket.append(env_message)

        # --- Output Warnings (non-blocking) ---
        if advisories:
            warning_report = [
                "PACT Security Warnings:",
                "-" * 30,
                *(f"  * {note}" for note in advisories),
                "-" * 30,
                "Review these warnings before deployment.",
                "",
            ]
            for line in warning_report:
                print(line, file=sys.stderr)

        # --- Block on Security Errors ---
        if blocking:
            error_report = [
                "Error: PACT Security Violation",
                "=" * 40,
                *(f"* {problem}" for problem in blocking),
                "=" * 40,
                "Please fix security issues before committing.",
                "See SACROSANCT rules in CLAUDE.md for guidance.",
            ]
            for line in error_report:
                print(line, file=sys.stderr)
            sys.exit(2)  # Block the tool execution

        print(_SUPPRESS_OUTPUT)
        sys.exit(0)  # Allow the commit

    except Exception as exc:
        # A failure inside the hook is logged but does not block the tool.
        # The sys.exit() calls above raise SystemExit, which is not an
        # Exception subclass, so they pass through this handler untouched.
        print(f"Hook Error (git_commit_check): {exc}", file=sys.stderr)
        print(hook_error_json("git_commit_check", exc))
        sys.exit(0)


# Run the hook only when this file is executed as a script; importing the
# module (e.g. to call individual checks) must not read stdin or exit.
if __name__ == "__main__":
    main()