-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathscan-secrets.sh
More file actions
executable file
·121 lines (108 loc) · 3.9 KB
/
Copy pathscan-secrets.sh
File metadata and controls
executable file
·121 lines (108 loc) · 3.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env bash
set -euo pipefail
# scan-secrets.sh — Detect accidentally-staged API keys / secrets.
#
# Usage:
# scripts/scan-secrets.sh # scan git index (staged files)
# scripts/scan-secrets.sh --tracked # scan all tracked files
# scripts/scan-secrets.sh --all <path> # scan arbitrary path recursively
#
# Returns non-zero exit code if any match is found, and prints the offending
# file / line to stderr. Designed to be wired into pre-commit and CI.
#
# Pattern policy: we match on PROVIDER-SPECIFIC prefixes rather than generic
# "looks like a key" regexes. This keeps false positives close to zero for
# checked-in fixtures, tests, and docs.
# Patterns to match. Format: "label|regex".
#   - label: human-readable name printed in the report; everything before the
#     first '|' (so a label must not contain '|').
#   - regex: POSIX extended regular expression (grep -E syntax); everything
#     after the first '|'.
# Extend this list whenever you add a new provider to qmd.
#
# NOTE: the private-key regex begins with '-'. grep must receive such a pattern
# via `-e` (or after `--`); otherwise it is parsed as a command-line option.
PATTERNS=(
  "Jina API key|jina_[A-Za-z0-9]{32,}"
  "OpenAI API key|sk-[A-Za-z0-9]{20,}"
  # Project-scoped OpenAI keys carry an extra "proj-" segment and may contain
  # '_' / '-', so the plain "sk-" pattern above cannot match them.
  "OpenAI project API key|sk-proj-[A-Za-z0-9_-]{20,}"
  "Anthropic API key|sk-ant-[A-Za-z0-9_-]{20,}"
  "Voyage API key|pa-[A-Za-z0-9]{20,}"
  "Cohere API key|co-[A-Za-z0-9]{20,}"
  "GitHub token|gh[pousr]_[A-Za-z0-9]{20,}"
  "AWS access key|AKIA[0-9A-Z]{16}"
  # The key-type word is an optional group rather than an empty alternative
  # ("...|DSA |)"): POSIX leaves an empty ERE alternative undefined, so some
  # grep implementations may reject it.
  "Generic private key block|-----BEGIN ((RSA|EC|OPENSSH|DSA) )?PRIVATE KEY-----"
)
# Files we explicitly allow to contain secret-looking strings (tests, docs
# that show the *shape* of a key but use an obvious placeholder).
# Matched against the repo-relative path. An optional leading "./" is accepted
# because `find .` (the --all mode with its default target) prints paths in
# that form; without it the anchored regex would never match in that mode.
ALLOWLIST_REGEX='^(\./)?(\.env\.example|test/.*\.test\.ts|README\.md|CHANGELOG\.md|scripts/scan-secrets\.sh)$'
# Positional arguments (both optional):
#   $1 -> MODE         scan-mode flag; anything other than --tracked / --all
#                      selects the staged-files scan.
#   $2 -> TARGET_PATH  directory walked by --all; the current directory when
#                      absent or empty.
MODE=${1:-}
TARGET_PATH=${2:-.}
# Print a short human-readable description of what the current MODE scans.
# Globals: MODE (read), TARGET_PATH (read, only for --all)
# Outputs: one line on stdout
mode_desc() {
  if [[ "$MODE" == "--tracked" ]]; then
    printf '%s\n' "all tracked files"
  elif [[ "$MODE" == "--all" ]]; then
    printf '%s\n' "path: $TARGET_PATH"
  else
    # Any other value (including empty) means the default pre-commit scan.
    printf '%s\n' "staged files"
  fi
}
# Collect the file list based on mode.
#
# Globals: MODE (read), TARGET_PATH (read, only for --all)
# Outputs: one path per line on stdout
#   --tracked  every path git tracks (git ls-files)
#   --all      every regular file under $TARGET_PATH, minus anything inside a
#              node_modules/, .git/ or dist/ directory
#   (other)    paths staged in the index, deletions excluded
# Returns: the exit status of the underlying git / find command.
collect_files() {
  case "$MODE" in
    --tracked)
      # core.quotePath=false: by default git C-quotes paths containing
      # non-ASCII bytes ("caf\303\251.txt"); such a quoted name does not exist
      # on disk, so the file would be dropped from the scan. Paths containing
      # control characters, '"' or '\' are still quoted by git.
      git -c core.quotePath=false ls-files
      ;;
    --all)
      # Recursively list files under $TARGET_PATH, excluding node_modules/.git/dist.
      # `!` is the POSIX spelling of the `-not` operator.
      find "$TARGET_PATH" -type f \
        ! -path '*/node_modules/*' \
        ! -path '*/.git/*' \
        ! -path '*/dist/*'
      ;;
    *)
      # Staged files only (pre-commit mode). The ACMR filter keeps added,
      # copied, modified and renamed entries, i.e. it skips deletions.
      git -c core.quotePath=false diff --cached --name-only --diff-filter=ACMR
      ;;
  esac
}
# ---------------------------------------------------------------------------
# Main: check every collected file against every pattern, report hits on
# stderr (file:line only, never the matched text), and exit 1 if any were found.
# ---------------------------------------------------------------------------
FILES=$(collect_files)
if [[ -z "$FILES" ]]; then
  # Nothing to scan, so nothing can be wrong.
  exit 0
fi

# Number of (file, pattern) pairs that produced at least one matching line.
FOUND=0

while IFS= read -r file; do
  # Skip blank entries and anything that is not a regular file on disk.
  if [[ -z "$file" || ! -f "$file" ]]; then
    continue
  fi
  # Skip allowlisted paths (e.g. .env.example which documents the shape).
  if [[ "$file" =~ $ALLOWLIST_REGEX ]]; then
    continue
  fi

  for entry in "${PATTERNS[@]}"; do
    label="${entry%%|*}"
    regex="${entry#*|}"

    # grep flags: -E extended regex, -n line numbers, -I ignore binary files,
    # -h no filename prefix (we already know the file). -o is deliberately NOT
    # used: only the line number is kept, so the secret never reaches the
    # terminal.
    # `-e` and `--` are required: a regex or filename that begins with '-'
    # (the private-key pattern starts with "-----BEGIN") would otherwise be
    # parsed as a grep option, grep would exit 2, and the pattern would
    # silently never match.
    status=0
    matches=$(grep -hnI -E -e "$regex" -- "$file" 2>/dev/null) || status=$?

    # grep: 0 = match, 1 = no match, >1 = error. Surface errors instead of
    # treating them as "clean", but keep scanning the remaining patterns/files.
    if (( status > 1 )); then
      echo >&2 "warning: grep exited with status $status while checking $file for: $label"
      continue
    fi
    if (( status != 0 )) || [[ -z "$matches" ]]; then
      continue
    fi

    echo >&2 ""
    echo >&2 "🚨 POTENTIAL SECRET DETECTED: $label"
    while IFS= read -r hit; do
      # Each hit is "<line>:<content>"; keep only the line number and drop the
      # content to avoid re-leaking the key in terminal output.
      echo >&2 " $file:${hit%%:*} (key redacted — check the file)"
    done <<< "$matches"
    FOUND=$((FOUND + 1))
  done
done <<< "$FILES"

if [[ $FOUND -gt 0 ]]; then
  echo >&2 ""
  echo >&2 "❌ Found $FOUND potential secret(s) in $(mode_desc)."
  echo >&2 ""
  echo >&2 "What to do:"
  echo >&2 " 1. Remove the secret from the file (move to .env / env var)."
  echo >&2 " 2. Re-stage and re-run this check."
  echo >&2 " 3. If the secret was ever committed, rotate it IMMEDIATELY"
  echo >&2 " at the provider — git history is forever."
  echo >&2 ""
  echo >&2 "If this is a false positive, add the path to ALLOWLIST_REGEX"
  echo >&2 "in scripts/scan-secrets.sh (use sparingly)."
  exit 1
fi
exit 0