@@ -6,7 +6,23 @@ import (
66 "strings"
77)
88
9- var patterns = []* regexp.Regexp {
9+ // FilterLevel selects which pattern groups FilterWithLevel applies; lower values redact more (Strict < Moderate < Minimal).
10+ type FilterLevel int
11+
12+ const (
13+ // Strict applies every pattern group: secrets, PII (phones, SSNs, card numbers), emails, and IPv4 addresses.
14+ Strict FilterLevel = iota
15+ // Moderate strips secrets and PII (phones, SSNs, card numbers) but leaves ALL emails and ALL IPv4 addresses in place; it does not single out work emails or private ranges such as 10.x / 192.168.x.
16+ Moderate
17+ // Minimal skips the PII and email/IP pattern groups; only explicit secrets (API keys, private keys, connection strings) and high-entropy tokens are targeted.
18+ Minimal
19+ )
20+
21+ // DefaultFilterLevel is the level Filter passes to FilterWithLevel. At Moderate, Filter leaves emails and IPv4 addresses unredacted; set it to Strict to redact them as well.
22+ var DefaultFilterLevel = Moderate
23+
24+ // secretPatterns are always stripped (all levels including Minimal).
25+ var secretPatterns = []* regexp.Regexp {
1026 // API keys
1127 regexp .MustCompile (`sk-[a-zA-Z0-9]{20,}` ), // OpenAI
1228 regexp .MustCompile (`AKIA[A-Z0-9]{16}` ), // AWS Access Key
@@ -26,23 +42,55 @@ var patterns = []*regexp.Regexp{
2642 regexp .MustCompile (`-----BEGIN\s+\w+\s+PRIVATE\s+KEY-----[\s\S]*?-----END\s+\w+\s+PRIVATE\s+KEY-----` ),
2743 // Connection strings with passwords
2844 regexp .MustCompile (`(?i)(postgres|mysql|mongodb)://[^\s"]+:[^\s"]+@[^\s"]+` ),
29- // PII: email addresses
30- regexp .MustCompile (`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}` ),
45+ }
46+
47+ // piiPatterns are applied by FilterWithLevel when level <= Moderate, i.e. by Strict and Moderate but not Minimal.
48+ var piiPatterns = []* regexp.Regexp {
3149 // PII: US phone numbers (optional +1 prefix, optional parentheses around the area code, '-', '.', or whitespace as separators)
3250 regexp .MustCompile (`\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b` ),
3351 // PII: US Social Security Numbers (strict ddd-dd-dddd form, hyphens required)
3452 regexp .MustCompile (`\b\d{3}-\d{2}-\d{4}\b` ),
35- // PII: IPv4 addresses (non-localhost, non-private documentation ranges)
36- regexp .MustCompile (`\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b` ),
3753 // PII: credit card numbers: any 13-19 digits optionally separated by spaces or hyphens; no Luhn checksum is verified, so other long digit runs match too
3854 regexp .MustCompile (`\b(?:\d[ -]*?){13,19}\b` ),
3955}
4056
41- // Filter replaces secrets in content with [REDACTED].
57+ // strictOnlyPatterns are applied only when level == Strict; Moderate and Minimal never run them.
58+ var strictOnlyPatterns = []* regexp.Regexp {
59+ // PII: email addresses (matches any address; Moderate and Minimal leave every email in place, not just work ones)
60+ regexp .MustCompile (`[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}` ),
61+ // PII: IPv4 addresses (any dotted quad with octets 0-255; Moderate and Minimal leave every IP in place, public as well as 10.x / 192.168.x)
62+ regexp .MustCompile (`\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b` ),
63+ }
64+
65+ // patterns concatenates secretPatterns, piiPatterns, and strictOnlyPatterns in that order, the same sequence FilterWithLevel applies at Strict. Kept for backward compatibility; Filter no longer ranges over it. The inner append starts from a fresh empty slice, so the three source slices are not aliased.
66+ var patterns = append (append (append ([]* regexp.Regexp {}, secretPatterns ... ), piiPatterns ... ), strictOnlyPatterns ... )
67+
68+ // Filter returns content with matches replaced by [REDACTED], delegating to FilterWithLevel at DefaultFilterLevel. NOTE(review): the previous body ranged over every pattern; with the default at Moderate, emails and IPv4 addresses now pass through unredacted. Confirm existing callers expect this.
4269func Filter (content string ) string {
43- for _ , p := range patterns {
70+ return FilterWithLevel (content , DefaultFilterLevel )
71+ }
72+
73+ // FilterWithLevel replaces secrets in content with [REDACTED] at the specified level.
74+ func FilterWithLevel (content string , level FilterLevel ) string {
75+ // All levels strip explicit secrets
76+ for _ , p := range secretPatterns {
4477 content = p .ReplaceAllString (content , "[REDACTED]" )
4578 }
79+
80+ // Moderate and Strict also strip PII patterns (phones, SSNs, credit cards)
81+ if level <= Moderate {
82+ for _ , p := range piiPatterns {
83+ content = p .ReplaceAllString (content , "[REDACTED]" )
84+ }
85+ }
86+
87+ // Strict strips everything including emails and all IPs
88+ if level == Strict {
89+ for _ , p := range strictOnlyPatterns {
90+ content = p .ReplaceAllString (content , "[REDACTED]" )
91+ }
92+ }
93+
4694 // Catch high-entropy tokens that regexes might miss.
4795 // Only target tokens that look like standalone secrets (no JSON, no code).
4896 for _ , word := range strings .Fields (content ) {
0 commit comments