@@ -13,10 +13,14 @@ import (
1313 "github.com/Pringled/agentcheck/internal/models"
1414)
1515
16- // nameRegexPatterns is compiled once at package init. It matches env var names that suggest
17- // they hold credentials for known providers or generic secret terms.
18- // Case-insensitive match on the full variable name.
19- var nameRegexPatterns = []* regexp.Regexp {
16+ // credentialSuffixRe matches env var names that contain a credential-related term
17+ // anywhere in the name (the pattern is unanchored, so not only as a suffix). Provider name patterns
18+ // require this match to avoid false positives on vars like GITHUB_WORKSPACE or OPENAI_BASE_URL.
19+ var credentialSuffixRe = regexp .MustCompile (`(?i)(KEY|TOKEN|SECRET|PASSWORD|CRED)` )
20+
21+ // providerNamePatterns matches env var names containing a known provider keyword.
22+ // These only produce a finding when the name also matches credentialSuffixRe.
23+ var providerNamePatterns = []* regexp.Regexp {
2024 // AI / ML providers
2125 regexp .MustCompile (`(?i)OPENAI` ),
2226 regexp .MustCompile (`(?i)ANTHROPIC` ),
@@ -81,15 +85,19 @@ var nameRegexPatterns = []*regexp.Regexp{
8185 regexp .MustCompile (`(?i)GITHUB` ),
8286 regexp .MustCompile (`(?i)GITLAB` ),
8387 regexp .MustCompile (`(?i)BITBUCKET` ),
84- // Productivity / project tools (common in agent contexts)
85- regexp .MustCompile (`(?i)(^|_)LINEAR_` ), // (^|_) avoids BILINEAR_FILTER while still matching MY_LINEAR_TOKEN
88+ // Productivity / project tools
89+ regexp .MustCompile (`(?i)(^|_)LINEAR_` ), // (^|_) avoids BILINEAR_FILTER while matching MY_LINEAR_TOKEN
8690 regexp .MustCompile (`(?i)NOTION` ),
8791 regexp .MustCompile (`(?i)AIRTABLE` ),
88- // Database-as-a-service (API keys / connection tokens)
92+ // Database-as-a-service
8993 regexp .MustCompile (`(?i)SUPABASE` ),
9094 regexp .MustCompile (`(?i)(^|_)NEON_` ), // (^|_) avoids ANEMONE_CONFIG, NEONLIGHTS_COLOR while matching MY_NEON_KEY
9195 regexp .MustCompile (`(?i)PLANETSCALE` ),
92- // Generic credential terms
96+ }
97+
98+ // credentialSuffixPatterns matches generic credential terms in env var names.
99+ // These match standalone without requiring a provider keyword.
100+ var credentialSuffixPatterns = []* regexp.Regexp {
93101 regexp .MustCompile (`(?i)API_KEY` ),
94102 regexp .MustCompile (`(?i)API_TOKEN` ),
95103 regexp .MustCompile (`(?i)SECRET_KEY` ),
@@ -109,15 +117,15 @@ type valuePattern struct {
109117
110118// valuePatterns lists known API key formats identified by a distinctive prefix and exact total length.
111119var valuePatterns = []valuePattern {
112- // OpenAI — more-specific prefixes listed first so they match before the generic sk- entry.
120+ // OpenAI - more-specific prefixes listed first so they match before the generic sk- entry.
113121 {prefix : "sk-proj-" , totalLen : 56 , severity : models .SeverityHigh , providerTag : "OpenAI project" },
114122 {prefix : "sk-admin-" , totalLen : 57 , severity : models .SeverityHigh , providerTag : "OpenAI admin" },
115- // sk- is shared by many tools (OpenAI legacy, LangChain proxies, self-hosted LLMs, … ).
123+ // sk- is shared by many tools (OpenAI legacy, LangChain proxies, self-hosted LLMs, etc. ).
116124 // Flag as UNCERTAIN so the user can confirm the actual provider via the variable name.
117125 {prefix : "sk-" , totalLen : 51 , severity : models .SeverityUncertain , providerTag : "possible OpenAI legacy or other sk- key" },
118- // Anthropic — prefix is distinctive enough for HIGH confidence.
126+ // Anthropic - prefix is distinctive enough for HIGH confidence.
119127 {prefix : "sk-ant-" , totalLen : 108 , severity : models .SeverityHigh , providerTag : "Anthropic" },
120- // Stripe — underscore separator makes these provider-specific.
128+ // Stripe - underscore separator makes these provider-specific.
121129 {prefix : "sk_live_" , totalLen : 55 , severity : models .SeverityHigh , providerTag : "Stripe live secret" },
122130 {prefix : "sk_test_" , totalLen : 55 , severity : models .SeverityHigh , providerTag : "Stripe test secret" },
123131 {prefix : "rk_live_" , totalLen : 55 , severity : models .SeverityHigh , providerTag : "Stripe live restricted" },
@@ -128,9 +136,8 @@ var valuePatterns = []valuePattern{
128136 {prefix : "npm_" , totalLen : 40 , severity : models .SeverityHigh , providerTag : "npm access token" },
129137 // Groq — gsk_ prefix confirmed in Groq docs.
130138 {prefix : "gsk_" , totalLen : 56 , severity : models .SeverityHigh , providerTag : "Groq" },
131- // Twilio API key SID — SK + 32 hex chars = 34 total.
132- // SeverityUncertain: the SK prefix is too broad (any 34-char string starting with SK
133- // would match); we don't validate the hex charset, so false positives are likely.
139+ // Twilio API key SID - SK + 32 hex chars = 34 total.
140+ // SeverityUncertain: the SK prefix is broad, so false positives are likely.
134141 {prefix : "SK" , totalLen : 34 , severity : models .SeverityUncertain , providerTag : "Twilio API key SID" },
135142 // SendGrid — SG. + 22 + . + 43 = 69 total (with the dots).
136143 {prefix : "SG." , totalLen : 69 , severity : models .SeverityHigh , providerTag : "SendGrid" },
@@ -160,9 +167,6 @@ var credentialFiles = []config.CredentialFile{
160167
161168// APIKeyScanner scans for high-risk API keys in environment variables and credential config files.
162169// Key names and file paths only are reported in findings; values and file contents are never emitted.
163- // Exception: scanValuePatterns transiently reads env var values solely for prefix+length pattern
164- // matching; values are discarded immediately and never stored in findings, logs, or any
165- // data structure. See scanValuePatterns for the full security contract.
166170// It never returns skipped=true.
167171type APIKeyScanner struct {
168172 Base
@@ -218,8 +222,6 @@ func (s *APIKeyScanner) Scan() models.ScanResult {
218222
219223// scanEnvKeys checks built-in and extra environment variable key names for presence.
220224// Key names only are reported; values are never read or stored.
221- // seenEnvNames is the shared cross-pass dedup set; matched names are added to it so
222- // that scanNameRegex and scanValuePatterns will skip variables already claimed here.
223225func (s * APIKeyScanner ) scanEnvKeys (seenEnvNames map [string ]bool ) []models.Finding {
224226 var findings []models.Finding
225227
@@ -284,30 +286,39 @@ func (s *APIKeyScanner) scanNameRegex(seenEnvNames map[string]bool) []models.Fin
284286 continue
285287 }
286288
287- for _ , re := range nameRegexPatterns {
288- if re .MatchString (name ) {
289- seenEnvNames [name ] = true
290- findings = append (findings , models.Finding {
291- Scanner : "api_keys" ,
292- Resource : name , // key name only, never the value
293- Severity : models .SeverityHigh ,
294- Description : "Can be used to make authenticated API calls." ,
295- })
289+ matched := false
290+ // Provider patterns require the name to also contain a credential suffix.
291+ for _ , re := range providerNamePatterns {
292+ if re .MatchString (name ) && credentialSuffixRe .MatchString (name ) {
293+ matched = true
296294 break
297295 }
298296 }
297+ // Credential suffix patterns match standalone.
298+ if ! matched {
299+ for _ , re := range credentialSuffixPatterns {
300+ if re .MatchString (name ) {
301+ matched = true
302+ break
303+ }
304+ }
305+ }
306+ if matched {
307+ seenEnvNames [name ] = true
308+ findings = append (findings , models.Finding {
309+ Scanner : "api_keys" ,
310+ Resource : name ,
311+ Severity : models .SeverityHigh ,
312+ Description : "Can be used to make authenticated API calls." ,
313+ })
314+ }
299315 }
300316
301317 return findings
302318}
303319
304320// scanValuePatterns reads env var values to match against known provider prefixes.
305- // NOTE: unlike scanEnvKeys and scanNameRegex, this method reads the actual value.
306- // Values are used only for prefix+length pattern matching and then discarded immediately.
307- // No value is stored in findings, logs, or returned data structures.
308- // This is a deliberate, scoped relaxation of the "values never read" contract.
309- // seenEnvNames is the shared cross-pass dedup set; names already claimed by scanNameRegex
310- // are skipped, and newly matched names are added.
321+ // Values are used only for prefix+length matching and then discarded.
311322func (s * APIKeyScanner ) scanValuePatterns (seenEnvNames map [string ]bool ) []models.Finding {
312323 var findings []models.Finding
313324
@@ -344,7 +355,6 @@ func (s *APIKeyScanner) scanValuePatterns(seenEnvNames map[string]bool) []models
344355 break // one finding per variable name
345356 }
346357 }
347- // value goes out of scope here; it is not stored anywhere
348358 }
349359
350360 return findings
@@ -359,9 +369,7 @@ func (s *APIKeyScanner) scanCredentialFiles() []models.Finding {
359369 // If home directory cannot be resolved, skip all ~-based paths to avoid
360370 // scanning incorrect root-relative paths (e.g. /.aws/credentials).
361371 homeDir := s .resolveHomeDir ()
362- // Combine built-in and extra credential files into a single pass.
363- // seenPath deduplicates so that an extra path duplicating a built-in
364- // (e.g. ~/.netrc in both lists) produces only one finding.
372+ // seenPath deduplicates built-in and extra paths.
365373 allCredFiles := append (credentialFiles , s .ExtraCredentialFiles ... )
366374 seenPath := make (map [string ]bool , len (allCredFiles ))
367375 for _ , cf := range allCredFiles {
@@ -386,11 +394,10 @@ func (s *APIKeyScanner) scanCredentialFiles() []models.Finding {
386394 return findings
387395}
388396
389- // envKeyFinding builds a HIGH severity finding for a detected environment variable key.
390397func envKeyFinding (key string ) models.Finding {
391398 return models.Finding {
392399 Scanner : "api_keys" ,
393- Resource : key , // key name only, never the value
400+ Resource : key ,
394401 Severity : models .SeverityHigh ,
395402 Description : "Can be used to make authenticated API calls." ,
396403 }
0 commit comments