Skip to content

Commit c7b9c00

Browse files
committed
Fixes
1 parent 602fbdd commit c7b9c00

2 files changed

Lines changed: 68 additions & 93 deletions

File tree

internal/scan/apikeys.go

Lines changed: 48 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@ import (
1313
"github.com/Pringled/agentcheck/internal/models"
1414
)
1515

16-
// nameRegexPatterns is compiled once at package init. It matches env var names that suggest
17-
// they hold credentials for known providers or generic secret terms.
18-
// Case-insensitive match on the full variable name.
19-
var nameRegexPatterns = []*regexp.Regexp{
16+
// credentialSuffixRe matches env var names that contain a credential-related term.
17+
// Provider name patterns require this suffix to avoid false positives on non-credential
18+
// vars like GITHUB_WORKSPACE or OPENAI_BASE_URL.
19+
var credentialSuffixRe = regexp.MustCompile(`(?i)(KEY|TOKEN|SECRET|PASSWORD|CRED)`)
20+
21+
// providerNamePatterns matches env var names containing a known provider keyword.
22+
// These only produce a finding when the name also matches credentialSuffixRe.
23+
var providerNamePatterns = []*regexp.Regexp{
2024
// AI / ML providers
2125
regexp.MustCompile(`(?i)OPENAI`),
2226
regexp.MustCompile(`(?i)ANTHROPIC`),
@@ -81,15 +85,19 @@ var nameRegexPatterns = []*regexp.Regexp{
8185
regexp.MustCompile(`(?i)GITHUB`),
8286
regexp.MustCompile(`(?i)GITLAB`),
8387
regexp.MustCompile(`(?i)BITBUCKET`),
84-
// Productivity / project tools (common in agent contexts)
85-
regexp.MustCompile(`(?i)(^|_)LINEAR_`), // (^|_) avoids BILINEAR_FILTER while still matching MY_LINEAR_TOKEN
88+
// Productivity / project tools
89+
regexp.MustCompile(`(?i)(^|_)LINEAR_`), // (^|_) avoids BILINEAR_FILTER while matching MY_LINEAR_TOKEN
8690
regexp.MustCompile(`(?i)NOTION`),
8791
regexp.MustCompile(`(?i)AIRTABLE`),
88-
// Database-as-a-service (API keys / connection tokens)
92+
// Database-as-a-service
8993
regexp.MustCompile(`(?i)SUPABASE`),
9094
regexp.MustCompile(`(?i)(^|_)NEON_`), // (^|_) avoids ANEMONE_CONFIG, NEONLIGHTS_COLOR while matching MY_NEON_KEY
9195
regexp.MustCompile(`(?i)PLANETSCALE`),
92-
// Generic credential terms
96+
}
97+
98+
// credentialSuffixPatterns matches generic credential terms in env var names.
99+
// These match standalone without requiring a provider keyword.
100+
var credentialSuffixPatterns = []*regexp.Regexp{
93101
regexp.MustCompile(`(?i)API_KEY`),
94102
regexp.MustCompile(`(?i)API_TOKEN`),
95103
regexp.MustCompile(`(?i)SECRET_KEY`),
@@ -109,15 +117,15 @@ type valuePattern struct {
109117

110118
// valuePatterns lists known API key formats identified by a distinctive prefix and exact total length.
111119
var valuePatterns = []valuePattern{
112-
// OpenAI more-specific prefixes listed first so they match before the generic sk- entry.
120+
// OpenAI - more-specific prefixes listed first so they match before the generic sk- entry.
113121
{prefix: "sk-proj-", totalLen: 56, severity: models.SeverityHigh, providerTag: "OpenAI project"},
114122
{prefix: "sk-admin-", totalLen: 57, severity: models.SeverityHigh, providerTag: "OpenAI admin"},
115-
// sk- is shared by many tools (OpenAI legacy, LangChain proxies, self-hosted LLMs, ).
123+
// sk- is shared by many tools (OpenAI legacy, LangChain proxies, self-hosted LLMs, etc.).
116124
// Flag as UNCERTAIN so the user can confirm the actual provider via the variable name.
117125
{prefix: "sk-", totalLen: 51, severity: models.SeverityUncertain, providerTag: "possible OpenAI legacy or other sk- key"},
118-
// Anthropic prefix is distinctive enough for HIGH confidence.
126+
// Anthropic - prefix is distinctive enough for HIGH confidence.
119127
{prefix: "sk-ant-", totalLen: 108, severity: models.SeverityHigh, providerTag: "Anthropic"},
120-
// Stripe underscore separator makes these provider-specific.
128+
// Stripe - underscore separator makes these provider-specific.
121129
{prefix: "sk_live_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe live secret"},
122130
{prefix: "sk_test_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe test secret"},
123131
{prefix: "rk_live_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe live restricted"},
@@ -128,9 +136,8 @@ var valuePatterns = []valuePattern{
128136
{prefix: "npm_", totalLen: 40, severity: models.SeverityHigh, providerTag: "npm access token"},
129137
// Groq — gsk_ prefix confirmed in Groq docs.
130138
{prefix: "gsk_", totalLen: 56, severity: models.SeverityHigh, providerTag: "Groq"},
131-
// Twilio API key SID — SK + 32 hex chars = 34 total.
132-
// SeverityUncertain: the SK prefix is too broad (any 34-char string starting with SK
133-
// would match); we don't validate the hex charset, so false positives are likely.
139+
// Twilio API key SID - SK + 32 hex chars = 34 total.
140+
// SeverityUncertain: SK prefix is broad, false positives are likely.
134141
{prefix: "SK", totalLen: 34, severity: models.SeverityUncertain, providerTag: "Twilio API key SID"},
135142
// SendGrid — SG. + 22 + . + 43 = 69 total (with the dots).
136143
{prefix: "SG.", totalLen: 69, severity: models.SeverityHigh, providerTag: "SendGrid"},
@@ -160,9 +167,6 @@ var credentialFiles = []config.CredentialFile{
160167

161168
// APIKeyScanner scans for high-risk API keys in environment variables and credential config files.
162169
// Key names and file paths only are reported in findings; values and file contents are never emitted.
163-
// Exception: scanValuePatterns transiently reads env var values solely for prefix+length pattern
164-
// matching; values are discarded immediately and never stored in findings, logs, or any
165-
// data structure. See scanValuePatterns for the full security contract.
166170
// It never returns skipped=true.
167171
type APIKeyScanner struct {
168172
Base
@@ -218,8 +222,6 @@ func (s *APIKeyScanner) Scan() models.ScanResult {
218222

219223
// scanEnvKeys checks built-in and extra environment variable key names for presence.
220224
// Key names only are reported; values are never read or stored.
221-
// seenEnvNames is the shared cross-pass dedup set; matched names are added to it so
222-
// that scanNameRegex and scanValuePatterns will skip variables already claimed here.
223225
func (s *APIKeyScanner) scanEnvKeys(seenEnvNames map[string]bool) []models.Finding {
224226
var findings []models.Finding
225227

@@ -284,30 +286,39 @@ func (s *APIKeyScanner) scanNameRegex(seenEnvNames map[string]bool) []models.Fin
284286
continue
285287
}
286288

287-
for _, re := range nameRegexPatterns {
288-
if re.MatchString(name) {
289-
seenEnvNames[name] = true
290-
findings = append(findings, models.Finding{
291-
Scanner: "api_keys",
292-
Resource: name, // key name only, never the value
293-
Severity: models.SeverityHigh,
294-
Description: "Can be used to make authenticated API calls.",
295-
})
289+
matched := false
290+
// Provider patterns require the name to also contain a credential suffix.
291+
for _, re := range providerNamePatterns {
292+
if re.MatchString(name) && credentialSuffixRe.MatchString(name) {
293+
matched = true
296294
break
297295
}
298296
}
297+
// Credential suffix patterns match standalone.
298+
if !matched {
299+
for _, re := range credentialSuffixPatterns {
300+
if re.MatchString(name) {
301+
matched = true
302+
break
303+
}
304+
}
305+
}
306+
if matched {
307+
seenEnvNames[name] = true
308+
findings = append(findings, models.Finding{
309+
Scanner: "api_keys",
310+
Resource: name,
311+
Severity: models.SeverityHigh,
312+
Description: "Can be used to make authenticated API calls.",
313+
})
314+
}
299315
}
300316

301317
return findings
302318
}
303319

304320
// scanValuePatterns reads env var values to match against known provider prefixes.
305-
// NOTE: unlike scanEnvKeys and scanNameRegex, this method reads the actual value.
306-
// Values are used only for prefix+length pattern matching and then discarded immediately.
307-
// No value is stored in findings, logs, or returned data structures.
308-
// This is a deliberate, scoped relaxation of the "values never read" contract.
309-
// seenEnvNames is the shared cross-pass dedup set; names already claimed by scanNameRegex
310-
// are skipped, and newly matched names are added.
321+
// Values are used only for prefix+length matching and then discarded.
311322
func (s *APIKeyScanner) scanValuePatterns(seenEnvNames map[string]bool) []models.Finding {
312323
var findings []models.Finding
313324

@@ -344,7 +355,6 @@ func (s *APIKeyScanner) scanValuePatterns(seenEnvNames map[string]bool) []models
344355
break // one finding per variable name
345356
}
346357
}
347-
// value goes out of scope here; it is not stored anywhere
348358
}
349359

350360
return findings
@@ -359,9 +369,7 @@ func (s *APIKeyScanner) scanCredentialFiles() []models.Finding {
359369
// If home directory cannot be resolved, skip all ~-based paths to avoid
360370
// scanning incorrect root-relative paths (e.g. /.aws/credentials).
361371
homeDir := s.resolveHomeDir()
362-
// Combine built-in and extra credential files into a single pass.
363-
// seenPath deduplicates so that an extra path duplicating a built-in
364-
// (e.g. ~/.netrc in both lists) produces only one finding.
372+
// seenPath deduplicates built-in and extra paths.
365373
allCredFiles := append(credentialFiles, s.ExtraCredentialFiles...)
366374
seenPath := make(map[string]bool, len(allCredFiles))
367375
for _, cf := range allCredFiles {
@@ -386,11 +394,10 @@ func (s *APIKeyScanner) scanCredentialFiles() []models.Finding {
386394
return findings
387395
}
388396

389-
// envKeyFinding builds a HIGH severity finding for a detected environment variable key.
390397
func envKeyFinding(key string) models.Finding {
391398
return models.Finding{
392399
Scanner: "api_keys",
393-
Resource: key, // key name only, never the value
400+
Resource: key,
394401
Severity: models.SeverityHigh,
395402
Description: "Can be used to make authenticated API calls.",
396403
}

internal/scan/apikeys_test.go

Lines changed: 20 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ func clearHighRiskEnv(t *testing.T) {
1919
}
2020

2121
// clearAllEnv sets every environment variable to empty for the duration of the test.
22-
// Use this in tests that assert 0 findings, since nameRegex patterns (e.g. (?i)GITHUB)
23-
// can match CI variables like GITHUB_WORKSPACE that aren't credentials.
24-
// t.Setenv restores original values after the test.
2522
func clearAllEnv(t *testing.T) {
2623
t.Helper()
2724
for _, entry := range os.Environ() {
@@ -31,7 +28,6 @@ func clearAllEnv(t *testing.T) {
3128
}
3229
}
3330

34-
// newScannerWithHome creates an APIKeyScanner with HomeDir set to home and no extras.
3531
func newScannerWithHome(home string) *scan.APIKeyScanner {
3632
s := scan.NewAPIKeyScanner()
3733
s.HomeDir = home
@@ -342,11 +338,6 @@ func TestAPIKeyScanner_ExtraCredentialFiles_TildeExpanded(t *testing.T) {
342338
assertResource(t, result.Findings, tokenFile)
343339
}
344340

345-
// ── Name-regex tests ──────────────────────────────────────────────────────────
346-
347-
// TestAPIKeyScanner_NameRegex_ProviderKeyword verifies that an env var with a
348-
// provider keyword in its name (MY_OPENAI_KEY) is flagged even though it is not
349-
// in HighRiskEnvKeys.
350341
func TestAPIKeyScanner_NameRegex_ProviderKeyword(t *testing.T) {
351342
t.Setenv("MY_OPENAI_KEY", "sk-something")
352343
clearHighRiskEnv(t)
@@ -357,8 +348,6 @@ func TestAPIKeyScanner_NameRegex_ProviderKeyword(t *testing.T) {
357348
assertResource(t, result.Findings, "MY_OPENAI_KEY")
358349
}
359350

360-
// TestAPIKeyScanner_NameRegex_GenericTerm verifies that an env var containing a
361-
// generic credential term (INTERNAL_API_KEY) is flagged.
362351
func TestAPIKeyScanner_NameRegex_GenericTerm(t *testing.T) {
363352
t.Setenv("INTERNAL_API_KEY", "secret")
364353
clearHighRiskEnv(t)
@@ -369,9 +358,6 @@ func TestAPIKeyScanner_NameRegex_GenericTerm(t *testing.T) {
369358
assertResource(t, result.Findings, "INTERNAL_API_KEY")
370359
}
371360

372-
// TestAPIKeyScanner_NameRegex_NoDuplicateWithBuiltin verifies that a key already in
373-
// HighRiskEnvKeys (OPENAI_API_KEY) produces exactly ONE finding — scanEnvKeys() gets it
374-
// and scanNameRegex() skips it.
375361
func TestAPIKeyScanner_NameRegex_NoDuplicateWithBuiltin(t *testing.T) {
376362
t.Setenv("OPENAI_API_KEY", "sk-test")
377363
// Clear all built-in keys except OPENAI_API_KEY.
@@ -395,12 +381,6 @@ func TestAPIKeyScanner_NameRegex_NoDuplicateWithBuiltin(t *testing.T) {
395381
}
396382
}
397383

398-
// ── Value-pattern tests ───────────────────────────────────────────────────────
399-
400-
// TestAPIKeyScanner_ValuePatterns verifies that each known provider value pattern
401-
// produces a finding with the correct severity and provider tag in the description.
402-
// Variable names are intentionally neutral (no provider keyword) so the finding
403-
// comes from scanValuePatterns, not scanNameRegex.
404384
func TestAPIKeyScanner_ValuePatterns(t *testing.T) {
405385
cases := []struct {
406386
name string
@@ -447,9 +427,6 @@ func TestAPIKeyScanner_ValuePatterns(t *testing.T) {
447427
}
448428
}
449429

450-
// TestAPIKeyScanner_NameRegex_FLY_Anchored verifies that FLY_ matches FLY_API_TOKEN
451-
// but does NOT match BUTTERFLY_KEY (which contains the substring FLY_ but should not
452-
// be treated as a Fly.io credential due to the word-boundary anchor in the pattern).
453430
func TestAPIKeyScanner_NameRegex_FLY_Anchored(t *testing.T) {
454431
clearHighRiskEnv(t)
455432
t.Setenv("FLY_API_TOKEN", "real-token")
@@ -475,8 +452,6 @@ func TestAPIKeyScanner_NameRegex_FLY_Anchored(t *testing.T) {
475452
}
476453
}
477454

478-
// TestAPIKeyScanner_NameRegex_NewProviders verifies that new provider keywords
479-
// added in this session are recognised.
480455
func TestAPIKeyScanner_NameRegex_NewProviders(t *testing.T) {
481456
clearHighRiskEnv(t)
482457
cases := []struct {
@@ -509,8 +484,6 @@ func TestAPIKeyScanner_NameRegex_NewProviders(t *testing.T) {
509484
}
510485
}
511486

512-
// TestAPIKeyScanner_ValuePattern_NoMatchWrongLength verifies that a value with the
513-
// right prefix but wrong length does NOT produce a finding.
514487
func TestAPIKeyScanner_ValuePattern_NoMatchWrongLength(t *testing.T) {
515488
value := "sk-proj-" + strings.Repeat("x", 10) // total 18 chars, wrong length for any pattern
516489
t.Setenv("SOME_KEY", value)
@@ -526,10 +499,6 @@ func TestAPIKeyScanner_ValuePattern_NoMatchWrongLength(t *testing.T) {
526499
}
527500
}
528501

529-
// TestAPIKeyScanner_ValuePattern_TwilioSID verifies that a Twilio API key SID
530-
// (SK + 32 hex chars = 34 total) produces an UNCERTAIN finding.
531-
// The SK prefix is intentionally broad (any 34-char string starting with SK matches)
532-
// so we use SeverityUncertain rather than SeverityHigh to avoid false positives.
533502
func TestAPIKeyScanner_ValuePattern_TwilioSID(t *testing.T) {
534503
value := "SK" + strings.Repeat("f", 32) // total 34 chars
535504
t.Setenv("CRED_SID", value)
@@ -552,9 +521,6 @@ func TestAPIKeyScanner_ValuePattern_TwilioSID(t *testing.T) {
552521
assertNoSecretValue(t, result.Findings, value)
553522
}
554523

555-
// TestAPIKeyScanner_CrossPassDedup_NameRegexWins verifies that a variable whose name
556-
// matches a nameRegex pattern AND whose value matches a value pattern produces exactly
557-
// ONE finding — from the name-regex pass — not two.
558524
func TestAPIKeyScanner_CrossPassDedup_NameRegexWins(t *testing.T) {
559525
// CUSTOM_STRIPE_KEY matches the STRIPE name-regex.
560526
// sk_live_ + 47 chars matches the Stripe live secret value pattern.
@@ -585,10 +551,6 @@ func TestAPIKeyScanner_CrossPassDedup_NameRegexWins(t *testing.T) {
585551
}
586552
}
587553

588-
// ── Tightened-regex false-positive tests ─────────────────────────────────────
589-
590-
// TestAPIKeyScanner_NameRegex_NEON_NarrowedPattern verifies that the tightened \bNEON_
591-
// pattern does not fire on variable names that contain "neon" as part of a longer word.
592554
func TestAPIKeyScanner_NameRegex_NEON_NarrowedPattern(t *testing.T) {
593555
clearHighRiskEnv(t)
594556
// These should NOT be flagged.
@@ -613,8 +575,6 @@ func TestAPIKeyScanner_NameRegex_NEON_NarrowedPattern(t *testing.T) {
613575
}
614576
}
615577

616-
// TestAPIKeyScanner_NameRegex_LINEAR_NarrowedPattern verifies that the tightened \bLINEAR_
617-
// pattern does not fire on names containing "linear" as a substring.
618578
func TestAPIKeyScanner_NameRegex_LINEAR_NarrowedPattern(t *testing.T) {
619579
clearHighRiskEnv(t)
620580
t.Setenv("BILINEAR_FILTER", "some-value")
@@ -631,8 +591,6 @@ func TestAPIKeyScanner_NameRegex_LINEAR_NarrowedPattern(t *testing.T) {
631591
}
632592
}
633593

634-
// TestAPIKeyScanner_NameRegex_PALM_NarrowedPattern verifies that the tightened \bPALM_
635-
// pattern does not fire on names like NAPALM_MODE.
636594
func TestAPIKeyScanner_NameRegex_PALM_NarrowedPattern(t *testing.T) {
637595
clearHighRiskEnv(t)
638596
t.Setenv("NAPALM_MODE", "some-value")
@@ -652,8 +610,6 @@ func TestAPIKeyScanner_NameRegex_PALM_NarrowedPattern(t *testing.T) {
652610
}
653611
}
654612

655-
// TestAPIKeyScanner_NameRegex_NewAIProviders verifies that newly added AI provider
656-
// name patterns are recognised.
657613
func TestAPIKeyScanner_NameRegex_NewAIProviders(t *testing.T) {
658614
clearHighRiskEnv(t)
659615
cases := []struct {
@@ -683,10 +639,6 @@ func TestAPIKeyScanner_NameRegex_NewAIProviders(t *testing.T) {
683639
}
684640
}
685641

686-
// TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex verifies that a key listed in
687-
// ExtraEnvKeys whose name also matches a nameRegexPattern produces exactly ONE finding.
688-
// Previously scanEnvKeys and scanNameRegex were not sharing the seenEnvNames dedup map,
689-
// so MY_OPENAI_KEY in extra_env_keys would fire twice.
690642
func TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex(t *testing.T) {
691643
const key = "MY_OPENAI_KEY" // matches OPENAI nameRegexPattern AND is in ExtraEnvKeys
692644
t.Setenv(key, "sk-test-value")
@@ -709,11 +661,8 @@ func TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex(t *testing.T) {
709661
}
710662
}
711663

712-
// TestAPIKeyScanner_ValuePattern_BuiltinSkipped verifies that a key in HighRiskEnvKeys
713-
// whose value also matches a value pattern produces exactly ONE finding (from scanEnvKeys,
714-
// not from scanValuePatterns which skips it).
715664
func TestAPIKeyScanner_ValuePattern_BuiltinSkipped(t *testing.T) {
716-
value := "sk-proj-" + strings.Repeat("z", 48) // total 56 chars matches OpenAI project pattern
665+
value := "sk-proj-" + strings.Repeat("z", 48) // total 56 chars - matches OpenAI project pattern
717666
t.Setenv("OPENAI_API_KEY", value)
718667
// Clear all other built-in keys.
719668
for k := range scan.HighRiskEnvKeys {
@@ -735,3 +684,22 @@ func TestAPIKeyScanner_ValuePattern_BuiltinSkipped(t *testing.T) {
735684
t.Errorf("expected exactly 1 finding for OPENAI_API_KEY, got %d", count)
736685
}
737686
}
687+
688+
func TestAPIKeyScanner_NameRegex_ProviderWithoutSuffix_NotFlagged(t *testing.T) {
689+
clearAllEnv(t)
690+
// Provider keyword present but no credential suffix - should NOT be flagged.
691+
t.Setenv("GITHUB_WORKSPACE", "/home/runner/work")
692+
t.Setenv("GITHUB_ACTIONS", "true")
693+
t.Setenv("OPENAI_BASE_URL", "https://api.openai.com")
694+
t.Setenv("STRIPE_WEBHOOK_ENDPOINT", "https://example.com/webhook")
695+
696+
s := newScannerWithHome(t.TempDir())
697+
result := s.Scan()
698+
699+
for _, f := range result.Findings {
700+
switch f.Resource {
701+
case "GITHUB_WORKSPACE", "GITHUB_ACTIONS", "OPENAI_BASE_URL", "STRIPE_WEBHOOK_ENDPOINT":
702+
t.Errorf("%s should not be flagged (provider keyword without credential suffix)", f.Resource)
703+
}
704+
}
705+
}

0 commit comments

Comments
 (0)