Skip to content

Commit 994ba93

Browse files
committed
fix: complete cross-pass dedup, fix (^|_) regexes, add XAI/ASSEMBLYAI/AI21/NVIDIA_NIM
- Fix cross-pass dedup gap: scanEnvKeys now accepts and populates the shared seenEnvNames map, so an extra_env_keys entry that also matches a nameRegex pattern produces exactly one finding (scanEnvKeys wins as the highest-priority pass). Regression test: TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex.
- Fix FLY_, NEON_, PALM_ regexes: replace \bFLY_ / \bNEON_ / \bPALM_ with (^|_)FLY_ etc. In RE2, _ is a word character, so \b does not match between _ and a letter; as a result MY_FLY_TOKEN, MY_NEON_KEY, and MY_PALM_KEY were silently missed. Tests updated to assert both the positive and negative cases.
- Add name-regex patterns for XAI, ASSEMBLYAI, AI21, NVIDIA_NIM (reviewer suggestion). Tests added in TestAPIKeyScanner_NameRegex_NewAIProviders.
1 parent ce41c8d commit 994ba93

2 files changed

Lines changed: 61 additions & 12 deletions

File tree

internal/scan/apikeys.go

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,16 @@ var nameRegexPatterns = []*regexp.Regexp{
3535
regexp.MustCompile(`(?i)DEEPSEEK`),
3636
regexp.MustCompile(`(?i)PERPLEXITY`),
3737
regexp.MustCompile(`(?i)CEREBRAS`),
38+
regexp.MustCompile(`(?i)XAI`),
39+
regexp.MustCompile(`(?i)ASSEMBLYAI`),
40+
regexp.MustCompile(`(?i)AI21`),
41+
regexp.MustCompile(`(?i)NVIDIA_NIM`),
3842
// Secrets managers
3943
regexp.MustCompile(`(?i)DOPPLER`),
4044
// Google AI (Gemini, Vertex AI, PaLM)
4145
regexp.MustCompile(`(?i)GEMINI`),
4246
regexp.MustCompile(`(?i)VERTEX`),
43-
regexp.MustCompile(`(?i)\bPALM_`), // word boundary + underscore avoids NAPALM_MODE, PALM_BEACH_PROPERTY
47+
regexp.MustCompile(`(?i)(^|_)PALM_`), // (^|_) avoids NAPALM_MODE while matching MY_PALM_KEY
4448
// AWS AI
4549
regexp.MustCompile(`(?i)BEDROCK`),
4650
// Azure AI
@@ -72,7 +76,7 @@ var nameRegexPatterns = []*regexp.Regexp{
7276
regexp.MustCompile(`(?i)CLOUDFLARE`),
7377
regexp.MustCompile(`(?i)HEROKU`),
7478
regexp.MustCompile(`(?i)RAILWAY`),
75-
regexp.MustCompile(`(?i)\bFLY_`), // word boundary prevents false positives (BUTTERFLY_KEY)
79+
regexp.MustCompile(`(?i)(^|_)FLY_`), // (^|_) avoids BUTTERFLY_KEY, FLYWEIGHT_INDEX while matching MY_FLY_TOKEN
7680
// Source control
7781
regexp.MustCompile(`(?i)GITHUB`),
7882
regexp.MustCompile(`(?i)GITLAB`),
@@ -83,7 +87,7 @@ var nameRegexPatterns = []*regexp.Regexp{
8387
regexp.MustCompile(`(?i)AIRTABLE`),
8488
// Database-as-a-service (API keys / connection tokens)
8589
regexp.MustCompile(`(?i)SUPABASE`),
86-
regexp.MustCompile(`(?i)\bNEON_`), // word boundary + underscore avoids ANEMONE_CONFIG, NEON_LIGHTS_COLOR
90+
regexp.MustCompile(`(?i)(^|_)NEON_`), // (^|_) avoids ANEMONE_CONFIG, NEONLIGHTS_COLOR while matching MY_NEON_KEY
8791
regexp.MustCompile(`(?i)PLANETSCALE`),
8892
// Generic credential terms
8993
regexp.MustCompile(`(?i)API_KEY`),
@@ -194,11 +198,15 @@ func (s *APIKeyScanner) Name() string { return "api_keys" }
194198
// Implements Scanner. Never returns skipped=true.
195199
func (s *APIKeyScanner) Scan() models.ScanResult {
196200
var findings []models.Finding
197-
// seenEnvNames is shared across the name-regex and value-pattern passes so that a
198-
// variable matching both (e.g. CUSTOM_STRIPE_KEY=sk_live_...) produces exactly one
199-
// finding — the name-regex pass runs first and claims it.
201+
// seenEnvNames is shared across all three env-scanning passes so that any variable
202+
// claimed by an earlier pass is not re-reported by a later one. Order:
203+
// 1. scanEnvKeys — exact-match built-in + user-configured extra keys
204+
// 2. scanNameRegex — name-pattern heuristics (MY_OPENAI_KEY etc.)
205+
// 3. scanValuePatterns — prefix+length value matching
206+
// A variable in ExtraEnvKeys that also matches a nameRegex pattern therefore produces
207+
// exactly one finding (from scanEnvKeys, the highest-priority pass).
200208
seenEnvNames := make(map[string]bool)
201-
findings = append(findings, s.scanEnvKeys()...)
209+
findings = append(findings, s.scanEnvKeys(seenEnvNames)...)
202210
findings = append(findings, s.scanNameRegex(seenEnvNames)...)
203211
findings = append(findings, s.scanValuePatterns(seenEnvNames)...)
204212
findings = append(findings, s.scanCredentialFiles()...)
@@ -210,7 +218,9 @@ func (s *APIKeyScanner) Scan() models.ScanResult {
210218

211219
// scanEnvKeys checks built-in and extra environment variable key names for presence.
212220
// Key names only are reported; values are never read or stored.
213-
func (s *APIKeyScanner) scanEnvKeys() []models.Finding {
221+
// seenEnvNames is the shared cross-pass dedup set; matched names are added to it so
222+
// that scanNameRegex and scanValuePatterns will skip variables already claimed here.
223+
func (s *APIKeyScanner) scanEnvKeys(seenEnvNames map[string]bool) []models.Finding {
214224
var findings []models.Finding
215225

216226
// KEYS-01: Built-in high-risk env vars (sorted for deterministic output).
@@ -222,6 +232,7 @@ func (s *APIKeyScanner) scanEnvKeys() []models.Finding {
222232
for _, key := range keys {
223233
if val := os.Getenv(key); val != "" {
224234
_ = val // value is intentionally discarded; presence only
235+
seenEnvNames[key] = true
225236
findings = append(findings, envKeyFinding(key))
226237
}
227238
}
@@ -235,12 +246,13 @@ func (s *APIKeyScanner) scanEnvKeys() []models.Finding {
235246
copy(extraKeys, s.ExtraEnvKeys)
236247
sort.Strings(extraKeys)
237248
for _, key := range extraKeys {
238-
if HighRiskEnvKeys[key] || seenExtra[key] {
239-
continue // already covered by built-in check or earlier extra
249+
if HighRiskEnvKeys[key] || seenExtra[key] || seenEnvNames[key] {
250+
continue // already covered by built-in check, earlier extra, or another pass
240251
}
241252
seenExtra[key] = true
242253
if val := os.Getenv(key); val != "" {
243254
_ = val // value is intentionally discarded; presence only
255+
seenEnvNames[key] = true
244256
findings = append(findings, envKeyFinding(key))
245257
}
246258
}

internal/scan/apikeys_test.go

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -551,14 +551,16 @@ func TestAPIKeyScanner_ValuePattern_Anthropic(t *testing.T) {
551551
func TestAPIKeyScanner_NameRegex_FLY_Anchored(t *testing.T) {
552552
clearHighRiskEnv(t)
553553
t.Setenv("FLY_API_TOKEN", "real-token")
554+
t.Setenv("MY_FLY_TOKEN", "also-real-token")
554555
t.Setenv("BUTTERFLY_KEY", "not-a-fly-token")
555556
t.Setenv("FLYWEIGHT_INDEX", "not-a-token")
556557

557558
s := newScannerWithHome(t.TempDir())
558559
result := s.Scan()
559560

560-
// FLY_API_TOKEN must be flagged.
561+
// FLY_API_TOKEN and MY_FLY_TOKEN must both be flagged.
561562
assertResource(t, result.Findings, "FLY_API_TOKEN")
563+
assertResource(t, result.Findings, "MY_FLY_TOKEN")
562564

563565
// BUTTERFLY_KEY and FLYWEIGHT_INDEX must NOT be flagged.
564566
for _, f := range result.Findings {
@@ -754,13 +756,15 @@ func TestAPIKeyScanner_NameRegex_NEON_NarrowedPattern(t *testing.T) {
754756
// These should NOT be flagged.
755757
t.Setenv("ANEMONE_CONFIG", "some-value")
756758
t.Setenv("NEONLIGHTS_COLOR", "blue")
757-
// This SHOULD be flagged.
759+
// These SHOULD be flagged.
758760
t.Setenv("NEON_API_KEY", "real-neon-key")
761+
t.Setenv("MY_NEON_KEY", "also-real-neon-key")
759762

760763
s := newScannerWithHome(t.TempDir())
761764
result := s.Scan()
762765

763766
assertResource(t, result.Findings, "NEON_API_KEY")
767+
assertResource(t, result.Findings, "MY_NEON_KEY")
764768
for _, f := range result.Findings {
765769
if f.Resource == "ANEMONE_CONFIG" {
766770
t.Error("ANEMONE_CONFIG should not be flagged by NEON_ pattern")
@@ -794,12 +798,15 @@ func TestAPIKeyScanner_NameRegex_LINEAR_NarrowedPattern(t *testing.T) {
794798
func TestAPIKeyScanner_NameRegex_PALM_NarrowedPattern(t *testing.T) {
795799
clearHighRiskEnv(t)
796800
t.Setenv("NAPALM_MODE", "some-value")
801+
// These SHOULD be flagged.
797802
t.Setenv("PALM_API_KEY", "real-palm-key")
803+
t.Setenv("MY_PALM_KEY", "also-real-palm-key")
798804

799805
s := newScannerWithHome(t.TempDir())
800806
result := s.Scan()
801807

802808
assertResource(t, result.Findings, "PALM_API_KEY")
809+
assertResource(t, result.Findings, "MY_PALM_KEY")
803810
for _, f := range result.Findings {
804811
if f.Resource == "NAPALM_MODE" {
805812
t.Error("NAPALM_MODE should not be flagged by PALM_ pattern")
@@ -821,6 +828,10 @@ func TestAPIKeyScanner_NameRegex_NewAIProviders(t *testing.T) {
821828
{"PERPLEXITY_API_KEY", "pplx-key-value"},
822829
{"CEREBRAS_API_KEY", "cb-key-value"},
823830
{"DOPPLER_TOKEN", "dp-token-value"},
831+
{"XAI_API_KEY", "xai-key-value"},
832+
{"ASSEMBLYAI_API_KEY", "aai-key-value"},
833+
{"AI21_API_KEY", "ai21-key-value"},
834+
{"NVIDIA_NIM_API_KEY", "nim-key-value"},
824835
}
825836
for _, tc := range cases {
826837
t.Setenv(tc.envVar, tc.value)
@@ -834,6 +845,32 @@ func TestAPIKeyScanner_NameRegex_NewAIProviders(t *testing.T) {
834845
}
835846
}
836847

848+
// TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex verifies that a key listed in
849+
// ExtraEnvKeys whose name also matches a nameRegexPattern produces exactly ONE finding.
850+
// Previously scanEnvKeys and scanNameRegex were not sharing the seenEnvNames dedup map,
851+
// so MY_OPENAI_KEY in extra_env_keys would fire twice.
852+
func TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex(t *testing.T) {
853+
const key = "MY_OPENAI_KEY" // matches OPENAI nameRegexPattern AND is in ExtraEnvKeys
854+
t.Setenv(key, "sk-test-value")
855+
clearHighRiskEnv(t)
856+
857+
s := &scan.APIKeyScanner{
858+
HomeDir: t.TempDir(),
859+
ExtraEnvKeys: []string{key},
860+
}
861+
result := s.Scan()
862+
863+
count := 0
864+
for _, f := range result.Findings {
865+
if f.Resource == key {
866+
count++
867+
}
868+
}
869+
if count != 1 {
870+
t.Errorf("expected exactly 1 finding for %q (ExtraEnvKeys + nameRegex cross-pass dedup), got %d", key, count)
871+
}
872+
}
873+
837874
// TestAPIKeyScanner_ValuePattern_BuiltinSkipped verifies that a key in HighRiskEnvKeys
838875
// whose value also matches a value pattern produces exactly ONE finding (from scanEnvKeys,
839876
// not from scanValuePatterns which skips it).

0 commit comments

Comments
 (0)