Skip to content

Commit ce41c8d

Browse files
committed
fix: restore deleted tests, downgrade Twilio SK to UNCERTAIN, fix LINEAR_ regex
- Restore 14 value-pattern and name-regex tests accidentally deleted in 124de66 (recovered from db20f03 and merged with tests added in HEAD).
- Downgrade the Twilio SK prefix from SeverityHigh to SeverityUncertain: the bare 'SK' prefix is too broad (no hex charset validation), so false positives are likely; the test is updated to assert UNCERTAIN.
- Fix the LINEAR_ name-regex: replace \bLINEAR_ with (^|_)LINEAR_ so that MY_LINEAR_TOKEN matches (underscore is a word character in RE2, so \b finds no boundary between MY_ and LINEAR_) while BILINEAR_FILTER still does not match.
1 parent 124de66 commit ce41c8d

2 files changed

Lines changed: 332 additions & 7 deletions

File tree

internal/scan/apikeys.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ var nameRegexPatterns = []*regexp.Regexp{
7878
regexp.MustCompile(`(?i)GITLAB`),
7979
regexp.MustCompile(`(?i)BITBUCKET`),
8080
// Productivity / project tools (common in agent contexts)
81-
regexp.MustCompile(`(?i)\bLINEAR_`), // word boundary + underscore avoids BILINEAR_FILTER
81+
regexp.MustCompile(`(?i)(^|_)LINEAR_`), // (^|_) avoids BILINEAR_FILTER while still matching MY_LINEAR_TOKEN
8282
regexp.MustCompile(`(?i)NOTION`),
8383
regexp.MustCompile(`(?i)AIRTABLE`),
8484
// Database-as-a-service (API keys / connection tokens)
@@ -125,7 +125,9 @@ var valuePatterns = []valuePattern{
125125
// Groq — gsk_ prefix confirmed in Groq docs.
126126
{prefix: "gsk_", totalLen: 56, severity: models.SeverityHigh, providerTag: "Groq"},
127127
// Twilio API key SID — SK + 32 hex chars = 34 total.
128-
{prefix: "SK", totalLen: 34, severity: models.SeverityHigh, providerTag: "Twilio API key SID"},
128+
// SeverityUncertain: the SK prefix is too broad (any 34-char string starting with SK
129+
// would match); we don't validate the hex charset, so false positives are likely.
130+
{prefix: "SK", totalLen: 34, severity: models.SeverityUncertain, providerTag: "Twilio API key SID"},
129131
// SendGrid — SG. + 22 + . + 43 = 69 total (with the dots).
130132
{prefix: "SG.", totalLen: 69, severity: models.SeverityHigh, providerTag: "SendGrid"},
131133
// HuggingFace

internal/scan/apikeys_test.go

Lines changed: 328 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -363,10 +363,333 @@ func TestAPIKeyScanner_NameRegex_NoDuplicateWithBuiltin(t *testing.T) {
363363
}
364364
}
365365

366+
// ── Value-pattern tests ───────────────────────────────────────────────────────
367+
368+
// TestAPIKeyScanner_ValuePattern_AmbiguousSK verifies that a value matching the
369+
// generic sk- format (51 chars) produces an UNCERTAIN finding, not HIGH, because
370+
// the sk- prefix is used by many tools besides legacy OpenAI keys.
371+
func TestAPIKeyScanner_ValuePattern_AmbiguousSK(t *testing.T) {
372+
value := "sk-" + strings.Repeat("x", 48) // total 51 chars
373+
t.Setenv("SOME_CRED", value)
374+
clearHighRiskEnv(t)
375+
376+
s := newScannerWithHome(t.TempDir())
377+
result := s.Scan()
378+
379+
assertResource(t, result.Findings, "SOME_CRED")
380+
for _, f := range result.Findings {
381+
if f.Resource == "SOME_CRED" {
382+
if f.Severity != "UNCERTAIN" {
383+
t.Errorf("expected UNCERTAIN severity for ambiguous sk- key, got %q", f.Severity)
384+
}
385+
}
386+
}
387+
assertNoSecretValue(t, result.Findings, value)
388+
}
389+
390+
// TestAPIKeyScanner_ValuePattern_StripeLiveSecret verifies that a Stripe live secret key
391+
// (sk_live_ + 47 chars = 55 total) produces a HIGH finding.
392+
func TestAPIKeyScanner_ValuePattern_StripeLiveSecret(t *testing.T) {
393+
value := "sk_live_" + strings.Repeat("s", 47) // total 55 chars
394+
t.Setenv("PAYMENT_KEY", value)
395+
clearHighRiskEnv(t)
396+
397+
s := newScannerWithHome(t.TempDir())
398+
result := s.Scan()
399+
400+
assertResource(t, result.Findings, "PAYMENT_KEY")
401+
for _, f := range result.Findings {
402+
if f.Resource == "PAYMENT_KEY" {
403+
if f.Severity != "HIGH" {
404+
t.Errorf("expected HIGH severity for Stripe live key, got %q", f.Severity)
405+
}
406+
if !strings.Contains(f.Description, "Stripe") {
407+
t.Errorf("expected description to contain %q, got %q", "Stripe", f.Description)
408+
}
409+
}
410+
}
411+
assertNoSecretValue(t, result.Findings, value)
412+
}
413+
414+
// TestAPIKeyScanner_ValuePattern_StripeTestSecret verifies that a Stripe test secret key
415+
// (sk_test_ + 47 chars = 55 total) produces a finding whose description names Stripe
// (severity is not asserted by this test).
416+
func TestAPIKeyScanner_ValuePattern_StripeTestSecret(t *testing.T) {
417+
value := "sk_test_" + strings.Repeat("t", 47) // total 55 chars
418+
t.Setenv("TEST_PAYMENT_KEY", value)
419+
clearHighRiskEnv(t)
420+
421+
s := newScannerWithHome(t.TempDir())
422+
result := s.Scan()
423+
424+
assertResource(t, result.Findings, "TEST_PAYMENT_KEY")
425+
for _, f := range result.Findings {
426+
if f.Resource == "TEST_PAYMENT_KEY" {
427+
if !strings.Contains(f.Description, "Stripe") {
428+
t.Errorf("expected description to contain %q, got %q", "Stripe", f.Description)
429+
}
430+
}
431+
}
432+
}
433+
434+
// TestAPIKeyScanner_ValuePattern_GitLabPAT verifies that a GitLab personal access token
435+
// (glpat- + 20 chars = 26 total) produces a HIGH finding.
436+
func TestAPIKeyScanner_ValuePattern_GitLabPAT(t *testing.T) {
437+
value := "glpat-" + strings.Repeat("g", 20) // total 26 chars
438+
t.Setenv("REPO_TOKEN", value)
439+
clearHighRiskEnv(t)
440+
441+
s := newScannerWithHome(t.TempDir())
442+
result := s.Scan()
443+
444+
assertResource(t, result.Findings, "REPO_TOKEN")
445+
for _, f := range result.Findings {
446+
if f.Resource == "REPO_TOKEN" {
447+
if f.Severity != "HIGH" {
448+
t.Errorf("expected HIGH severity for GitLab PAT, got %q", f.Severity)
449+
}
450+
if !strings.Contains(f.Description, "GitLab") {
451+
t.Errorf("expected description to contain %q, got %q", "GitLab", f.Description)
452+
}
453+
}
454+
}
455+
assertNoSecretValue(t, result.Findings, value)
456+
}
457+
458+
// TestAPIKeyScanner_ValuePattern_NpmToken verifies that an npm granular access token
459+
// (npm_ + 36 chars = 40 total) produces a finding whose description names npm
// (severity is not asserted by this test).
460+
func TestAPIKeyScanner_ValuePattern_NpmToken(t *testing.T) {
461+
value := "npm_" + strings.Repeat("n", 36) // total 40 chars
462+
t.Setenv("REGISTRY_KEY", value)
463+
clearHighRiskEnv(t)
464+
465+
s := newScannerWithHome(t.TempDir())
466+
result := s.Scan()
467+
468+
assertResource(t, result.Findings, "REGISTRY_KEY")
469+
for _, f := range result.Findings {
470+
if f.Resource == "REGISTRY_KEY" {
471+
if !strings.Contains(f.Description, "npm") {
472+
t.Errorf("expected description to contain %q, got %q", "npm", f.Description)
473+
}
474+
}
475+
}
476+
}
477+
478+
// TestAPIKeyScanner_ValuePattern_Groq verifies that a Groq key (gsk_ + 52 chars = 56 total)
479+
// produces a HIGH finding.
480+
func TestAPIKeyScanner_ValuePattern_Groq(t *testing.T) {
481+
value := "gsk_" + strings.Repeat("q", 52) // total 56 chars
482+
t.Setenv("INFERENCE_KEY", value)
483+
clearHighRiskEnv(t)
484+
485+
s := newScannerWithHome(t.TempDir())
486+
result := s.Scan()
487+
488+
assertResource(t, result.Findings, "INFERENCE_KEY")
489+
for _, f := range result.Findings {
490+
if f.Resource == "INFERENCE_KEY" {
491+
if f.Severity != "HIGH" {
492+
t.Errorf("expected HIGH severity for Groq key, got %q", f.Severity)
493+
}
494+
if !strings.Contains(f.Description, "Groq") {
495+
t.Errorf("expected description to contain %q, got %q", "Groq", f.Description)
496+
}
497+
}
498+
}
499+
assertNoSecretValue(t, result.Findings, value)
500+
}
501+
502+
// TestAPIKeyScanner_ValuePattern_SendGrid verifies that a SendGrid key
503+
// (SG. + 22 chars + . + 43 chars = 69 total) produces a finding whose description
// names SendGrid (severity is not asserted by this test).
504+
func TestAPIKeyScanner_ValuePattern_SendGrid(t *testing.T) {
505+
// SG. (3) + 22 chars + . (1) + 43 chars = 69 total
506+
value := "SG." + strings.Repeat("a", 22) + "." + strings.Repeat("b", 43)
507+
t.Setenv("MAIL_KEY", value)
508+
clearHighRiskEnv(t)
509+
510+
s := newScannerWithHome(t.TempDir())
511+
result := s.Scan()
512+
513+
assertResource(t, result.Findings, "MAIL_KEY")
514+
for _, f := range result.Findings {
515+
if f.Resource == "MAIL_KEY" {
516+
if !strings.Contains(f.Description, "SendGrid") {
517+
t.Errorf("expected description to contain %q, got %q", "SendGrid", f.Description)
518+
}
519+
}
520+
}
521+
assertNoSecretValue(t, result.Findings, value)
522+
}
523+
524+
// TestAPIKeyScanner_ValuePattern_Anthropic verifies that an Anthropic key
525+
// (sk-ant- prefix, 108 total chars) produces a HIGH finding.
526+
func TestAPIKeyScanner_ValuePattern_Anthropic(t *testing.T) {
527+
value := "sk-ant-" + strings.Repeat("a", 101) // total 108 chars
528+
t.Setenv("LLM_KEY", value)
529+
clearHighRiskEnv(t)
530+
531+
s := newScannerWithHome(t.TempDir())
532+
result := s.Scan()
533+
534+
assertResource(t, result.Findings, "LLM_KEY")
535+
for _, f := range result.Findings {
536+
if f.Resource == "LLM_KEY" {
537+
if f.Severity != "HIGH" {
538+
t.Errorf("expected HIGH severity for Anthropic key, got %q", f.Severity)
539+
}
540+
if !strings.Contains(f.Description, "Anthropic") {
541+
t.Errorf("expected description to contain %q, got %q", "Anthropic", f.Description)
542+
}
543+
}
544+
}
545+
assertNoSecretValue(t, result.Findings, value)
546+
}
547+
548+
// TestAPIKeyScanner_NameRegex_FLY_Anchored verifies that FLY_ matches FLY_API_TOKEN
549+
// but does NOT match BUTTERFLY_KEY (which contains the substring FLY_) or
550+
// FLYWEIGHT_INDEX (which merely starts with FLY); the word-boundary anchor in the pattern must reject both.
551+
func TestAPIKeyScanner_NameRegex_FLY_Anchored(t *testing.T) {
552+
clearHighRiskEnv(t)
553+
t.Setenv("FLY_API_TOKEN", "real-token")
554+
t.Setenv("BUTTERFLY_KEY", "not-a-fly-token")
555+
t.Setenv("FLYWEIGHT_INDEX", "not-a-token")
556+
557+
s := newScannerWithHome(t.TempDir())
558+
result := s.Scan()
559+
560+
// FLY_API_TOKEN must be flagged.
561+
assertResource(t, result.Findings, "FLY_API_TOKEN")
562+
563+
// BUTTERFLY_KEY and FLYWEIGHT_INDEX must NOT be flagged.
564+
for _, f := range result.Findings {
565+
if f.Resource == "BUTTERFLY_KEY" {
566+
t.Error("BUTTERFLY_KEY should not be flagged by FLY_ pattern")
567+
}
568+
if f.Resource == "FLYWEIGHT_INDEX" {
569+
t.Error("FLYWEIGHT_INDEX should not be flagged by FLY_ pattern")
570+
}
571+
}
572+
}
573+
574+
// TestAPIKeyScanner_NameRegex_NewProviders verifies that new provider keywords
575+
// added in this session are recognised.
576+
func TestAPIKeyScanner_NameRegex_NewProviders(t *testing.T) {
577+
clearHighRiskEnv(t)
578+
cases := []struct {
579+
envVar string
580+
value string
581+
}{
582+
{"MY_GEMINI_KEY", "gemini-key-value"},
583+
{"VERTEX_API_KEY", "vertex-key-value"},
584+
{"BEDROCK_ACCESS_KEY", "bedrock-key-value"},
585+
{"AZURE_OPENAI_KEY", "azure-openai-key"},
586+
{"RESEND_API_KEY", "resend-key-value"},
587+
{"POSTMARK_TOKEN", "postmark-key-value"},
588+
{"MY_LINEAR_TOKEN", "linear-key-value"},
589+
{"NOTION_API_KEY", "notion-key-value"},
590+
{"AIRTABLE_KEY", "airtable-key-value"},
591+
{"SUPABASE_KEY", "supabase-key-value"},
592+
{"NEON_API_KEY", "neon-key-value"},
593+
{"PLANETSCALE_TOKEN", "ps-key-value"},
594+
}
595+
596+
for _, tc := range cases {
597+
t.Setenv(tc.envVar, tc.value)
598+
}
599+
600+
s := newScannerWithHome(t.TempDir())
601+
result := s.Scan()
602+
603+
for _, tc := range cases {
604+
assertResource(t, result.Findings, tc.envVar)
605+
}
606+
}
607+
608+
// TestAPIKeyScanner_ValuePattern_OpenAIProject verifies that a value matching the
609+
// OpenAI project key format (sk-proj- + 48 chars = 56 total) produces a finding
610+
// with the correct resource name and provider tag in the description.
611+
func TestAPIKeyScanner_ValuePattern_OpenAIProject(t *testing.T) {
612+
value := "sk-proj-" + strings.Repeat("a", 48) // total 56 chars
613+
t.Setenv("SOME_AI_CRED", value)
614+
clearHighRiskEnv(t)
615+
616+
s := newScannerWithHome(t.TempDir())
617+
result := s.Scan()
618+
619+
assertResource(t, result.Findings, "SOME_AI_CRED")
620+
for _, f := range result.Findings {
621+
if f.Resource == "SOME_AI_CRED" {
622+
if !strings.Contains(f.Description, "OpenAI project") {
623+
t.Errorf("expected description to contain %q, got %q", "OpenAI project", f.Description)
624+
}
625+
}
626+
}
627+
assertNoSecretValue(t, result.Findings, value)
628+
}
629+
630+
// TestAPIKeyScanner_ValuePattern_HuggingFace verifies that a value matching the
631+
// HuggingFace token format (hf_ + 34 chars = 37 total) produces a correct finding.
632+
func TestAPIKeyScanner_ValuePattern_HuggingFace(t *testing.T) {
633+
value := "hf_" + strings.Repeat("b", 34) // total 37 chars
634+
// Use a variable name that does NOT match any nameRegex pattern so the finding
635+
// comes from scanValuePatterns (and the HuggingFace provider tag is in the description).
636+
t.Setenv("ML_MODEL_CRED", value)
637+
clearHighRiskEnv(t)
638+
639+
s := newScannerWithHome(t.TempDir())
640+
result := s.Scan()
641+
642+
assertResource(t, result.Findings, "ML_MODEL_CRED")
643+
for _, f := range result.Findings {
644+
if f.Resource == "ML_MODEL_CRED" {
645+
if !strings.Contains(f.Description, "HuggingFace") {
646+
t.Errorf("expected description to contain %q, got %q", "HuggingFace", f.Description)
647+
}
648+
}
649+
}
650+
}
651+
652+
// TestAPIKeyScanner_ValuePattern_GitHub_ClassicPAT verifies that a value matching the
653+
// GitHub classic PAT format (ghp_ + 36 chars = 40 total) produces a correct finding.
654+
func TestAPIKeyScanner_ValuePattern_GitHub_ClassicPAT(t *testing.T) {
655+
value := "ghp_" + strings.Repeat("c", 36) // total 40 chars
656+
t.Setenv("WORK_GH_TOKEN", value)
657+
clearHighRiskEnv(t)
658+
659+
s := newScannerWithHome(t.TempDir())
660+
result := s.Scan()
661+
662+
assertResource(t, result.Findings, "WORK_GH_TOKEN")
663+
for _, f := range result.Findings {
664+
if f.Resource == "WORK_GH_TOKEN" {
665+
if !strings.Contains(f.Description, "GitHub") {
666+
t.Errorf("expected description to contain %q, got %q", "GitHub", f.Description)
667+
}
668+
}
669+
}
670+
}
671+
672+
// TestAPIKeyScanner_ValuePattern_NoMatchWrongLength verifies that a value with the
673+
// right prefix but wrong length does NOT produce a finding.
674+
func TestAPIKeyScanner_ValuePattern_NoMatchWrongLength(t *testing.T) {
675+
value := "sk-proj-" + strings.Repeat("x", 10) // total 18 chars, wrong length for any pattern
676+
t.Setenv("SOME_KEY", value)
677+
clearHighRiskEnv(t)
678+
679+
s := newScannerWithHome(t.TempDir())
680+
result := s.Scan()
681+
682+
for _, f := range result.Findings {
683+
if f.Resource == "SOME_KEY" {
684+
t.Errorf("got unexpected finding for SOME_KEY with wrong-length value")
685+
}
686+
}
687+
}
688+
366689
// TestAPIKeyScanner_ValuePattern_TwilioSID verifies that a Twilio API key SID
367-
// (SK + 32 hex chars = 34 total) produces a HIGH finding.
368-
// The variable name is intentionally neutral (no provider keyword) so the finding
369-
// comes from the value-pattern pass, confirming the pattern itself works.
690+
// (SK + 32 hex chars = 34 total) produces an UNCERTAIN finding.
691+
// The SK prefix is inherently broad (any 34-char string starting with SK matches)
692+
// so we use SeverityUncertain rather than SeverityHigh to avoid false positives.
370693
func TestAPIKeyScanner_ValuePattern_TwilioSID(t *testing.T) {
371694
value := "SK" + strings.Repeat("f", 32) // total 34 chars
372695
t.Setenv("CRED_SID", value)
@@ -378,8 +701,8 @@ func TestAPIKeyScanner_ValuePattern_TwilioSID(t *testing.T) {
378701
assertResource(t, result.Findings, "CRED_SID")
379702
for _, f := range result.Findings {
380703
if f.Resource == "CRED_SID" {
381-
if f.Severity != "HIGH" {
382-
t.Errorf("expected HIGH severity for Twilio SID, got %q", f.Severity)
704+
if f.Severity != "UNCERTAIN" {
705+
t.Errorf("expected UNCERTAIN severity for Twilio SID (broad SK prefix), got %q", f.Severity)
383706
}
384707
if !strings.Contains(f.Description, "Twilio") {
385708
t.Errorf("expected description to contain %q, got %q", "Twilio", f.Description)

0 commit comments

Comments
 (0)