Skip to content

Commit 005815f

Browse files
committed
fix(security): tier-driven approval gate + preserve baseline classification
Addresses two Codex findings on the Spec 077 US1 two-tier scanner model. #1 (HIGH): ApproveServer gated only on Summary.Critical, so a HARD-tier phrase.injection finding (SeverityHigh, not Critical) let a dangerous server be unforce-approved. The gate now blocks on any isBlockingFinding — the SAME predicate that drives the "dangerous" summary verdict, so the gate and the verdict can never disagree — while critical severity still blocks for back-compat and --force still overrides. #2 (HIGH): ClassifyThreat re-derived threat_level from description keywords, which could downgrade a HARD baseline finding dangerous->warning while its Tier stayed "hard", breaking the tier<->level coupling. It now returns early for any finding that already carries a Tier (baseline detect output); legacy/external findings (no Tier) are still classified as before. Tests: a High-severity hard phrase_injection cannot be unforce-approved but can with --force; a soft finding never blocks; ClassifyThreat leaves a hard baseline finding dangerous and still classifies legacy findings. Related: Spec 077 (specs/077-scanner-simplification)
1 parent 0f5838e commit 005815f

4 files changed

Lines changed: 208 additions & 3 deletions

File tree

internal/security/scanner/sarif.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,19 @@ func parsePackageFromMessage(msg string) (pkg, installed, fixed string) {
403403
// ClassifyThreat assigns user-facing threat_type and threat_level to a finding
404404
// based on rule ID, category, description, and severity.
405405
func ClassifyThreat(f *ScanFinding) {
406+
// Baseline detect findings (Spec 076/077) already carry authoritative
407+
// threat_type / threat_level / tier from the deterministic engine, so the
408+
// legacy keyword classifier must NOT rewrite them. Overriding here can flip a
409+
// soft (review-only) finding to "dangerous" — every soft check's threat_type
410+
// string (e.g. "exfiltration", "prompt_injection", "tool_poisoning") happens
411+
// to match a dangerous keyword branch below — which breaks the tier↔level
412+
// coupling (tier==hard ⇔ dangerous) the summary and approval gate depend on. A
413+
// baseline finding is identified by its non-empty Tier; legacy / external
414+
// scanner findings (empty Tier) still get classified as before.
415+
if f.Tier != "" {
416+
return
417+
}
418+
406419
ruleLC := strings.ToLower(f.RuleID)
407420
catLC := strings.ToLower(f.Category)
408421
titleLC := strings.ToLower(f.Title)

internal/security/scanner/sarif_test.go

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,76 @@ func TestSARIFRoundTrip(t *testing.T) {
398398
t.Error("round-trip failed: ruleId mismatch")
399399
}
400400
}
401+
402+
// TestClassifyThreat_PreservesBaselineTier locks Spec 077 US1 Codex finding #2:
403+
// the legacy keyword classifier must NOT rewrite a baseline detect finding (one
404+
// that carries a Tier). Before the fix, ClassifyThreat re-derived threat_level
405+
// from the description keywords, so a HARD finding whose text lacked a
406+
// "dangerous" keyword was downgraded dangerous→warning while its Tier stayed
407+
// "hard" — breaking the tier↔level coupling the summary and approval gate rely
408+
// on. A baseline finding must pass through untouched.
409+
func TestClassifyThreat_PreservesBaselineTier(t *testing.T) {
410+
// A hard phrase_injection finding whose description contains no keyword the
411+
// classifier would map to "dangerous" (it would otherwise fall through to the
412+
// default branch and be set to "warning" at High severity).
413+
f := ScanFinding{
414+
RuleID: "phrase.injection",
415+
Severity: SeverityHigh,
416+
Category: "phrase_injection",
417+
ThreatType: ThreatPromptInjection,
418+
ThreatLevel: ThreatLevelDangerous,
419+
Title: "Curated injection directive",
420+
Description: "Description contains a high-confidence directive to the agent.",
421+
Tier: TierHard,
422+
}
423+
ClassifyThreat(&f)
424+
if f.ThreatLevel != ThreatLevelDangerous {
425+
t.Errorf("baseline hard finding downgraded: threat_level = %q, want %q", f.ThreatLevel, ThreatLevelDangerous)
426+
}
427+
if f.Tier != TierHard {
428+
t.Errorf("Tier mutated to %q, want %q", f.Tier, TierHard)
429+
}
430+
// The hard/dangerous coupling isBlockingFinding depends on must survive.
431+
if !isBlockingFinding(f) {
432+
t.Error("hard finding must remain blocking after classification")
433+
}
434+
435+
// A soft baseline finding must likewise not be promoted to dangerous even
436+
// though its threat_type ("prompt_injection") matches a dangerous keyword.
437+
soft := ScanFinding{
438+
RuleID: "directive.imperative",
439+
Severity: SeverityHigh,
440+
Category: "prompt_injection",
441+
ThreatType: ThreatPromptInjection,
442+
ThreatLevel: ThreatLevelWarning,
443+
Description: "prompt injection phrasing present but soft-tier",
444+
Tier: TierSoft,
445+
}
446+
ClassifyThreat(&soft)
447+
if soft.ThreatLevel != ThreatLevelWarning {
448+
t.Errorf("soft baseline finding rewritten: threat_level = %q, want %q", soft.ThreatLevel, ThreatLevelWarning)
449+
}
450+
if isBlockingFinding(soft) {
451+
t.Error("soft finding must never block, even with an injection threat_type")
452+
}
453+
}
454+
455+
// TestClassifyThreat_StillClassifiesLegacy proves the guard is scoped to
456+
// baseline findings only: a legacy/external finding (no Tier) is still
457+
// classified by keyword as before, so back-compat is preserved.
458+
func TestClassifyThreat_StillClassifiesLegacy(t *testing.T) {
459+
f := ScanFinding{
460+
RuleID: "cisco-mcp-001",
461+
Severity: SeverityHigh,
462+
Category: "prompt-injection",
463+
Description: "detected prompt injection payload",
464+
// no Tier — legacy finding
465+
}
466+
ClassifyThreat(&f)
467+
if f.ThreatType != ThreatPromptInjection {
468+
t.Errorf("legacy finding threat_type = %q, want %q", f.ThreatType, ThreatPromptInjection)
469+
}
470+
if f.ThreatLevel != ThreatLevelDangerous {
471+
t.Errorf("legacy finding threat_level = %q, want %q", f.ThreatLevel, ThreatLevelDangerous)
472+
}
473+
}

internal/security/scanner/service.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1408,9 +1408,27 @@ func (s *Service) ApproveServer(ctx context.Context, serverName string, force bo
14081408
}
14091409
}
14101410

1411-
// Check for critical findings (block unless force)
1412-
if aggReport != nil && aggReport.Summary.Critical > 0 && !force {
1413-
return fmt.Errorf("server has %d critical findings; resolve them or use --force to approve anyway", aggReport.Summary.Critical)
1411+
// Block approval on blocking findings unless forced. Spec 077 FR-021: the
1412+
// approval gate is tier-driven, mirroring the server verdict and the Approve
1413+
// modal. Any HARD-tier baseline finding (dangerous) blocks — and a curated
1414+
// hard phrase.injection is SeverityHigh, not Critical, so gating on Critical
1415+
// severity alone let a dangerous server be unquarantined. isBlockingFinding is
1416+
// the SAME predicate that drives the "dangerous" summary status, so the gate
1417+
// and the verdict can never disagree. Critical severity (e.g. a critical CVE)
1418+
// still blocks for back-compat.
1419+
if aggReport != nil && !force {
1420+
blocking := 0
1421+
for _, f := range aggReport.Findings {
1422+
if isBlockingFinding(f) {
1423+
blocking++
1424+
}
1425+
}
1426+
if blocking > 0 {
1427+
return fmt.Errorf("server has %d dangerous (hard-tier) finding(s); resolve them or use --force to approve anyway", blocking)
1428+
}
1429+
if aggReport.Summary.Critical > 0 {
1430+
return fmt.Errorf("server has %d critical findings; resolve them or use --force to approve anyway", aggReport.Summary.Critical)
1431+
}
14141432
}
14151433

14161434
// Create integrity baseline

internal/security/scanner/service_test.go

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,107 @@ func TestServiceApproveServerForce(t *testing.T) {
830830
}
831831
}
832832

833+
// TestServiceApproveServerBlockedByHardFinding locks Spec 077 US1 Codex finding
834+
// #1: the approval gate must block on any HARD-tier baseline finding, not only on
835+
// Summary.Critical. A curated hard phrase.injection is SeverityHigh (not
836+
// Critical) with threat_level "dangerous", so the old Critical-only gate let a
837+
// dangerous server be unquarantined. The gate now reuses isBlockingFinding — the
838+
// SAME predicate that drives the "dangerous" verdict — so it cannot disagree with
839+
// the summary. --force must still override.
840+
func TestServiceApproveServerBlockedByHardFinding(t *testing.T) {
841+
svc, store, _ := newTestService(t)
842+
843+
job := &ScanJob{
844+
ID: "job-hard",
845+
ServerName: "poisoned-server",
846+
Status: ScanJobStatusCompleted,
847+
Scanners: []string{"tpa-descriptions"},
848+
StartedAt: time.Now().Add(-1 * time.Minute),
849+
}
850+
_ = store.SaveScanJob(job)
851+
852+
// A hard phrase_injection finding: High severity (NOT Critical), dangerous
853+
// threat level, hard tier — exactly the shape the Critical-only gate missed.
854+
report := &ScanReport{
855+
ID: "report-hard",
856+
JobID: "job-hard",
857+
ServerName: "poisoned-server",
858+
ScannerID: "tpa-descriptions",
859+
Findings: []ScanFinding{
860+
{
861+
RuleID: "phrase.injection",
862+
Severity: SeverityHigh,
863+
Category: "phrase_injection",
864+
ThreatType: ThreatPromptInjection,
865+
ThreatLevel: ThreatLevelDangerous,
866+
Title: "Instruction-override directive",
867+
Tier: TierHard,
868+
},
869+
},
870+
ScannedAt: time.Now(),
871+
}
872+
_ = store.SaveScanReport(report)
873+
874+
// Unforced approve must fail even though there are zero Critical findings.
875+
if err := svc.ApproveServer(context.Background(), "poisoned-server", false, "admin@test.com"); err == nil {
876+
t.Fatal("expected error: a hard-tier (dangerous) finding must block unforced approval")
877+
}
878+
if _, err := store.GetIntegrityBaseline("poisoned-server"); err == nil {
879+
t.Fatal("expected no baseline after a rejected approval")
880+
}
881+
882+
// --force must still override.
883+
if err := svc.ApproveServer(context.Background(), "poisoned-server", true, "admin@test.com"); err != nil {
884+
t.Fatalf("force approve should succeed despite the hard finding: %v", err)
885+
}
886+
if _, err := store.GetIntegrityBaseline("poisoned-server"); err != nil {
887+
t.Fatalf("expected baseline after forced approval: %v", err)
888+
}
889+
}
890+
891+
// TestServiceApproveServerSoftFindingDoesNotBlock proves the gate's counterpart:
892+
// a SOFT baseline finding (review-only) must NOT block an unforced approval, even
893+
// at High severity — the two-tier model, not raw severity, governs blocking.
894+
func TestServiceApproveServerSoftFindingDoesNotBlock(t *testing.T) {
895+
svc, store, _ := newTestService(t)
896+
897+
job := &ScanJob{
898+
ID: "job-soft",
899+
ServerName: "reviewable-server",
900+
Status: ScanJobStatusCompleted,
901+
Scanners: []string{"tpa-descriptions"},
902+
StartedAt: time.Now().Add(-1 * time.Minute),
903+
}
904+
_ = store.SaveScanJob(job)
905+
906+
report := &ScanReport{
907+
ID: "report-soft",
908+
JobID: "job-soft",
909+
ServerName: "reviewable-server",
910+
ScannerID: "tpa-descriptions",
911+
Findings: []ScanFinding{
912+
{
913+
RuleID: "directive.imperative",
914+
Severity: SeverityHigh,
915+
Category: "prompt_injection",
916+
ThreatType: ThreatPromptInjection,
917+
ThreatLevel: ThreatLevelWarning,
918+
Title: "Soft directive",
919+
Tier: TierSoft,
920+
},
921+
},
922+
ScannedAt: time.Now(),
923+
}
924+
_ = store.SaveScanReport(report)
925+
926+
if err := svc.ApproveServer(context.Background(), "reviewable-server", false, "admin@test.com"); err != nil {
927+
t.Fatalf("a soft finding must not block unforced approval: %v", err)
928+
}
929+
if _, err := store.GetIntegrityBaseline("reviewable-server"); err != nil {
930+
t.Fatalf("expected baseline after approving a soft-only server: %v", err)
931+
}
932+
}
933+
833934
func TestServiceApproveServerNoScanForce(t *testing.T) {
834935
svc, store, _ := newTestService(t)
835936

0 commit comments

Comments
 (0)