fix(security): tier-driven approval gate + preserve baseline classification

Dumbris · Dumbris · commit 005815f2d641 · 2026-07-01T20:58:05.000+03:00
Addresses two Codex findings on the Spec 077 US1 two-tier scanner model. #1 (HIGH): ApproveServer gated only on Summary.Critical, so a server with a HARD-tier phrase.injection finding (SeverityHigh, not Critical) could be approved without --force even though it was dangerous. The gate now blocks on any finding that satisfies isBlockingFinding — the SAME predicate that drives the "dangerous" summary verdict, so the gate and the verdict can never disagree — while critical severity still blocks for back-compat and --force still overrides. #2 (HIGH): ClassifyThreat re-derived threat_level from description keywords, which could downgrade a HARD baseline finding dangerous->warning while its Tier stayed "hard", breaking the tier<->level coupling. It now returns early for any finding that already carries a Tier (baseline detect output); legacy/external findings (no Tier) are still classified as before. Tests: a server with a High-severity hard phrase_injection finding cannot be approved without --force but can be approved with it; a soft finding never blocks; ClassifyThreat leaves a hard baseline finding dangerous and still classifies legacy findings. Related: Spec 077 (specs/077-scanner-simplification)
diff --git a/internal/security/scanner/sarif.go b/internal/security/scanner/sarif.go
@@ -403,6 +403,19 @@ func parsePackageFromMessage(msg string) (pkg, installed, fixed string) {
 // ClassifyThreat assigns user-facing threat_type and threat_level to a finding
 // based on rule ID, category, description, and severity.
 func ClassifyThreat(f *ScanFinding) {
+	// Baseline detect findings (Spec 076/077) already carry authoritative threat_type /
+	// threat_level / tier from the deterministic engine, so the legacy keyword classifier
+	// must NOT rewrite them. Rewriting here can promote a soft (review-only) finding to
+	// "dangerous" — every soft check's threat_type string (e.g. "exfiltration",
+	// "prompt_injection", "tool_poisoning") happens to match a dangerous keyword branch
+	// below — or demote a hard finding whose text matches no such keyword to "warning".
+	// Either breaks the tier↔level coupling (tier==hard ⇔ dangerous) the summary and
+	// approval gate depend on. A baseline finding is identified by its non-empty Tier;
+	// legacy / external scanner findings (empty Tier) still get classified as before.
+	if f.Tier != "" {
+		return
+	}
+
 	ruleLC := strings.ToLower(f.RuleID)
 	catLC := strings.ToLower(f.Category)
 	titleLC := strings.ToLower(f.Title)
diff --git a/internal/security/scanner/sarif_test.go b/internal/security/scanner/sarif_test.go
@@ -398,3 +398,76 @@ func TestSARIFRoundTrip(t *testing.T) {
 		t.Error("round-trip failed: ruleId mismatch")
 	}
 }
+
+// TestClassifyThreat_PreservesBaselineTier locks Spec 077 US1 Codex finding #2:
+// the legacy keyword classifier must NOT rewrite a baseline detect finding (one
+// that carries a Tier). Before the fix, ClassifyThreat re-derived threat_level
+// from the description keywords, so a HARD finding whose text lacked a
+// "dangerous" keyword was downgraded dangerous→warning while its Tier stayed
+// "hard" — breaking the tier↔level coupling the summary and approval gate rely
+// on. A baseline finding must pass through untouched.
+func TestClassifyThreat_PreservesBaselineTier(t *testing.T) {
+	// A hard phrase_injection finding whose description contains no keyword the
+	// classifier would map to "dangerous" (it would otherwise fall through to the
+	// default branch and be set to "warning" at High severity).
+	f := ScanFinding{
+		RuleID:      "phrase.injection",
+		Severity:    SeverityHigh,
+		Category:    "phrase_injection",
+		ThreatType:  ThreatPromptInjection,
+		ThreatLevel: ThreatLevelDangerous,
+		Title:       "Curated injection directive",
+		Description: "Description contains a high-confidence directive to the agent.",
+		Tier:        TierHard,
+	}
+	ClassifyThreat(&f)
+	if f.ThreatLevel != ThreatLevelDangerous {
+		t.Errorf("baseline hard finding downgraded: threat_level = %q, want %q", f.ThreatLevel, ThreatLevelDangerous)
+	}
+	if f.Tier != TierHard {
+		t.Errorf("Tier mutated to %q, want %q", f.Tier, TierHard)
+	}
+	// The hard/dangerous coupling isBlockingFinding depends on must survive.
+	if !isBlockingFinding(f) {
+		t.Error("hard finding must remain blocking after classification")
+	}
+
+	// A soft baseline finding must likewise not be promoted to dangerous even
+	// though its threat_type ("prompt_injection") matches a dangerous keyword.
+	soft := ScanFinding{
+		RuleID:      "directive.imperative",
+		Severity:    SeverityHigh,
+		Category:    "prompt_injection",
+		ThreatType:  ThreatPromptInjection,
+		ThreatLevel: ThreatLevelWarning,
+		Description: "prompt injection phrasing present but soft-tier",
+		Tier:        TierSoft,
+	}
+	ClassifyThreat(&soft)
+	if soft.ThreatLevel != ThreatLevelWarning {
+		t.Errorf("soft baseline finding rewritten: threat_level = %q, want %q", soft.ThreatLevel, ThreatLevelWarning)
+	}
+	if isBlockingFinding(soft) {
+		t.Error("soft finding must never block, even with an injection threat_type")
+	}
+}
+
+// TestClassifyThreat_StillClassifiesLegacy proves the guard is scoped to
+// baseline findings only: a legacy/external finding (no Tier) is still
+// classified by keyword as before, so back-compat is preserved.
+func TestClassifyThreat_StillClassifiesLegacy(t *testing.T) {
+	f := ScanFinding{
+		RuleID:      "cisco-mcp-001",
+		Severity:    SeverityHigh,
+		Category:    "prompt-injection",
+		Description: "detected prompt injection payload",
+		// no Tier — legacy finding
+	}
+	ClassifyThreat(&f)
+	if f.ThreatType != ThreatPromptInjection {
+		t.Errorf("legacy finding threat_type = %q, want %q", f.ThreatType, ThreatPromptInjection)
+	}
+	if f.ThreatLevel != ThreatLevelDangerous {
+		t.Errorf("legacy finding threat_level = %q, want %q", f.ThreatLevel, ThreatLevelDangerous)
+	}
+}
diff --git a/internal/security/scanner/service.go b/internal/security/scanner/service.go
@@ -1408,9 +1408,27 @@ func (s *Service) ApproveServer(ctx context.Context, serverName string, force bo
 		}
 	}
 
-	// Check for critical findings (block unless force)
-	if aggReport != nil && aggReport.Summary.Critical > 0 && !force {
-		return fmt.Errorf("server has %d critical findings; resolve them or use --force to approve anyway", aggReport.Summary.Critical)
+	// Block approval on blocking findings unless forced. Spec 077 FR-021: the
+	// approval gate is tier-driven, mirroring the server verdict and the Approve
+	// modal. Any HARD-tier baseline finding (dangerous) blocks — and a curated
+	// hard phrase.injection is SeverityHigh, not Critical, so gating on Critical
+	// severity alone let a dangerous server be unquarantined. isBlockingFinding is
+	// the SAME predicate that drives the "dangerous" summary status, so the gate
+	// and the verdict can never disagree. Critical severity (e.g. a critical CVE)
+	// still blocks for back-compat.
+	if aggReport != nil && !force {
+		blocking := 0
+		for _, f := range aggReport.Findings {
+			if isBlockingFinding(f) {
+				blocking++
+			}
+		}
+		if blocking > 0 {
+			return fmt.Errorf("server has %d dangerous (hard-tier) finding(s); resolve them or use --force to approve anyway", blocking)
+		}
+		if aggReport.Summary.Critical > 0 {
+			return fmt.Errorf("server has %d critical findings; resolve them or use --force to approve anyway", aggReport.Summary.Critical)
+		}
 	}
 
 	// Create integrity baseline
diff --git a/internal/security/scanner/service_test.go b/internal/security/scanner/service_test.go
@@ -830,6 +830,107 @@ func TestServiceApproveServerForce(t *testing.T) {
 	}
 }
 
+// TestServiceApproveServerBlockedByHardFinding locks Spec 077 US1 Codex finding
+// #1: the approval gate must block on any HARD-tier baseline finding, not only on
+// Summary.Critical. A curated hard phrase.injection is SeverityHigh (not
+// Critical) with threat_level "dangerous", so the old Critical-only gate let a
+// dangerous server be unquarantined. The gate now reuses isBlockingFinding — the
+// SAME predicate that drives the "dangerous" verdict — so it cannot disagree with
+// the summary. --force must still override.
+func TestServiceApproveServerBlockedByHardFinding(t *testing.T) {
+	svc, store, _ := newTestService(t)
+
+	job := &ScanJob{
+		ID:         "job-hard",
+		ServerName: "poisoned-server",
+		Status:     ScanJobStatusCompleted,
+		Scanners:   []string{"tpa-descriptions"},
+		StartedAt:  time.Now().Add(-1 * time.Minute),
+	}
+	_ = store.SaveScanJob(job)
+
+	// A hard phrase_injection finding: High severity (NOT Critical), dangerous
+	// threat level, hard tier — exactly the shape the Critical-only gate missed.
+	report := &ScanReport{
+		ID:         "report-hard",
+		JobID:      "job-hard",
+		ServerName: "poisoned-server",
+		ScannerID:  "tpa-descriptions",
+		Findings: []ScanFinding{
+			{
+				RuleID:      "phrase.injection",
+				Severity:    SeverityHigh,
+				Category:    "phrase_injection",
+				ThreatType:  ThreatPromptInjection,
+				ThreatLevel: ThreatLevelDangerous,
+				Title:       "Instruction-override directive",
+				Tier:        TierHard,
+			},
+		},
+		ScannedAt: time.Now(),
+	}
+	_ = store.SaveScanReport(report)
+
+	// Unforced approve must fail even though there are zero Critical findings.
+	if err := svc.ApproveServer(context.Background(), "poisoned-server", false, "admin@test.com"); err == nil {
+		t.Fatal("expected error: a hard-tier (dangerous) finding must block unforced approval")
+	}
+	if _, err := store.GetIntegrityBaseline("poisoned-server"); err == nil {
+		t.Fatal("expected no baseline after a rejected approval")
+	}
+
+	// --force must still override.
+	if err := svc.ApproveServer(context.Background(), "poisoned-server", true, "admin@test.com"); err != nil {
+		t.Fatalf("force approve should succeed despite the hard finding: %v", err)
+	}
+	if _, err := store.GetIntegrityBaseline("poisoned-server"); err != nil {
+		t.Fatalf("expected baseline after forced approval: %v", err)
+	}
+}
+
+// TestServiceApproveServerSoftFindingDoesNotBlock proves the gate's counterpart:
+// a SOFT baseline finding (review-only) must NOT block an unforced approval, even
+// at High severity — the two-tier model, not raw severity, governs blocking.
+func TestServiceApproveServerSoftFindingDoesNotBlock(t *testing.T) {
+	svc, store, _ := newTestService(t)
+
+	job := &ScanJob{
+		ID:         "job-soft",
+		ServerName: "reviewable-server",
+		Status:     ScanJobStatusCompleted,
+		Scanners:   []string{"tpa-descriptions"},
+		StartedAt:  time.Now().Add(-1 * time.Minute),
+	}
+	_ = store.SaveScanJob(job)
+
+	report := &ScanReport{
+		ID:         "report-soft",
+		JobID:      "job-soft",
+		ServerName: "reviewable-server",
+		ScannerID:  "tpa-descriptions",
+		Findings: []ScanFinding{
+			{
+				RuleID:      "directive.imperative",
+				Severity:    SeverityHigh,
+				Category:    "prompt_injection",
+				ThreatType:  ThreatPromptInjection,
+				ThreatLevel: ThreatLevelWarning,
+				Title:       "Soft directive",
+				Tier:        TierSoft,
+			},
+		},
+		ScannedAt: time.Now(),
+	}
+	_ = store.SaveScanReport(report)
+
+	if err := svc.ApproveServer(context.Background(), "reviewable-server", false, "admin@test.com"); err != nil {
+		t.Fatalf("a soft finding must not block unforced approval: %v", err)
+	}
+	if _, err := store.GetIntegrityBaseline("reviewable-server"); err != nil {
+		t.Fatalf("expected baseline after approving a soft-only server: %v", err)
+	}
+}
+
 func TestServiceApproveServerNoScanForce(t *testing.T) {
 	svc, store, _ := newTestService(t)