Skip to content

Commit 5c14f79

Browse files
committed
fix(security): score consensus group at max severity; preserve stronger fields on merge
CalculateRiskScore consensus branch now scores each (location, threat_type) group at the most-severe threat category and the max per-finding weight among all agreeing findings, instead of whichever finding was encountered first. For severity-derived threat_types (supply_chain, uncategorized, malicious_code fallback) agreeing findings can carry different threat_levels, so the previous first-wins behavior made the score order-dependent and could drop a genuine high/warning finding. Extracted a threatCategory helper and precompute the group max in the existing consensus pass for determinism. MergeFindings phase-1 dedup now absorbs the duplicate's stronger fields: it takes max(Confidence), the more-severe Tier (hard > soft via tierRank), and the union of Signals, so merging a hard/high-confidence finding with a same- (rule_id, location) soft/low-confidence duplicate no longer discards the hard tier and higher confidence. Adds tests locking order-independent max-severity consensus scoring and max-confidence + hard-tier-wins on merge. Related: Spec 077
1 parent 2a3ae2c commit 5c14f79

2 files changed

Lines changed: 164 additions & 21 deletions

File tree

internal/security/scanner/sarif.go

Lines changed: 84 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,13 @@ func CalculateRiskScore(findings []ScanFinding) int {
290290
// dedup (so existing single-scanner scoring is unchanged).
291291
type consensusKey struct{ location, threatType string }
292292
groupSources := make(map[consensusKey]map[string]struct{})
293+
// A consensus group is scored ONCE, at the MOST-SEVERE threat category among
294+
// all agreeing findings (not whichever is encountered first) and at the
295+
// MAX per-finding weight — so the score is order-independent even when
296+
// severity-derived threat_types (supply_chain, uncategorized, the
297+
// malicious_code fallback) put different threat_levels on agreeing findings.
298+
groupMaxCat := make(map[consensusKey]int)
299+
groupMaxWeight := make(map[consensusKey]int)
293300
for i := range findings {
294301
f := &findings[i]
295302
if f.ThreatType == "" {
@@ -304,6 +311,12 @@ func CalculateRiskScore(findings []ScanFinding) int {
304311
for _, s := range findingSources(*f) {
305312
set[s] = struct{}{}
306313
}
314+
if cat := threatCategory(f); cat > groupMaxCat[ck] {
315+
groupMaxCat[ck] = cat
316+
}
317+
if w := consensusWeight(*f); w > groupMaxWeight[ck] {
318+
groupMaxWeight[ck] = w
319+
}
307320
}
308321

309322
// Deduplicate for scoring:
@@ -317,46 +330,36 @@ func CalculateRiskScore(findings []ScanFinding) int {
317330
seenConsensus := make(map[consensusKey]bool)
318331
var dangerousCount, warningCount, infoCount int
319332

320-
addWeight := func(f *ScanFinding, weight int) {
321-
switch f.ThreatLevel {
322-
case ThreatLevelDangerous:
333+
addWeight := func(category, weight int) {
334+
switch category {
335+
case threatCatDangerous:
323336
dangerousCount += weight
324-
case ThreatLevelWarning:
337+
case threatCatWarning:
325338
warningCount += weight
326-
case ThreatLevelInfo:
339+
case threatCatInfo:
327340
infoCount += weight
328-
default:
329-
// Unclassified: use severity as fallback
330-
switch f.Severity {
331-
case SeverityCritical:
332-
dangerousCount += weight
333-
case SeverityHigh:
334-
warningCount += weight
335-
case SeverityMedium:
336-
warningCount += weight
337-
case SeverityLow:
338-
infoCount += weight
339-
}
340341
}
341342
}
342343

343344
for i := range findings {
344345
f := &findings[i]
345346

346347
// Cross-source consensus path: only when ≥2 distinct sources agree on a
347-
// classified (location, threat_type). Counted once, weighted by agreement.
348+
// classified (location, threat_type). Counted once, at the most-severe
349+
// category and max weight of the whole group so the result is
350+
// order-independent (not the first finding encountered).
348351
if f.ThreatType != "" {
349352
ck := consensusKey{f.Location, f.ThreatType}
350353
if n := len(groupSources[ck]); n >= 2 {
351354
if seenConsensus[ck] {
352355
continue
353356
}
354357
seenConsensus[ck] = true
355-
weight := consensusWeight(*f)
358+
weight := groupMaxWeight[ck]
356359
if n > weight {
357360
weight = n
358361
}
359-
addWeight(f, weight)
362+
addWeight(groupMaxCat[ck], weight)
360363
continue
361364
}
362365
}
@@ -372,7 +375,7 @@ func CalculateRiskScore(findings []ScanFinding) int {
372375

373376
// Consensus weight: independent signals on one tool ADD to the score
374377
// (Spec 076 FR-006). A single-signal or signal-less finding weighs 1.
375-
addWeight(f, consensusWeight(*f))
378+
addWeight(threatCategory(f), consensusWeight(*f))
376379
}
377380

378381
// Logarithmic diminishing returns: score = weight * log2(1 + count)
@@ -398,6 +401,41 @@ func CalculateRiskScore(findings []ScanFinding) int {
398401
return score
399402
}
400403

404+
// Threat categories rank a finding's contribution to the risk score. Higher is
405+
// more severe; threatCatNone (the zero value) is not counted, so a group whose
406+
// members are all unclassified keeps the map default without spuriously scoring.
407+
const (
408+
threatCatNone = iota
409+
threatCatInfo
410+
threatCatWarning
411+
threatCatDangerous
412+
)
413+
414+
// threatCategory maps a finding to its risk bucket, preferring the explicit
415+
// user-facing ThreatLevel and falling back to CVSS severity for unclassified
416+
// findings. It mirrors the previous inline switch in addWeight so legacy scoring
417+
// is unchanged; extracting it lets a consensus group be scored at the
418+
// most-severe member instead of whichever finding is encountered first.
419+
func threatCategory(f *ScanFinding) int {
420+
switch f.ThreatLevel {
421+
case ThreatLevelDangerous:
422+
return threatCatDangerous
423+
case ThreatLevelWarning:
424+
return threatCatWarning
425+
case ThreatLevelInfo:
426+
return threatCatInfo
427+
}
428+
switch f.Severity {
429+
case SeverityCritical:
430+
return threatCatDangerous
431+
case SeverityHigh, SeverityMedium:
432+
return threatCatWarning
433+
case SeverityLow:
434+
return threatCatInfo
435+
}
436+
return threatCatNone
437+
}
438+
401439
// consensusWeight returns how much a single (deduplicated) finding contributes
402440
// to its risk category. The deterministic scanner (Spec 076) aggregates every
403441
// independent check that fired on a tool into one finding's Signals list, so a
@@ -411,6 +449,19 @@ func consensusWeight(f ScanFinding) int {
411449
return 1
412450
}
413451

452+
// tierRank orders finding tiers so the more-severe tier wins on merge:
453+
// hard > soft > empty/unknown.
454+
func tierRank(tier string) int {
455+
switch tier {
456+
case TierHard:
457+
return 2
458+
case TierSoft:
459+
return 1
460+
default:
461+
return 0
462+
}
463+
}
464+
414465
// findingSources returns the contributing scanner ids for a finding, preferring
415466
// the explicit Sources list (Spec 077) and falling back to the single Scanner
416467
// id for legacy findings that predate multi-source attribution.
@@ -473,6 +524,18 @@ func MergeFindings(findings []ScanFinding) []ScanFinding {
473524
k := key{f.RuleID, f.Location}
474525
if pos, ok := index[k]; ok {
475526
result[pos].Sources = sortedUnion(result[pos].Sources, srcs)
527+
// Absorb the duplicate's stronger fields (Spec 077): keep the
528+
// higher confidence, the more-severe tier (hard > soft), and the
529+
// union of signals — otherwise merging a hard/high-confidence
530+
// finding with a same-(rule_id,location) soft/low-confidence
531+
// duplicate would silently drop the hard tier and confidence.
532+
if f.Confidence > result[pos].Confidence {
533+
result[pos].Confidence = f.Confidence
534+
}
535+
if tierRank(f.Tier) > tierRank(result[pos].Tier) {
536+
result[pos].Tier = f.Tier
537+
}
538+
result[pos].Signals = sortedUnion(result[pos].Signals, f.Signals)
476539
continue
477540
}
478541
f.Sources = sortedUnion(f.Sources, srcs)

internal/security/scanner/sarif_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,86 @@ func TestCalculateRiskScoreCrossSourceConsensusAdds(t *testing.T) {
424424
}
425425
}
426426

427+
// TestCalculateRiskScoreConsensusUsesMaxSeverity proves the consensus group is
428+
// scored at its MOST-SEVERE member's threat level, not whichever finding is
429+
// encountered first. For severity-derived threat_types (supply_chain here)
430+
// agreeing findings can carry different threat_levels; the previous code counted
431+
// the group at the first finding's level, making the score order-dependent and
432+
// able to drop a genuine warning. The score must be identical in both orders and
433+
// reflect the warning (high) member, not the info (low) one.
434+
func TestCalculateRiskScoreConsensusUsesMaxSeverity(t *testing.T) {
435+
warningFinding := ScanFinding{
436+
RuleID: "trivy.CVE-1",
437+
Location: "srv:tool",
438+
ThreatType: ThreatSupplyChain,
439+
ThreatLevel: ThreatLevelWarning,
440+
Severity: SeverityHigh,
441+
Scanner: "trivy",
442+
}
443+
infoFinding := ScanFinding{
444+
RuleID: "grype.CVE-2",
445+
Location: "srv:tool",
446+
ThreatType: ThreatSupplyChain,
447+
ThreatLevel: ThreatLevelInfo,
448+
Severity: SeverityLow,
449+
Scanner: "grype",
450+
}
451+
452+
ab := CalculateRiskScore([]ScanFinding{warningFinding, infoFinding})
453+
ba := CalculateRiskScore([]ScanFinding{infoFinding, warningFinding})
454+
455+
if ab != ba {
456+
t.Fatalf("consensus score is order-dependent: [warning,info]=%d [info,warning]=%d", ab, ba)
457+
}
458+
// Two distinct sources agree → weight 2 at the warning level: 6*log2(3)=9.
459+
// Scoring at the info level (the bug) would give 2*log2(3)=3.
460+
if ab != 9 {
461+
t.Errorf("consensus score = %d, want 9 (warning-level, 2 sources)", ab)
462+
}
463+
}
464+
465+
// TestMergeFindingsAbsorbsStrongerFields proves that phase-1 dedup keeps the
466+
// absorbed duplicate's stronger fields: on merge the result takes max(Confidence),
467+
// the more-severe Tier (hard > soft), and the union of Signals — regardless of
468+
// which finding is encountered first.
469+
func TestMergeFindingsAbsorbsStrongerFields(t *testing.T) {
470+
hard := ScanFinding{
471+
RuleID: "detect.tpa", Location: "srv:tool",
472+
Tier: TierHard, Confidence: 0.9, Signals: []string{"unicode.hidden"},
473+
Scanner: "tpa-descriptions",
474+
}
475+
soft := ScanFinding{
476+
RuleID: "detect.tpa", Location: "srv:tool",
477+
Tier: TierSoft, Confidence: 0.2, Signals: []string{"directive.imperative"},
478+
Scanner: "cisco-mcp-scanner",
479+
}
480+
481+
for _, tc := range []struct {
482+
name string
483+
in []ScanFinding
484+
}{
485+
{"soft-first", []ScanFinding{soft, hard}},
486+
{"hard-first", []ScanFinding{hard, soft}},
487+
} {
488+
t.Run(tc.name, func(t *testing.T) {
489+
merged := MergeFindings(tc.in)
490+
if len(merged) != 1 {
491+
t.Fatalf("expected 1 merged finding, got %d: %+v", len(merged), merged)
492+
}
493+
m := merged[0]
494+
if m.Tier != TierHard {
495+
t.Errorf("merged tier = %q, want hard (more-severe tier must win)", m.Tier)
496+
}
497+
if m.Confidence != 0.9 {
498+
t.Errorf("merged confidence = %v, want 0.9 (max)", m.Confidence)
499+
}
500+
if len(m.Signals) != 2 {
501+
t.Errorf("merged signals = %v, want union of both (2)", m.Signals)
502+
}
503+
})
504+
}
505+
}
506+
427507
// TestClassifyThreatBackfillsSeverity proves Spec 077 (T022): a finding that
428508
// arrives with no severity (as some external/legacy SARIF findings do) is given
429509
// a user-readable severity derived from its classified threat level.

0 commit comments

Comments
 (0)