Skip to content

Commit 772868c

Browse files
committed
fix(security): absorb stronger severity + threat_level on merge dedup
MergeFindings phase-1 dedup by (rule_id, location) took max Confidence and the most-severe Tier but kept the first occurrence's Severity and ThreatLevel, discarding a later duplicate's if it was more severe. A low/info finding followed by a high/warning duplicate at the same (rule_id, location) merged at the LOWER severity, making the aggregate CalculateRiskScore and the report summary order-dependent, contradicting US2's "max severity among agreeing findings" intent. Add severityRank and threatLevelRank ordering helpers (strict refinements of threatCategory's bucketing, so they never disagree with CalculateRiskScore) and take the more-severe Severity and ThreatLevel on absorb. Merging the same two findings in either order now yields identical Severity/ThreatLevel/Confidence/Tier and an identical risk score. Related: Spec 077
1 parent 5c14f79 commit 772868c

2 files changed

Lines changed: 108 additions & 4 deletions

File tree

internal/security/scanner/sarif.go

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,45 @@ func tierRank(tier string) int {
462462
}
463463
}
464464

465+
// severityRank orders CVSS severities so the more-severe one wins on merge:
466+
// critical > high > medium > low > info > empty/unknown. The ordering is a
467+
// strict refinement of threatCategory's severity fallback (critical→dangerous,
468+
// high/medium→warning, low→info), so taking the max severity here never
469+
// disagrees with CalculateRiskScore's bucketing.
470+
func severityRank(sev string) int {
471+
switch sev {
472+
case SeverityCritical:
473+
return 5
474+
case SeverityHigh:
475+
return 4
476+
case SeverityMedium:
477+
return 3
478+
case SeverityLow:
479+
return 2
480+
case SeverityInfo:
481+
return 1
482+
default:
483+
return 0
484+
}
485+
}
486+
487+
// threatLevelRank orders user-facing threat levels so the more-severe one wins
488+
// on merge: dangerous > warning > info > empty/unknown. This mirrors
489+
// threatCategory's ThreatLevel bucketing exactly, so taking the max threat
490+
// level here is consistent with CalculateRiskScore.
491+
func threatLevelRank(level string) int {
492+
switch level {
493+
case ThreatLevelDangerous:
494+
return 3
495+
case ThreatLevelWarning:
496+
return 2
497+
case ThreatLevelInfo:
498+
return 1
499+
default:
500+
return 0
501+
}
502+
}
503+
465504
// findingSources returns the contributing scanner ids for a finding, preferring
466505
// the explicit Sources list (Spec 077) and falling back to the single Scanner
467506
// id for legacy findings that predate multi-source attribution.
@@ -525,16 +564,24 @@ func MergeFindings(findings []ScanFinding) []ScanFinding {
525564
if pos, ok := index[k]; ok {
526565
result[pos].Sources = sortedUnion(result[pos].Sources, srcs)
527566
// Absorb the duplicate's stronger fields (Spec 077): keep the
528-
// higher confidence, the more-severe tier (hard > soft), and the
529-
// union of signals — otherwise merging a hard/high-confidence
530-
// finding with a same-(rule_id,location) soft/low-confidence
531-
// duplicate would silently drop the hard tier and confidence.
567+
// higher confidence, the more-severe tier (hard > soft), the
568+
// more-severe CVSS severity and user-facing threat level, and the
569+
// union of signals — otherwise merging a hard/high finding with a
570+
// same-(rule_id,location) soft/low duplicate would silently drop
571+
// the stronger fields, making CalculateRiskScore and the summary
572+
// order-dependent.
532573
if f.Confidence > result[pos].Confidence {
533574
result[pos].Confidence = f.Confidence
534575
}
535576
if tierRank(f.Tier) > tierRank(result[pos].Tier) {
536577
result[pos].Tier = f.Tier
537578
}
579+
if severityRank(f.Severity) > severityRank(result[pos].Severity) {
580+
result[pos].Severity = f.Severity
581+
}
582+
if threatLevelRank(f.ThreatLevel) > threatLevelRank(result[pos].ThreatLevel) {
583+
result[pos].ThreatLevel = f.ThreatLevel
584+
}
538585
result[pos].Signals = sortedUnion(result[pos].Signals, f.Signals)
539586
continue
540587
}

internal/security/scanner/sarif_test.go

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,63 @@ func TestMergeFindingsDedupByRuleAndLocation(t *testing.T) {
358358
}
359359
}
360360

361+
// TestMergeFindingsAbsorbsStrongerSeverity proves Spec 077 (US2): when a
362+
// low/info duplicate and a high/warning duplicate share the same
363+
// (rule_id, location), the merged finding takes the MORE-SEVERE Severity and
364+
// ThreatLevel (alongside max Confidence and most-severe Tier) regardless of the
365+
// order in which the two are presented. Absorbing only some of the stronger
366+
// fields would make CalculateRiskScore and the summary order-dependent.
367+
func TestMergeFindingsAbsorbsStrongerSeverity(t *testing.T) {
368+
weak := ScanFinding{
369+
RuleID: "detect.tpa", Location: "srv:tool", ThreatType: ThreatToolPoisoning,
370+
Severity: SeverityInfo, ThreatLevel: ThreatLevelInfo, Tier: TierSoft,
371+
Confidence: 0.3, Scanner: "scanner-a", Sources: []string{"scanner-a"},
372+
}
373+
strong := ScanFinding{
374+
RuleID: "detect.tpa", Location: "srv:tool", ThreatType: ThreatToolPoisoning,
375+
Severity: SeverityHigh, ThreatLevel: ThreatLevelWarning, Tier: TierHard,
376+
Confidence: 0.9, Scanner: "scanner-b", Sources: []string{"scanner-b"},
377+
}
378+
379+
assertMerged := func(t *testing.T, merged []ScanFinding) {
380+
t.Helper()
381+
if len(merged) != 1 {
382+
t.Fatalf("expected exactly 1 merged finding, got %d: %+v", len(merged), merged)
383+
}
384+
f := merged[0]
385+
if f.Severity != SeverityHigh {
386+
t.Errorf("expected merged Severity=high, got %q", f.Severity)
387+
}
388+
if f.ThreatLevel != ThreatLevelWarning {
389+
t.Errorf("expected merged ThreatLevel=warning, got %q", f.ThreatLevel)
390+
}
391+
if f.Tier != TierHard {
392+
t.Errorf("expected merged Tier=hard, got %q", f.Tier)
393+
}
394+
// Max of the two confidences (0.9), possibly raised further by the
395+
// two-source consensus boost — never the weak 0.3.
396+
if f.Confidence < 0.9 {
397+
t.Errorf("expected merged Confidence>=0.9, got %v", f.Confidence)
398+
}
399+
}
400+
401+
weakFirst := MergeFindings([]ScanFinding{weak, strong})
402+
strongFirst := MergeFindings([]ScanFinding{strong, weak})
403+
assertMerged(t, weakFirst)
404+
assertMerged(t, strongFirst)
405+
406+
// The whole point: the merge — and therefore the aggregate risk score — is
407+
// order-independent. A weak-then-strong ordering must not score lower than a
408+
// strong-then-weak ordering.
409+
if got, want := CalculateRiskScore(weakFirst), CalculateRiskScore(strongFirst); got != want {
410+
t.Errorf("CalculateRiskScore is order-dependent: weak-first=%d strong-first=%d", got, want)
411+
}
412+
// And both must reflect the high/warning severity, not the info floor.
413+
if s := CalculateRiskScore(weakFirst); s == 0 {
414+
t.Errorf("expected non-zero risk score after absorbing the warning-level duplicate, got %d", s)
415+
}
416+
}
417+
361418
// TestMergeFindingsConsensusBoostsConfidence proves Spec 077 FR-012: when two
362419
// independent sources agree on the same (location, threat_type) — even via
363420
// different rule ids — the merged finding's confidence rises above the

0 commit comments

Comments
 (0)