diff --git a/cmd/mal/mal.go b/cmd/mal/mal.go index a062ca8c1..c0b011213 100644 --- a/cmd/mal/mal.go +++ b/cmd/mal/mal.go @@ -71,6 +71,7 @@ var ( outputFlag string profileFlag bool quantityIncreasesRiskFlag bool + ruleCategoriesFlag []string sensitivityFlag int statsFlag bool thirdPartyFlag bool @@ -269,6 +270,7 @@ func main() { OCI: ociFlag, QuantityIncreasesRisk: quantityIncreasesRiskFlag, Renderer: renderer, + RuleCategories: ruleCategoriesFlag, Rules: yrs, Stats: statsFlag, } @@ -425,6 +427,13 @@ func main() { Destination: &quantityIncreasesRiskFlag, Local: false, }, + &cli.StringSliceFlag{ + Name: "rule-category", + Value: []string{}, + Usage: "Only show matches whose rule path starts with one of the given categories (e.g. exfil, exfil/stealer); repeatable, no-op when unset", + Destination: &ruleCategoriesFlag, + Local: false, + }, &cli.BoolFlag{ Name: "stats", Aliases: []string{"s"}, diff --git a/pkg/action/category.go b/pkg/action/category.go new file mode 100644 index 000000000..1c3a9d7fa --- /dev/null +++ b/pkg/action/category.go @@ -0,0 +1,122 @@ +// Copyright 2026 Chainguard, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package action + +import ( + "strings" + + "github.com/chainguard-dev/malcontent/pkg/malcontent" +) + +// MatchesAnyCategory reports whether ruleID matches any of the supplied +// categories. A category matches when it is equal to ruleID or is a +// '/'-bounded prefix of it (so "exfil" matches "exfil/stealer/foo" but +// not "exfiltrate/foo"). An empty or nil categories slice is a no-op +// (returns true) so the filter is opt-in. 
+func MatchesAnyCategory(ruleID string, categories []string) bool {
+	if len(categories) == 0 {
+		return true // no filter configured, so every rule passes
+	}
+	for _, raw := range categories {
+		c := normalizeCategory(raw) // same normalization as buildCategoryPrefixes
+		if c != "" && (ruleID == c || strings.HasPrefix(ruleID, c+"/")) {
+			return true
+		}
+	}
+	return false
+}
+
+// FilterBehaviorsByCategory returns the subset of behaviors whose ID
+// matches any of the supplied categories, plus the count of dropped
+// entries. Empty/nil categories returns the input slice unchanged.
+func FilterBehaviorsByCategory(behaviors []*malcontent.Behavior, categories []string) ([]*malcontent.Behavior, int) {
+	if len(categories) == 0 {
+		return behaviors, 0
+	}
+	prefixes := buildCategoryPrefixes(categories)
+	kept := make([]*malcontent.Behavior, 0, len(behaviors))
+	dropped := 0
+	for _, b := range behaviors {
+		switch {
+		case b == nil: // nil entries are discarded without counting as dropped
+		case matchesPrefixes(b.ID, prefixes):
+			kept = append(kept, b)
+		default:
+			dropped++
+		}
+	}
+	return kept, dropped
+}
+
+// trimFileReportBehaviors applies the category filter to one FileReport
+// in place and adds the number of removed behaviors to FilteredBehaviors.
+// It never drops the report itself — callers that want empty reports
+// removed must do so explicitly. Nil reports, skipped reports and empty
+// categories return true untouched; otherwise true means a behavior remains.
+func trimFileReportBehaviors(fr *malcontent.FileReport, categories []string) bool {
+	if fr == nil || len(categories) == 0 || fr.Skipped != "" {
+		return true
+	}
+	kept, dropped := FilterBehaviorsByCategory(fr.Behaviors, categories)
+	fr.Behaviors = kept
+	fr.FilteredBehaviors += dropped
+	return len(kept) > 0
+}
+
+// TrimFileReport trims behaviors on a single FileReport to those matching
+// any of the categories. Returns true when at least one behavior remains
+// (useful for callers that want to skip rendering empty reports). Empty/nil
+// categories is a no-op (returns true).
+func TrimFileReport(fr *malcontent.FileReport, categories []string) bool {
+	return trimFileReportBehaviors(fr, categories)
+}
+
+// ApplyCategoryFilter trims each FileReport in the report so that only
+// behaviors matching one of the categories remain, then removes any
+// FileReport whose behavior list became empty. Empty/nil categories is
+// a no-op. Use this for analyze/scan output where empty entries are
+// noise — for the diff path, prefer TrimFileReport per-file so that
+// reconciliation can still see both sides of a change.
+func ApplyCategoryFilter(r *malcontent.Report, categories []string) {
+	if r == nil || r.Files == nil || len(categories) == 0 {
+		return
+	}
+	r.Files.Range(func(key string, fr *malcontent.FileReport) bool {
+		if !trimFileReportBehaviors(fr, categories) {
+			r.Files.Delete(key)
+		}
+		return true
+	})
+}
+
+// categoryPrefix holds one normalized category: exact is "<category>" and
+// withSlash is "<category>/", built once per filter call, not per behavior.
+type categoryPrefix struct{ exact, withSlash string }
+
+// normalizeCategory trims surrounding whitespace and trailing slashes so
+// that "exfil/" and " exfil" select the same rules as "exfil".
+func normalizeCategory(c string) string {
+	return strings.TrimRight(strings.TrimSpace(c), "/")
+}
+
+// buildCategoryPrefixes normalizes categories and skips the empty ones.
+func buildCategoryPrefixes(categories []string) []categoryPrefix {
+	out := make([]categoryPrefix, 0, len(categories))
+	for _, raw := range categories {
+		if c := normalizeCategory(raw); c != "" {
+			out = append(out, categoryPrefix{exact: c, withSlash: c + "/"})
+		}
+	}
+	return out
+}
+
+// matchesPrefixes reports whether ruleID equals or sits below any prefix.
+func matchesPrefixes(ruleID string, prefixes []categoryPrefix) bool {
+	for _, p := range prefixes {
+		if ruleID == p.exact || strings.HasPrefix(ruleID, p.withSlash) {
+			return true
+		}
+	}
+	return false
+}
diff --git a/pkg/action/category_test.go b/pkg/action/category_test.go
new file mode 100644
index 000000000..7fbe14575
--- /dev/null
+++ b/pkg/action/category_test.go
@@ -0,0 +1,302 @@
+// Copyright 2026 Chainguard, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package action
+
+import (
+	"testing"
+
+	"github.com/chainguard-dev/malcontent/pkg/malcontent"
+	"github.com/puzpuzpuz/xsync/v4"
+)
+
+func TestMatchesAnyCategory(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name string
+		id   string
+		cats []string
+		want bool
+	}{
+		{"empty cats is no-op", "exfil/stealer/foo", nil, true},
+		{"empty slice cats is no-op", "exfil/stealer/foo", []string{}, true},
+		{"exact category match", "exfil", []string{"exfil"}, true},
+		{"category prefix matches", "exfil/stealer/foo", []string{"exfil"}, true},
+		{"deeper category prefix matches", "exfil/stealer/foo", []string{"exfil/stealer"}, true},
+		{"non-matching category", "net/http/get", []string{"exfil"}, false},
+		{"prefix boundary respected", "exfiltrate/foo", []string{"exfil"}, false},
+		{"deeper prefix boundary respected", "exfil/stealerz/foo", []string{"exfil/stealer"}, false},
+		{"union of categories: matches second", "net/http/get", []string{"exfil", "net"}, true},
+		{"union of categories: matches none", "fs/read", []string{"exfil", "net"}, false},
+		{"empty string category never matches", "exfil/foo", []string{""}, false},
+		{"empty rule id never matches non-empty cat", "", []string{"exfil"}, false},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+			if got := MatchesAnyCategory(tt.id, tt.cats); got != tt.want {
+				t.Errorf("MatchesAnyCategory(%q, %v) = %v, want %v", tt.id, tt.cats, got, tt.want)
+			}
+		})
+	}
+}
+
+func TestFilterBehaviorsByCategory_Empty(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo"},
+		{ID: "net/http/get"},
+	}
+	got, dropped := FilterBehaviorsByCategory(bs, nil)
+	if dropped != 0 {
+		t.Errorf("dropped = %d, want 0 for empty cats", dropped)
+	}
+	if len(got) != len(bs) {
+		t.Fatalf("len(got) = %d, want %d", len(got), len(bs)) // fatal: the loop below indexes got
+	}
+	for i := range bs {
+		if got[i] != bs[i] {
+			t.Errorf("got[%d] = %v, want %v", i, got[i], bs[i])
+		}
+	}
+}
+
+func TestFilterBehaviorsByCategory_Single(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo"},
+		{ID: "net/http/get"},
+		{ID: "exfil/discord"},
+	}
+	got, dropped := FilterBehaviorsByCategory(bs, []string{"exfil"})
+	if dropped != 1 {
+		t.Errorf("dropped = %d, want 1", dropped)
+	}
+	if len(got) != 2 {
+		t.Fatalf("len(got) = %d, want 2", len(got))
+	}
+	if got[0].ID != "exfil/stealer/foo" || got[1].ID != "exfil/discord" {
+		t.Errorf("unexpected ids: %v, %v", got[0].ID, got[1].ID)
+	}
+}
+
+func TestFilterBehaviorsByCategory_Multiple(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo"},
+		{ID: "net/http/get"},
+		{ID: "fs/read/secret"},
+	}
+	got, dropped := FilterBehaviorsByCategory(bs, []string{"exfil", "net"})
+	if dropped != 1 {
+		t.Errorf("dropped = %d, want 1", dropped)
+	}
+	if len(got) != 2 {
+		t.Fatalf("len(got) = %d, want 2", len(got))
+	}
+}
+
+func TestFilterBehaviorsByCategory_Unknown(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo"},
+		{ID: "net/http/get"},
+	}
+	got, dropped := FilterBehaviorsByCategory(bs, []string{"definitely-not-a-category"})
+	if dropped != 2 {
+		t.Errorf("dropped = %d, want 2", dropped)
+	}
+	if len(got) != 0 {
+		t.Errorf("len(got) = %d, want 0", len(got))
+	}
+}
+
+func TestFilterBehaviorsByCategory_NeverAdds(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "a/b/c"},
+		{ID: "d/e/f"},
+		{ID: "g/h/i"},
+	}
+	for _, cats := range [][]string{
+		nil,
+		{"a"},
+		{"a", "d"},
+		{"a", "d", "g"},
+		{"nope"},
+		{""},
+	} {
+		got, _ := FilterBehaviorsByCategory(bs, cats)
+		if len(got) > len(bs) {
+			t.Errorf("filter added behaviors for cats=%v: len(got)=%d > len(in)=%d", cats, len(got), len(bs))
+		}
+		seen := map[*malcontent.Behavior]bool{}
+		for _, b := range bs {
+			seen[b] = true
+		}
+		for _, b := range got {
+			if !seen[b] {
+				t.Errorf("filter produced behavior not in input for cats=%v", cats)
+			}
+		}
+	}
+}
+
+func TestFilterBehaviorsByCategory_PrefixBoundary(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo"},
+		{ID: "exfiltrate/oops"},
+	}
+	got, _ := FilterBehaviorsByCategory(bs, []string{"exfil"})
+	if len(got) != 1 || got[0].ID != "exfil/stealer/foo" {
+		t.Errorf("got = %v, want exactly [exfil/stealer/foo]", got)
+	}
+}
+
+func TestApplyCategoryFilter_DropsEmptyFiles(t *testing.T) {
+	t.Parallel()
+	files := xsync.NewMap[string, *malcontent.FileReport]()
+	files.Store("/a", &malcontent.FileReport{
+		Path:      "/a",
+		Behaviors: []*malcontent.Behavior{{ID: "net/http/get"}},
+	})
+	files.Store("/b", &malcontent.FileReport{
+		Path:      "/b",
+		Behaviors: []*malcontent.Behavior{{ID: "exfil/stealer/foo"}},
+	})
+	r := &malcontent.Report{Files: files}
+	// Only /b has an exfil behavior; /a ends up empty and must be deleted.
+	ApplyCategoryFilter(r, []string{"exfil"})
+
+	if _, ok := r.Files.Load("/a"); ok {
+		t.Errorf("expected /a to be removed (no exfil behaviors)")
+	}
+	bRep, ok := r.Files.Load("/b")
+	if !ok {
+		t.Fatalf("expected /b to remain")
+	}
+	if len(bRep.Behaviors) != 1 || bRep.Behaviors[0].ID != "exfil/stealer/foo" {
+		t.Errorf("/b behaviors = %v, want [exfil/stealer/foo]", bRep.Behaviors)
+	}
+}
+
+func TestApplyCategoryFilter_PreservesOthers(t *testing.T) {
+	t.Parallel()
+	files := xsync.NewMap[string, *malcontent.FileReport]()
+	files.Store("/x", &malcontent.FileReport{
+		Path: "/x",
+		Behaviors: []*malcontent.Behavior{
+			{ID: "exfil/stealer/foo"},
+			{ID: "net/http/get"},
+		},
+	})
+	r := &malcontent.Report{Files: files}
+	// /x keeps its exfil behavior, so the file stays and only the net one is trimmed.
+	ApplyCategoryFilter(r, []string{"exfil"})
+
+	xRep, ok := r.Files.Load("/x")
+	if !ok {
+		t.Fatalf("expected /x to remain")
+	}
+	if len(xRep.Behaviors) != 1 || xRep.Behaviors[0].ID != "exfil/stealer/foo" {
+		t.Errorf("/x behaviors = %v, want only exfil/stealer/foo", xRep.Behaviors)
+	}
+}
+
+func TestApplyCategoryFilter_NoOpWhenUnset(t *testing.T) {
+	t.Parallel()
+	files := xsync.NewMap[string, *malcontent.FileReport]()
+	files.Store("/x", &malcontent.FileReport{
+		Path: "/x",
+		Behaviors: []*malcontent.Behavior{
+			{ID: "exfil/stealer/foo"},
+			{ID: "net/http/get"},
+		},
+	})
+	r := &malcontent.Report{Files: files}
+	// Both a nil and an empty category list must leave the report untouched.
+	ApplyCategoryFilter(r, nil)
+	ApplyCategoryFilter(r, []string{})
+
+	xRep, ok := r.Files.Load("/x")
+	if !ok {
+		t.Fatalf("expected /x to remain")
+	}
+	if len(xRep.Behaviors) != 2 {
+		t.Errorf("/x behaviors len = %d, want 2 (unchanged)", len(xRep.Behaviors))
+	}
+}
+
+func TestTrimFileReport_NoOpWhenUnset(t *testing.T) {
+	t.Parallel()
+	fr := &malcontent.FileReport{
+		Path: "/x",
+		Behaviors: []*malcontent.Behavior{
+			{ID: "exfil/foo"},
+			{ID: "net/get"},
+		},
+	}
+	if !TrimFileReport(fr, nil) {
+		t.Fatalf("TrimFileReport(nil cats) = false, want true")
+	}
+	if !TrimFileReport(fr, []string{}) {
+		t.Fatalf("TrimFileReport(empty cats) = false, want true")
+	}
+	if len(fr.Behaviors) != 2 {
+		t.Errorf("behaviors mutated by no-op: len=%d, want 2", len(fr.Behaviors))
+	}
+}
+
+func TestTrimFileReport_TrimsInPlace(t *testing.T) {
+	t.Parallel()
+	fr := &malcontent.FileReport{
+		Path: "/x",
+		Behaviors: []*malcontent.Behavior{
+			{ID: "exfil/foo"},
+			{ID: "net/get"},
+		},
+	}
+	if !TrimFileReport(fr, []string{"net"}) {
+		t.Fatalf("TrimFileReport = false, want true (one match remains)")
+	}
+	if len(fr.Behaviors) != 1 || fr.Behaviors[0].ID != "net/get" {
+		t.Errorf("behaviors = %v, want [net/get]", fr.Behaviors)
+	}
+}
+
+func TestTrimFileReport_KeepsFileWhenNoMatch(t *testing.T) {
+	t.Parallel()
+	fr := &malcontent.FileReport{
+		Path: "/x",
+		Behaviors: []*malcontent.Behavior{
+			{ID: "data/foo"},
+		},
+	}
+	if TrimFileReport(fr, []string{"net"}) {
+		t.Fatalf("TrimFileReport = true, want false (no match)")
+	}
+	if len(fr.Behaviors) != 0 {
+		t.Errorf("behaviors = %v, want empty after no-match trim", fr.Behaviors)
+	}
+}
+
+func TestFilterBehaviorsByCategory_ComposesWithMinRiskInputs(t *testing.T) {
+	t.Parallel()
+	bs := []*malcontent.Behavior{
+		{ID: "exfil/stealer/foo", RiskScore: 1},
+		{ID: "exfil/discord", RiskScore: 4},
+		{ID: "net/http/get", RiskScore: 4},
+	}
+	cat, _ := FilterBehaviorsByCategory(bs, []string{"exfil"})
+	// Apply an independent risk threshold on top to confirm the two filters intersect.
+	highOnly := make([]*malcontent.Behavior, 0, len(cat))
+	for _, b := range cat {
+		if b.RiskScore >= 3 {
+			highOnly = append(highOnly, b)
+		}
+	}
+	if len(highOnly) != 1 || highOnly[0].ID != "exfil/discord" {
+		t.Errorf("intersection = %v, want only [exfil/discord]", highOnly)
+	}
+}
diff --git a/pkg/action/scan.go b/pkg/action/scan.go
index 4a1f52dd4..a1f283d9b 100644
--- a/pkg/action/scan.go
+++ b/pkg/action/scan.go
@@ -486,7 +486,10 @@ func setupMatchHandler(ctx context.Context, matchChan chan matchResult, c malcon case <-ctx.Done(): return case match := <-matchChan: - if match.fr != nil && c.Renderer != nil && match.fr.RiskScore >= c.MinFileRisk { + if match.fr != nil { + TrimFileReport(match.fr, c.RuleCategories) + } + if match.fr != nil && c.Renderer != nil && match.fr.RiskScore >= c.MinFileRisk && len(match.fr.Behaviors) > 0 { if err := c.Renderer.File(ctx, match.fr); err != nil { logger.Errorf("render error: %v", err) } @@ -549,12 +552,14 @@ func handleArchiveFile(ctx context.Context, path string, c malcontent.Config, r return true } + TrimFileReport(fr, c.RuleCategories) + if len(c.TrimPrefixes) > 0 { key = report.TrimPrefixes(key, c.TrimPrefixes) } r.Files.Store(key, fr) - if c.Renderer != nil && r.Diff == nil && fr.RiskScore >= c.MinFileRisk { + if c.Renderer != nil && r.Diff == nil && fr.RiskScore >= c.MinFileRisk && len(fr.Behaviors) > 0 { if err := c.Renderer.File(ctx, fr); err != nil { logger.Errorf("render error: %v", err) } @@ -597,11 +602,13 @@ func handleSingleFile(ctx context.Context, path string, scanInfo scanPathInfo, c } } + TrimFileReport(fr, c.RuleCategories) + if len(c.TrimPrefixes) > 0 { path = report.TrimPrefixes(path, c.TrimPrefixes) } r.Files.Store(path, fr) - if c.Renderer != nil && r.Diff == nil && fr.RiskScore >=
c.MinFileRisk && len(fr.Behaviors) > 0 { if err := c.Renderer.File(ctx, fr); err != nil { return fmt.Errorf("render: %w", err) } @@ -771,6 +778,8 @@ func Scan(ctx context.Context, c malcontent.Config) (*malcontent.Report, error) return nil, nil } + ApplyCategoryFilter(r, c.RuleCategories) + r.Files.Range(func(key string, fr *malcontent.FileReport) bool { if scanCtx.Err() != nil { return false diff --git a/pkg/malcontent/malcontent.go b/pkg/malcontent/malcontent.go index 85a4083c9..bac900a4c 100644 --- a/pkg/malcontent/malcontent.go +++ b/pkg/malcontent/malcontent.go @@ -43,6 +43,7 @@ type Config struct { QuantityIncreasesRisk bool Renderer Renderer Report bool + RuleCategories []string RuleFS []fs.FS Rules *yarax.Rules Scan bool