Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ require (
github.com/ebitengine/purego v0.8.4 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.3 // indirect
github.com/kr/pretty v0.1.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 // indirect
Expand All @@ -55,6 +56,7 @@ require (
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/minio/sha256-simd v1.0.1 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ github.com/google/go-containerregistry v0.20.6 h1:cvWX87UxxLgaH76b4hIvya6Dzz9qHB
github.com/google/go-containerregistry v0.20.6/go.mod h1:T0x8MuoAoKX/873bkeSfLD2FAkwCDf9/HZgsFJ02E2Y=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/klauspost/cpuid/v2 v2.2.3 h1:sxCkb+qR91z4vsqw4vGGZlDgPz3G7gjaLyK3V8y70BU=
github.com/klauspost/cpuid/v2 v2.2.3/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU=
github.com/klauspost/pgzip v1.2.6/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
Expand All @@ -84,6 +86,8 @@ github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2J
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI=
Expand Down Expand Up @@ -150,6 +154,7 @@ golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201204225414-ed752295db88/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
33 changes: 20 additions & 13 deletions pkg/action/scan.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ import (
"sync/atomic"
"syscall"

"github.com/minio/sha256-simd"

"github.com/chainguard-dev/clog"
"github.com/chainguard-dev/malcontent/pkg/archive"
"github.com/chainguard-dev/malcontent/pkg/compile"
Expand Down Expand Up @@ -49,20 +51,22 @@ var (

// scanFD scans a file descriptor using memory mapping for efficient large file handling.
// This avoids loading the entire file into memory while still using yara-x's byte slice scanning.
func scanFD(scanner *yarax.Scanner, fd uintptr, logger *clog.Logger) ([]byte, *yarax.ScanResults, error) {
// scanFD also returns the file's contents for match string extraction,
// as well as the file's size and its checksum which were originally calculated separately as part of report generation.
func scanFD(scanner *yarax.Scanner, fd uintptr, logger *clog.Logger) ([]byte, *yarax.ScanResults, int64, string, error) {
var stat syscall.Stat_t
if err := syscall.Fstat(int(fd), &stat); err != nil {
return nil, nil, fmt.Errorf("fstat failed: %w", err)
return nil, nil, 0, "", fmt.Errorf("fstat failed: %w", err)
}

size := stat.Size
if size == 0 {
mrs, err := scanner.Scan([]byte{})
return nil, mrs, err
return nil, mrs, 0, "", err
}

if size < 0 {
return nil, nil, fmt.Errorf("invalid file size: %d", size)
return nil, nil, 0, "", fmt.Errorf("invalid file size: %d", size)
}

if size > maxMmapSize {
Expand All @@ -73,26 +77,29 @@ func scanFD(scanner *yarax.Scanner, fd uintptr, logger *clog.Logger) ([]byte, *y

data, err := syscall.Mmap(int(fd), 0, int(size), syscall.PROT_READ, syscall.MAP_PRIVATE)
if err != nil {
return nil, nil, fmt.Errorf("mmap failed: %w", err)
return nil, nil, 0, "", fmt.Errorf("mmap failed: %w", err)
}
defer func() {
if unmapErr := syscall.Munmap(data); unmapErr != nil {
logger.Error("failed to unmap memory", "error", unmapErr)
}
}()

h := sha256.New()
h.Write(data)
checksum := fmt.Sprintf("%x", h.Sum(nil))

mrs, err := scanner.Scan(data)
if err != nil {
return nil, nil, err
return nil, nil, 0, "", err
}

// Create a copy of the data to return since the mmap will be unmapped
// This is necessary because report generation needs access to file content
// for checksum calculation and match string extraction
fc := make([]byte, len(data))
copy(fc, data)
// for match string extraction
fc := append([]byte(nil), data...)

return fc, mrs, err
return fc, mrs, size, checksum, err
}

// scanSinglePath YARA scans a single path and converts it to a fileReport.
Expand Down Expand Up @@ -164,7 +171,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
scanner := scannerPool.Get()
defer scannerPool.Put(scanner)

fc, mrs, err := scanFD(scanner, fd, logger)
fc, mrs, size, checksum, err := scanFD(scanner, fd, logger)
if err != nil {
logger.Debug("skipping", slog.Any("error", err))
return nil, err
Expand All @@ -173,7 +180,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
// If running a scan, only generate reports for mrs that satisfy the risk threshold of 3
// This is a short-circuit that avoids any report generation logic
risk := report.HighestMatchRisk(mrs)
threshold := max(3, c.MinFileRisk, c.MinRisk)
threshold := max(report.HIGH, c.MinFileRisk, c.MinRisk)
if c.Scan && risk < threshold && !c.QuantityIncreasesRisk {
fr := &malcontent.FileReport{Skipped: "overall risk too low for scan", Path: path}
if isArchive {
Expand All @@ -182,7 +189,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
return fr, nil
}

fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc, kind, risk)
fr, err := report.Generate(ctx, path, mrs, c, archiveRoot, logger, fc, size, checksum, kind, risk)
if err != nil {
return nil, NewFileReportError(err, path, TypeGenerateError)
}
Expand Down
144 changes: 80 additions & 64 deletions pkg/report/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ package report

import (
"context"
"crypto/sha256"
"fmt"
"net/url"
"path/filepath"
Expand All @@ -25,7 +24,8 @@ import (
const NAME string = "malcontent"

const (
HARMLESS int = iota
INVALID int = iota - 1
HARMLESS
LOW
MEDIUM
HIGH
Expand All @@ -34,11 +34,12 @@ const (

// Map to handle RiskScore -> RiskLevel conversions.
var RiskLevels = map[int]string{
0: "NONE", // harmless: common to all executables, no system impact
1: "LOW", // undefined: low impact, common to good and bad executables
2: "MEDIUM", // notable: may have impact, but common
3: "HIGH", // suspicious: uncommon, but could be legit
4: "CRITICAL", // critical: certainly malware
INVALID: "NONE", // invalid: unmodified initial value which should not happen
HARMLESS: "NONE", // harmless: common to all executables, no system impact
LOW: "LOW", // undefined: low impact, common to good and bad executables
MEDIUM: "MEDIUM", // notable: may have impact, but common
HIGH: "HIGH", // suspicious: uncommon, but could be legit
CRITICAL: "CRITICAL", // critical: certainly malware
}

// yaraForge has some very, very long rule names.
Expand Down Expand Up @@ -88,17 +89,17 @@ var (

// Map to handle RiskLevel -> RiskScore conversions.
var Levels = map[string]int{
"ignore": -1,
"none": -1,
"harmless": 0,
"low": 1,
"notable": 2,
"medium": 2,
"suspicious": 3,
"weird": 3,
"high": 3,
"crit": 4,
"critical": 4,
"ignore": INVALID,
"none": INVALID,
"harmless": HARMLESS,
"low": LOW,
"notable": MEDIUM,
"medium": MEDIUM,
"suspicious": HIGH,
"weird": HIGH,
"high": HIGH,
"crit": CRITICAL,
"critical": CRITICAL,
}

func thirdPartyKey(path string, rule string) string {
Expand Down Expand Up @@ -203,29 +204,29 @@ func ignoreMatch(tags []string, ignoreTags map[string]bool) bool {
}

func behaviorRisk(ns string, rule string, tags []string) int {
risk := 1
risk := LOW

if thirdParty(ns) {
risk = 3
risk = HIGH
src := strings.Split(ns, "/")[1]

switch src {
case "JPCERT", "YARAForge", "bartblaze", "huntress", "elastic":
risk = 4
risk = CRITICAL
if strings.Contains(strings.ToLower(ns), "generic") ||
strings.Contains(strings.ToLower(rule), "generic") {
risk = 3
risk = HIGH
}
}

if strings.Contains(strings.ToLower(ns), "keyword") ||
strings.Contains(strings.ToLower(rule), "keyword") {
risk = 2
risk = MEDIUM
}
}

if strings.Contains(ns, "combo/") {
risk = 2
risk = MEDIUM
}

for _, tag := range tags {
Expand Down Expand Up @@ -317,21 +318,6 @@ func matchStrings(ruleName string, ms []string) []string {
return longestUnique(raw)
}

// sizeAndChecksum derives the size and SHA-256 checksum from file contents
// that have already been read.
//
// For nil or empty contents it returns a zero size and an empty checksum.
// Otherwise it returns the length of fc in bytes and the lowercase
// hex-encoded SHA-256 digest of fc.
func sizeAndChecksum(fc []byte) (int64, string) {
	if len(fc) == 0 {
		return 0, ""
	}

	// %x renders the fixed-size digest array as a hex string.
	return int64(len(fc)), fmt.Sprintf("%x", sha256.Sum256(fc))
}

// fixURL fixes badly formed URLs.
func fixURL(s string) string {
// YARAforge forgets to encode spaces, but encodes everything else
Expand Down Expand Up @@ -377,7 +363,52 @@ func fileMatchesRule(meta []yarax.Metadata, ext string) bool {
return true
}

func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcontent.Config, expath string, _ *clog.Logger, fc []byte, kind *programkind.FileType, highestRisk int) (*malcontent.FileReport, error) {
// skipMatch reports whether a rule match should be left out of processing.
//
// The decision is made as follows:
//   - a match whose risk is INVALID is always skipped;
//   - otherwise, a match is never skipped when ignoreMalcontent or override
//     is set;
//   - when scan is set (a scan as opposed to an analyze), the match is skipped
//     if its risk falls below highestRisk;
//   - when scan is not set, the match is skipped if its risk falls below
//     threshold.
func skipMatch(ignoreMalcontent, override, scan bool, risk, threshold, highestRisk int) bool {
	if risk == INVALID {
		return true
	}

	// Both flags exempt the match from the risk-based filters below.
	if ignoreMalcontent || override {
		return false
	}

	if scan {
		return risk < highestRisk
	}
	return risk < threshold
}

// skipScanFile reports whether a scanned file should be ignored.
// It returns true only when scan is set and overallRiskScore is below HIGH.
func skipScanFile(scan bool, overallRiskScore int) bool {
	return scan && overallRiskScore < HIGH
}

// applyCriticalUpgrade reports whether a file's risk should be increased.
//
// When quantityIncreasesRisk is not set the answer is always false and
// upgradeRisk is not consulted. Otherwise the decision is delegated to
// upgradeRisk, which receives the overall risk score, the per-risk counts,
// and the file's size: if something has a lot of high, it's probably critical.
func applyCriticalUpgrade(ctx context.Context, quantityIncreasesRisk bool, riskCounts map[int]int, overallRiskScore int, size int64) bool {
	if !quantityIncreasesRisk {
		return false
	}
	return upgradeRisk(ctx, overallRiskScore, riskCounts, size)
}

// isMalcontent reports whether path refers to the malcontent binary itself.
// The lowercased base name of path must equal either NAME or the shortened
// variant "mal". Scanning the malcontent binary causes false positives, so it
// is generally better to ignore it entirely.
func isMalcontent(path string) bool {
	switch strings.ToLower(filepath.Base(path)) {
	case NAME, "mal":
		return true
	default:
		return false
	}
}

func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcontent.Config, expath string, _ *clog.Logger, fc []byte, size int64, checksum string, kind *programkind.FileType, highestRisk int) (*malcontent.FileReport, error) {
if ctx.Err() != nil {
return &malcontent.FileReport{}, ctx.Err()
}
Expand All @@ -391,7 +422,6 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon
ignoreSelf := c.IgnoreSelf

ignore := buildIgnoreMap(ignoreTags)
size, checksum := sizeAndChecksum(fc)

displayPath := trimDisplayPath(path, expath, c)

Expand All @@ -416,27 +446,17 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon
ignoreMalcontent = true
}

if kind != nil && kind.Ext != "" {
if !fileMatchesRule(m.Metadata(), kind.Ext) {
continue
}
if kind != nil && kind.Ext != "" && !fileMatchesRule(m.Metadata(), kind.Ext) {
continue
}

override := slices.Contains(m.Tags(), "override")

risk = behaviorRisk(m.Namespace(), m.Identifier(), m.Tags())
overallRiskScore = max(overallRiskScore, risk)
riskCounts[risk]++
// The malcontent rule is classified as harmless
// A !ignoreMalcontent condition will prevent the rule from being filtered
// If running a scan as opposed to an analyze,
// drop any matches that fall below the highest risk
switch {
case risk == -1:
continue
case !c.Scan && risk < minScore && !ignoreMalcontent && !override:
continue
case c.Scan && risk < highestRisk && !c.QuantityIncreasesRisk && !ignoreMalcontent && !override:

if skipMatch(ignoreMalcontent, override, c.Scan, risk, minScore, highestRisk) {
continue
}

Expand Down Expand Up @@ -488,19 +508,15 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon
// Scans will still need to drop <= medium results
overallRiskScore = highestBehaviorRisk(fr)

// If something has a lot of high, it's probably critical
if c.QuantityIncreasesRisk && upgradeRisk(ctx, overallRiskScore, riskCounts, size) {
if applyCriticalUpgrade(ctx, c.QuantityIncreasesRisk, riskCounts, overallRiskScore, size) {
overallRiskScore = CRITICAL
}

if c.Scan && overallRiskScore < HIGH {
if skipScanFile(c.Scan, overallRiskScore) {
fr.Skipped = "overall risk too low for scan"
}

// Check for both the full and shortened variants of malcontent
isMalBinary := (filepath.Base(path) == NAME || filepath.Base(path) == "mal")

if all(ignoreSelf, fr.IsMalcontent, ignoreMalcontent, isMalBinary) {
if all(ignoreSelf, fr.IsMalcontent, ignoreMalcontent, isMalcontent(path)) {
fr.Skipped = "ignoring malcontent binary"
}

Expand Down Expand Up @@ -706,10 +722,10 @@ func updateBehavior(fr *malcontent.FileReport, b *malcontent.Behavior, key strin

// upgradeRisk determines whether to upgrade risk based on finding density.
func upgradeRisk(ctx context.Context, riskScore int, riskCounts map[int]int, size int64) bool {
if riskScore != 3 {
if riskScore != HIGH {
return false
}
highCount := riskCounts[3]
highCount := riskCounts[HIGH]
sizeMB := size / 1024 / 1024
upgrade := false

Expand Down
Loading
Loading