From 62925297576f5538551e2a9ecbba896dda7f9948 Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Fri, 6 Jun 2025 20:41:10 +0530 Subject: [PATCH 1/8] Clean up Generate function in report.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broken down the functionality of the generate function into modular functions: `buildIgnoreMap()` — creates the ignore tag map `trimDisplayPath()` — handles trimming file paths `initFileReport()` — initializes the report struct `createMatchRulesMap()` — Store match rules in a map `processMatchedStrings()` — processes all pattern matches `buildBehavior()` — creates a behavior object from rule data `handleMetadata()` — processes metadata and applies overrides `updateBehavior()` — updates behavior entries in the file report Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/report/report.go | 312 ++++++++++++++++++++++++------------------- 1 file changed, 171 insertions(+), 141 deletions(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index d933edc7f..aa317edda 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -391,30 +391,13 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon minScore := c.MinRisk ignoreSelf := c.IgnoreSelf - ignore := make(map[string]bool, len(ignoreTags)) - for _, t := range ignoreTags { - ignore[t] = true - } - + ignore := buildIgnoreMap(ignoreTags) size, checksum := sizeAndChecksum(fc) - displayPath := path - if c.OCI { - displayPath = strings.TrimPrefix(path, expath) - } - if len(c.TrimPrefixes) > 0 { - displayPath = TrimPrefixes(displayPath, c.TrimPrefixes) - } + displayPath := trimDisplayPath(path, expath, c) matchCount := len(mrs.MatchingRules()) - fr := &malcontent.FileReport{ - Path: displayPath, - SHA256: checksum, - Size: size, - Meta: make(map[string]string, matchCount), - Behaviors: make([]*malcontent.Behavior, 0, matchCount), - Overrides: make([]*malcontent.Behavior, 0, matchCount/10), - } + fr := initFileReport(displayPath, checksum, size, matchCount) pledges := make([]string, 0, 4) caps := make([]string, 0, 4) @@ -428,10 +411,7 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon highestRisk := HighestMatchRisk(mrs) // Store match rules in a map for future override operations - mrsMap := make(map[string]*yarax.Rule, matchCount) - for _, m := range mrs.MatchingRules() { - mrsMap[m.Identifier()] = m - } + mrsMap := createMatchRulesMap(mrs, matchCount) for _, m := range mrs.MatchingRules() { if all(m.Identifier() == NAME, ignoreSelf) { @@ -465,106 +445,11 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon key = generateKey(m.Namespace(), m.Identifier()) ruleURL := generateRuleURL(m.Namespace(), m.Identifier()) - var matchedStrings []string - { - totalMatches := 0 - for _, p := range m.Patterns() { - totalMatches += len(p.Matches()) - } - - matches := make([]yarax.Match, 0, totalMatches) - for _, p := range m.Patterns() { - matches = append(matches, p.Matches()...) - } - - processor := newMatchProcessor(fc, matches, m.Patterns()) - matchedStrings = processor.process() - processor.clearFileContent() - } - - b := &malcontent.Behavior{ - ID: key, - MatchStrings: matchStrings(m.Identifier(), matchedStrings), - RiskLevel: RiskLevels[risk], - RiskScore: risk, - RuleName: m.Identifier(), - RuleURL: ruleURL, - } - - k := "" - v := "" + matchedStrings := processMatchedStrings(fc, m) - for _, meta := range m.Metadata() { - k = meta.Identifier() - v = fmt.Sprintf("%s", meta.Value()) - // Empty data is unusual, so just ignore it. - if k == "" || v == "" { - continue - } - - // If we find a match in the map for the metadata key, that's the rule to override - // Store this rule (the override) in the fr.Overrides behavior slice - // If an override rule is not overriding a valid rule, log an error - _, exists := mrsMap[k] - switch { - case exists && override: - var overrideSev int - if sev, ok := Levels[v]; ok { - overrideSev = sev - } - b.RiskLevel = RiskLevels[overrideSev] - b.RiskScore = overrideSev - b.Override = append(b.Override, k) - fr.Overrides = append(fr.Overrides, b) - case !exists && override: - // TODO: return error if override references an unknown rule name - continue - } + b := buildBehavior(m, matchedStrings, key, ruleURL, risk) - switch k { - case "author": - b.RuleAuthor = v - m := authorWithURLRe.FindStringSubmatch(v) - if len(m) > 0 && isValidURL(m[2]) { - b.RuleAuthor = m[1] - b.RuleAuthorURL = m[2] - } - // If author is in @username format, strip @ to avoid constantly pinging them on GitHub - if strings.HasPrefix(b.RuleAuthor, "@") { - b.RuleAuthor = strings.Replace(b.RuleAuthor, "@", "", 1) - } - case "author_url": - b.RuleAuthorURL = v - case fmt.Sprintf("__%s__", NAME): - if v == "true" { - fr.IsMalcontent = true - } - case "license": - b.RuleLicense = v - case "license_url": - b.RuleLicenseURL = v - case "description", "threat_name", "name": - desc := mungeDescription(v) - if len(desc) > len(b.Description) { - b.Description = desc - } - case "ref", "reference": - u := fixURL(v) - if isValidURL(u) { - b.ReferenceURL = u - } - case "source_url": - // YARAforge forgets to encode spaces - b.RuleURL = fixURL(v) - case "pledge": - pledges = append(pledges, v) - case "syscall": - sy := strings.Split(v, ",") - syscalls = append(syscalls, sy...) - case "cap": - caps = append(caps, v) - } - } + handleMetadata(m, b, fr, override, mrsMap, &pledges, &caps, &syscalls) // Fix YARA Forge rules that record their author URL as reference URLs if strings.HasPrefix(b.RuleURL, b.ReferenceURL) { @@ -591,25 +476,7 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon b.Description = strings.ReplaceAll(m.Identifier(), "_", " ") } - existingIndex := -1 - for i, existing := range fr.Behaviors { - if existing.ID == key { - existingIndex = i - break - } - } - - // If the existing description is longer and the priority is the same or lower - if existingIndex != -1 { - if fr.Behaviors[existingIndex].RiskScore < b.RiskScore { - fr.Behaviors = append(fr.Behaviors[:existingIndex], append([]*malcontent.Behavior{b}, fr.Behaviors[existingIndex+1:]...)...) - } - if len(fr.Behaviors[existingIndex].Description) < len(b.Description) && fr.Behaviors[existingIndex].RiskScore <= b.RiskScore { - fr.Behaviors[existingIndex].Description = b.Description - } - } else { - fr.Behaviors = append(fr.Behaviors, b) - } + updateBehavior(fr, b, key) // TODO: If we match multiple rules within a single namespace, merge matchstrings } @@ -657,6 +524,169 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon return fr, nil } +func buildIgnoreMap(ignoreTags []string) map[string]bool { + ignore := make(map[string]bool, len(ignoreTags)) + for _, t := range ignoreTags { + ignore[t] = true + } + return ignore +} + +func trimDisplayPath(path string, expath string, c malcontent.Config) string { + displayPath := path + if c.OCI { + displayPath = strings.TrimPrefix(path, expath) + } + if len(c.TrimPrefixes) > 0 { + displayPath = TrimPrefixes(displayPath, c.TrimPrefixes) + } + return displayPath +} + +func initFileReport(path string, checksum string, size int64, matchCount int) *malcontent.FileReport { + return &malcontent.FileReport{ + Path: path, + SHA256: checksum, + Size: size, + Meta: make(map[string]string, matchCount), + Behaviors: make([]*malcontent.Behavior, 0, matchCount), + Overrides: make([]*malcontent.Behavior, 0, matchCount/10), + } +} + +func createMatchRulesMap(mrs *yarax.ScanResults, matchCount int) map[string]*yarax.Rule { + mrsMap := make(map[string]*yarax.Rule, matchCount) + for _, m := range mrs.MatchingRules() { + mrsMap[m.Identifier()] = m + } + return mrsMap +} + +func processMatchedStrings(fc []byte, m *yarax.Rule) []string { + totalMatches := 0 + for _, p := range m.Patterns() { + totalMatches += len(p.Matches()) + } + + matches := make([]yarax.Match, 0, totalMatches) + for _, p := range m.Patterns() { + matches = append(matches, p.Matches()...) + } + + processor := newMatchProcessor(fc, matches, m.Patterns()) + return processor.process() +} + +func buildBehavior(m *yarax.Rule, matchedStrings []string, key string, ruleURL string, risk int) *malcontent.Behavior { + return &malcontent.Behavior{ + ID: key, + MatchStrings: matchStrings(m.Identifier(), matchedStrings), + RiskLevel: RiskLevels[risk], + RiskScore: risk, + RuleName: m.Identifier(), + RuleURL: ruleURL, + } +} + +func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileReport, override bool, mrsMap map[string]*yarax.Rule, pledges *[]string, caps *[]string, syscalls *[]string) { + k := "" + v := "" + + for _, meta := range m.Metadata() { + k = meta.Identifier() + v = fmt.Sprintf("%s", meta.Value()) + // Empty data is unusual, so just ignore it. + if k == "" || v == "" { + continue + } + + // If we find a match in the map for the metadata key, that's the rule to override + // Store this rule (the override) in the fr.Overrides behavior slice + // If an override rule is not overriding a valid rule, log an error + _, exists := mrsMap[k] + switch { + case exists && override: + var overrideSev int + if sev, ok := Levels[v]; ok { + overrideSev = sev + } + b.RiskLevel = RiskLevels[overrideSev] + b.RiskScore = overrideSev + b.Override = append(b.Override, k) + fr.Overrides = append(fr.Overrides, b) + case !exists && override: + // TODO: return error if override references an unknown rule name + continue + } + + switch k { + case "author": + b.RuleAuthor = v + m := authorWithURLRe.FindStringSubmatch(v) + if len(m) > 0 && isValidURL(m[2]) { + b.RuleAuthor = m[1] + b.RuleAuthorURL = m[2] + } + // If author is in @username format, strip @ to avoid constantly pinging them on GitHub + if strings.HasPrefix(b.RuleAuthor, "@") { + b.RuleAuthor = strings.Replace(b.RuleAuthor, "@", "", 1) + } + case "author_url": + b.RuleAuthorURL = v + case fmt.Sprintf("__%s__", NAME): + if v == "true" { + fr.IsMalcontent = true + } + case "license": + b.RuleLicense = v + case "license_url": + b.RuleLicenseURL = v + case "description", "threat_name", "name": + desc := mungeDescription(v) + if len(desc) > len(b.Description) { + b.Description = desc + } + case "ref", "reference": + u := fixURL(v) + if isValidURL(u) { + b.ReferenceURL = u + } + case "source_url": + // YARAforge forgets to encode spaces + b.RuleURL = fixURL(v) + case "pledge": + pledges = append(pledges, v) + case "syscall": + sy := strings.Split(v, ",") + syscalls = append(syscalls, sy...) + case "cap": + caps = append(caps, v) + } + } + +} + +func updateBehavior(fr *malcontent.FileReport, b *malcontent.Behavior, key string) { + existingIndex := -1 + for i, existing := range fr.Behaviors { + if existing.ID == key { + existingIndex = i + break + } + } + + // If the existing description is longer and the priority is the same or lower + if existingIndex != -1 { + if fr.Behaviors[existingIndex].RiskScore < b.RiskScore { + fr.Behaviors = append(fr.Behaviors[:existingIndex], append([]*malcontent.Behavior{b}, fr.Behaviors[existingIndex+1:]...)...) + } + if len(fr.Behaviors[existingIndex].Description) < len(b.Description) && fr.Behaviors[existingIndex].RiskScore <= b.RiskScore { + fr.Behaviors[existingIndex].Description = b.Description + } + } else { + fr.Behaviors = append(fr.Behaviors, b) + } +} // upgradeRisk determines whether to upgrade risk based on finding density. func upgradeRisk(ctx context.Context, riskScore int, riskCounts map[int]int, size int64) bool { if riskScore != 3 { From a00013688f7bcb9e83866687f40ca2be7ba0138b Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Fri, 6 Jun 2025 23:15:52 +0530 Subject: [PATCH 2/8] formatting fix Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/report/report.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index aa317edda..163fb1401 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -588,7 +588,7 @@ func buildBehavior(m *yarax.Rule, matchedStrings []string, key string, ruleURL s } } -func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileReport, override bool, mrsMap map[string]*yarax.Rule, pledges *[]string, caps *[]string, syscalls *[]string) { +func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileReport, override bool, mrsMap map[string]*yarax.Rule, pledges []string, caps []string, syscalls []string) { k := "" v := "" @@ -687,6 +687,7 @@ func updateBehavior(fr *malcontent.FileReport, b *malcontent.Behavior, key strin fr.Behaviors = append(fr.Behaviors, b) } } + // upgradeRisk determines whether to upgrade risk based on finding density. func upgradeRisk(ctx context.Context, riskScore int, riskCounts map[int]int, size int64) bool { if riskScore != 3 { From 8af9735c3bd2ad26adb36851aabd50d0cd531726 Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Fri, 6 Jun 2025 23:38:19 +0530 Subject: [PATCH 3/8] fix typecheck error Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/report/report.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 163fb1401..1dd3bc6d3 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -449,7 +449,7 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon b := buildBehavior(m, matchedStrings, key, ruleURL, risk) - handleMetadata(m, b, fr, override, mrsMap, &pledges, &caps, &syscalls) + handleMetadata(m, b, fr, override, mrsMap, pledges, caps, syscalls) // Fix YARA Forge rules that record their author URL as reference URLs if strings.HasPrefix(b.RuleURL, b.ReferenceURL) { From 3b731197db850e4f5a2badb728966dc28da2f2eb Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Fri, 6 Jun 2025 23:55:24 +0530 Subject: [PATCH 4/8] fix formatting using gofumpt Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/report/report.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 1dd3bc6d3..d18808483 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -663,7 +663,6 @@ func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileRe caps = append(caps, v) } } - } func updateBehavior(fr *malcontent.FileReport, b *malcontent.Behavior, key string) { From 39cf7e222f8efb701ec6fb05e3411faaed16c62c Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Sat, 7 Jun 2025 18:06:23 +0530 Subject: [PATCH 5/8] Update programkind.go - Moved the regex for checking the version outside `getExt` func for better performance - Created slices to check for `.elf, .gzip, .Z` files using `bytes.hasPrefix` - Wrote Equivalent code without an anonymous function in `GetExt` for extension of size greater than 2, which is more idiomatic. Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/programkind/programkind.go | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/pkg/programkind/programkind.go b/pkg/programkind/programkind.go index 86b6fecd9..8ba5a6218 100644 --- a/pkg/programkind/programkind.go +++ b/pkg/programkind/programkind.go @@ -112,6 +112,11 @@ type FileType struct { var ( headerPool *pool.BufferPool initializeOnce sync.Once + versionRegex = regexp.MustCompile(`\d+\.\d+\.\d+$`) + // Magic byte constants for common file signatures. + elfMagic = []byte{0x7f, 'E', 'L', 'F'} + gzipMagic = []byte{0x1f, 0x8b} + ZMagic = []byte{0x78, 0x5E} ) const headerSize int = 512 @@ -137,8 +142,7 @@ func GetExt(path string) string { // Handle files with version numbers in the name // e.g. file1.2.3.tar.gz -> .tar.gz - re := regexp.MustCompile(`\d+\.\d+\.\d+$`) - base = re.ReplaceAllString(base, "") + base = versionRegex.ReplaceAllString(base, "") ext := filepath.Ext(base) @@ -146,10 +150,7 @@ func GetExt(path string) string { parts := strings.Split(base, ".") if len(parts) > 2 { subExt := fmt.Sprintf(".%s%s", parts[len(parts)-2], ext) - if isValidExt := func(ext string) bool { - _, ok := ArchiveMap[ext] - return ok - }(subExt); isValidExt { + if _, ok := ArchiveMap[subExt]; ok { return subExt } } @@ -252,9 +253,7 @@ func File(path string) (*FileType, error) { return nil, nil } - initializeOnce.Do(func() { - headerPool = pool.NewBufferPool(runtime.GOMAXPROCS(0)) - }) + initializeHeaderPool() buf := headerPool.Get(int64(headerSize)) defer headerPool.Put(buf) @@ -289,7 +288,7 @@ func File(path string) (*FileType, error) { } switch { - case hdr[0] == '\x7f' && (hdr[1] == 'E' && hdr[2] == 'L' && hdr[3] == 'F'): + case bytes.HasPrefix(hdr, elfMagic): return Path(".elf"), nil case bytes.Contains(hdr, []byte(" Date: Sun, 22 Jun 2025 07:06:18 +0000 Subject: [PATCH 6/8] fix missing pledges syscalls and caps --- pkg/report/report.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index d18808483..16718b684 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -449,7 +449,7 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon b := buildBehavior(m, matchedStrings, key, ruleURL, risk) - handleMetadata(m, b, fr, override, mrsMap, pledges, caps, syscalls) + handleMetadata(m, b, fr, override, mrsMap, &pledges, &caps, &syscalls) // Fix YARA Forge rules that record their author URL as reference URLs if strings.HasPrefix(b.RuleURL, b.ReferenceURL) { @@ -588,7 +588,7 @@ func buildBehavior(m *yarax.Rule, matchedStrings []string, key string, ruleURL s } } -func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileReport, override bool, mrsMap map[string]*yarax.Rule, pledges []string, caps []string, syscalls []string) { +func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileReport, override bool, mrsMap map[string]*yarax.Rule, pledges *[]string, caps *[]string, syscalls *[]string) { k := "" v := "" @@ -655,12 +655,12 @@ func handleMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileRe // YARAforge forgets to encode spaces b.RuleURL = fixURL(v) case "pledge": - pledges = append(pledges, v) + *pledges = append(*pledges, v) case "syscall": sy := strings.Split(v, ",") - syscalls = append(syscalls, sy...) + *syscalls = append(*syscalls, sy...) case "cap": - caps = append(caps, v) + *caps = append(*caps, v) } } } From 2a8af8285b502fd57dcf92ff8ffcb1f1c35ea804 Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Mon, 30 Jun 2025 14:14:48 +0000 Subject: [PATCH 7/8] fix golangci-lint Signed-off-by: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> --- pkg/programkind/programkind.go | 2 -- pkg/report/report.go | 1 - 2 files changed, 3 deletions(-) diff --git a/pkg/programkind/programkind.go b/pkg/programkind/programkind.go index 8ba5a6218..a8db00d30 100644 --- a/pkg/programkind/programkind.go +++ b/pkg/programkind/programkind.go @@ -229,8 +229,6 @@ func makeFileType(path string, ext string, mime string) *FileType { } // File detects what kind of program this file might be. -// -//nolint:cyclop // ignore complexity of 38 func File(path string) (*FileType, error) { // Follow symlinks and return cleanly if the target does not exist _, err := filepath.EvalSymlinks(path) diff --git a/pkg/report/report.go b/pkg/report/report.go index 16718b684..2e5d7cabd 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -377,7 +377,6 @@ func fileMatchesRule(meta []yarax.Metadata, ext string) bool { return true } -//nolint:cyclop // ignore complexity of 64 func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcontent.Config, expath string, _ *clog.Logger, fc []byte, kind *programkind.FileType) (*malcontent.FileReport, error) { if ctx.Err() != nil { return &malcontent.FileReport{}, ctx.Err() From b9db2029396d458f17b3a912e65d64d6603d80b2 Mon Sep 17 00:00:00 2001 From: Ritish Srivastava <121374890+Ritish134@users.noreply.github.com> Date: Tue, 1 Jul 2025 14:36:27 +0000 Subject: [PATCH 8/8] WIP: saving before rebase --- pkg/report/report.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 2e5d7cabd..90495df55 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -573,7 +573,9 @@ func processMatchedStrings(fc []byte, m *yarax.Rule) []string { } processor := newMatchProcessor(fc, matches, m.Patterns()) - return processor.process() + matchedStrings := processor.process() + processor.clearFileContent() + return matchedStrings } func buildBehavior(m *yarax.Rule, matchedStrings []string, key string, ruleURL string, risk int) *malcontent.Behavior {