From b8260a75e38943aad7d59776ddde15a0c4ecae76 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Sat, 26 Jul 2025 21:03:56 -0500 Subject: [PATCH 1/8] [WIP] pass file contents by reference; add more nil checks Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/action/diff.go | 4 ++- pkg/action/scan.go | 33 +++++++++++----------- pkg/archive/archive.go | 2 +- pkg/archive/bz2.go | 2 +- pkg/archive/gzip.go | 2 +- pkg/archive/rpm.go | 2 +- pkg/archive/tar.go | 2 +- pkg/archive/zip.go | 2 +- pkg/archive/zlib.go | 2 +- pkg/archive/zstd.go | 2 +- pkg/compile/compile.go | 4 +-- pkg/pool/pool.go | 13 ++++++--- pkg/programkind/programkind.go | 2 +- pkg/render/json.go | 9 +++++- pkg/render/markdown.go | 2 +- pkg/render/render.go | 5 ++++ pkg/render/simple.go | 3 +- pkg/render/stats.go | 5 ++++ pkg/render/strings.go | 3 +- pkg/render/terminal.go | 12 ++++++-- pkg/render/terminal_brief.go | 3 +- pkg/render/yaml.go | 9 +++++- pkg/report/report.go | 50 +++++++++++++++++++++------------- pkg/report/strings.go | 10 +++++-- 24 files changed, 119 insertions(+), 64 deletions(-) diff --git a/pkg/action/diff.go b/pkg/action/diff.go index 268f9fb52..01c42c424 100644 --- a/pkg/action/diff.go +++ b/pkg/action/diff.go @@ -60,7 +60,9 @@ func relPath(from string, fr *malcontent.FileReport, isArchive bool, isImage boo from = fr.Path if strings.Contains(fr.Path, "∴") { parts := strings.Split(fr.Path, "∴") - from = strings.TrimSpace(parts[0]) + if len(parts) > 0 { + from = strings.TrimSpace(parts[0]) + } } base, err = filepath.Abs(from) if err != nil { diff --git a/pkg/action/scan.go b/pkg/action/scan.go index faebb7d9a..7d634dcb3 100644 --- a/pkg/action/scan.go +++ b/pkg/action/scan.go @@ -167,9 +167,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF initializeOnce.Do(func() { scannerPool = pool.NewScannerPool(yrs, getMaxConcurrency(c.Concurrency)) }) - - scanner := scannerPool.Get() - defer scannerPool.Put(scanner) + scanner := scannerPool.Get(yrs) fc, mrs, size, checksum, err := scanFD(scanner, fd, logger) if err != nil { @@ -195,6 +193,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF } defer func() { + scannerPool.Put(scanner) fc = nil mrs = nil }() @@ -788,20 +787,22 @@ func Scan(ctx context.Context, c malcontent.Config) (*malcontent.Report, error) return r, err } - r.Files.Range(func(key, value any) bool { - if scanCtx.Err() != nil { - return false - } - if key == nil || value == nil { - return true - } - if fr, ok := value.(*malcontent.FileReport); ok { - if fr.RiskScore < c.MinFileRisk { - r.Files.Delete(key) + if r != nil { + r.Files.Range(func(key, value any) bool { + if scanCtx.Err() != nil { + return false } - } - return true - }) + if key == nil || value == nil { + return true + } + if fr, ok := value.(*malcontent.FileReport); ok { + if fr.RiskScore < c.MinFileRisk { + r.Files.Delete(key) + } + } + return true + }) + } if scanCtx.Err() == nil && c.Stats && c.Renderer.Name() != "JSON" && c.Renderer.Name() != "YAML" { err = render.Statistics(&c, r) if err != nil { diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index ac086a470..c74bf437b 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -244,7 +244,7 @@ func handleDirectory(target string) error { // handleFile extracts valid files within .deb or .tar archives. func handleFile(target string, tr *tar.Reader) error { - buf := tarPool.Get(extractBuffer) + buf := tarPool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created above defer tarPool.Put(buf) if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil { diff --git a/pkg/archive/bz2.go b/pkg/archive/bz2.go index b26ac4f91..6cd70633d 100644 --- a/pkg/archive/bz2.go +++ b/pkg/archive/bz2.go @@ -31,7 +31,7 @@ func ExtractBz2(ctx context.Context, d, f string) error { return nil } - buf := archivePool.Get(extractBuffer) + buf := archivePool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go tf, err := os.Open(f) if err != nil { diff --git a/pkg/archive/gzip.go b/pkg/archive/gzip.go index 6eb718e11..5de1e648d 100644 --- a/pkg/archive/gzip.go +++ b/pkg/archive/gzip.go @@ -53,7 +53,7 @@ func ExtractGzip(ctx context.Context, d string, f string) error { return nil } - buf := archivePool.Get(extractBuffer) + buf := archivePool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go gf, err := os.Open(f) if err != nil { diff --git a/pkg/archive/rpm.go b/pkg/archive/rpm.go index af49302af..9d9db762e 100644 --- a/pkg/archive/rpm.go +++ b/pkg/archive/rpm.go @@ -40,7 +40,7 @@ func ExtractRPM(ctx context.Context, d, f string) error { return nil } - buf := archivePool.Get(extractBuffer) + buf := archivePool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go defer archivePool.Put(buf) pkg, err := rpm.Read(rpmFile) diff --git a/pkg/archive/tar.go b/pkg/archive/tar.go index 2387af926..d3299612e 100644 --- a/pkg/archive/tar.go +++ b/pkg/archive/tar.go @@ -48,7 +48,7 @@ func ExtractTar(ctx context.Context, d string, f string) error { return nil } - buf := tarPool.Get(extractBuffer) + buf := tarPool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go filename := filepath.Base(f) tf, err := os.Open(f) diff --git a/pkg/archive/zip.go b/pkg/archive/zip.go index 88b8e097b..e2ec9cfce 100644 --- a/pkg/archive/zip.go +++ b/pkg/archive/zip.go @@ -123,7 +123,7 @@ func extractFile(ctx context.Context, file *zip.File, destDir string, logger *cl } } - buf := zipPool.Get(zipBuffer) + buf := zipPool.Get(zipBuffer) //nolint:nilaway // the buffer pool is created in archive.go clean := filepath.Clean(filepath.ToSlash(file.Name)) if strings.Contains(clean, "..") { diff --git a/pkg/archive/zlib.go b/pkg/archive/zlib.go index 0f5cce47c..9b96aac80 100644 --- a/pkg/archive/zlib.go +++ b/pkg/archive/zlib.go @@ -30,7 +30,7 @@ func ExtractZlib(ctx context.Context, d string, f string) error { return nil } - buf := archivePool.Get(extractBuffer) + buf := archivePool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go defer archivePool.Put(buf) zf, err := os.Open(f) diff --git a/pkg/archive/zstd.go b/pkg/archive/zstd.go index f341de8f2..b99417a11 100644 --- a/pkg/archive/zstd.go +++ b/pkg/archive/zstd.go @@ -30,7 +30,7 @@ func ExtractZstd(ctx context.Context, d string, f string) error { return nil } - buf := archivePool.Get(extractBuffer) + buf := archivePool.Get(extractBuffer) //nolint:nilaway // the buffer pool is created in archive.go uncompressed := strings.TrimSuffix(filepath.Base(f), ".zstd") uncompressed = strings.TrimSuffix(uncompressed, ".zst") diff --git a/pkg/compile/compile.go b/pkg/compile/compile.go index db048d221..f16c581b9 100644 --- a/pkg/compile/compile.go +++ b/pkg/compile/compile.go @@ -217,7 +217,5 @@ func Recursive(ctx context.Context, fss []fs.FS) (*yarax.Rules, error) { return nil, fmt.Errorf("compile errors encountered: %v", errors) } - yrs := yxc.Build() - - return yrs, nil + return yxc.Build(), nil } diff --git a/pkg/pool/pool.go b/pkg/pool/pool.go index 0f98e36e9..5a2738996 100644 --- a/pkg/pool/pool.go +++ b/pkg/pool/pool.go @@ -46,11 +46,11 @@ func (bp *BufferPool) Get(size int64) []byte { bufInterface := bp.pool.Get() bufPtr, ok := bufInterface.(*[]byte) - if !ok || bufPtr == nil { + if !ok { return make([]byte, size) } - if cap(*bufPtr) < int(size) { + if bufPtr != nil && cap(*bufPtr) < int(size) { bp.pool.Put(bufPtr) return make([]byte, size) } @@ -95,8 +95,13 @@ func NewScannerPool(yrs *yarax.Rules, count int) *ScannerPool { } // Get retrieves a scanner from the scanner pool, blocking if none are available. -func (sp *ScannerPool) Get() *yarax.Scanner { - return <-sp.scanners +func (sp *ScannerPool) Get(yrs *yarax.Rules) *yarax.Scanner { + if sp != nil { + return <-sp.scanners + } + // Guard against a nil scanner pool and + // create a new scanner with the cached rules as a fallback + return yarax.NewScanner(yrs) } // Put returns a scanner to the scanner pool. diff --git a/pkg/programkind/programkind.go b/pkg/programkind/programkind.go index 3dcf69b47..c1f6e330d 100644 --- a/pkg/programkind/programkind.go +++ b/pkg/programkind/programkind.go @@ -259,7 +259,7 @@ func File(path string) (*FileType, error) { initializeHeaderPool() - buf := headerPool.Get(int64(headerSize)) + buf := headerPool.Get(int64(headerSize)) //nolint:nilaway // the buffer pool is created above defer headerPool.Put(buf) f, err := os.Open(path) diff --git a/pkg/render/json.go b/pkg/render/json.go index dc4ed2a3c..307b6bc7d 100644 --- a/pkg/render/json.go +++ b/pkg/render/json.go @@ -33,6 +33,11 @@ func (r JSON) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re return ctx.Err() } + // guard against nil reports + if rep == nil { + return nil + } + jr := Report{ Diff: rep.Diff, Files: make(map[string]*malcontent.FileReport), @@ -61,7 +66,9 @@ func (r JSON) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re }) if c != nil && c.Stats && jr.Diff == nil { - jr.Stats = serializedStats(c, rep) + if s := serializedStats(c, rep); s != nil { + jr.Stats = s + } } j, err := json.MarshalIndent(jr, "", " ") diff --git a/pkg/render/markdown.go b/pkg/render/markdown.go index e62387a3b..65c1977eb 100644 --- a/pkg/render/markdown.go +++ b/pkg/render/markdown.go @@ -68,7 +68,7 @@ func (r Markdown) Full(ctx context.Context, _ *malcontent.Config, rep *malconten return ctx.Err() } - if rep.Diff == nil { + if rep == nil || rep.Diff == nil { return nil } diff --git a/pkg/render/render.go b/pkg/render/render.go index 9ee8dd6a6..9499219e7 100644 --- a/pkg/render/render.go +++ b/pkg/render/render.go @@ -70,6 +70,11 @@ func riskEmoji(score int) string { } func serializedStats(c *malcontent.Config, r *malcontent.Report) *Stats { + // guard against nil reports + if r == nil { + return &Stats{} + } + pkgStats, _, totalBehaviors := PkgStatistics(c, &r.Files) riskStats, totalRisks, processedFiles, skippedFiles := RiskStatistics(c, &r.Files) diff --git a/pkg/render/simple.go b/pkg/render/simple.go index fa3481917..b26720739 100644 --- a/pkg/render/simple.go +++ b/pkg/render/simple.go @@ -57,7 +57,8 @@ func (r Simple) Full(ctx context.Context, _ *malcontent.Config, rep *malcontent. return ctx.Err() } - if rep.Diff == nil { + // guard against nil reports + if rep == nil || rep.Diff == nil { return nil } diff --git a/pkg/render/stats.go b/pkg/render/stats.go index e438d1467..dd79f49e8 100644 --- a/pkg/render/stats.go +++ b/pkg/render/stats.go @@ -117,6 +117,11 @@ func PkgStatistics(_ *malcontent.Config, files *sync.Map) ([]malcontent.StrMetri } func Statistics(c *malcontent.Config, r *malcontent.Report) error { + // guard against nil reports + if r == nil { + return fmt.Errorf("unexpected nil report") + } + riskStats, totalRisks, processedFiles, skippedFiles := RiskStatistics(c, &r.Files) pkgStats, width, totalBehaviors := PkgStatistics(c, &r.Files) diff --git a/pkg/render/strings.go b/pkg/render/strings.go index e47f60ddd..b30d436ea 100644 --- a/pkg/render/strings.go +++ b/pkg/render/strings.go @@ -118,8 +118,9 @@ func (r StringMatches) Full(ctx context.Context, _ *malcontent.Config, rep *malc return ctx.Err() } + // guard against nil reports // Non-diff files are handled on the fly by File() - if rep.Diff == nil { + if rep == nil || rep.Diff == nil { return nil } diff --git a/pkg/render/terminal.go b/pkg/render/terminal.go index f198491a1..2f1a39f96 100644 --- a/pkg/render/terminal.go +++ b/pkg/render/terminal.go @@ -101,8 +101,9 @@ func (r Terminal) Full(ctx context.Context, _ *malcontent.Config, rep *malconten return ctx.Err() } + // guard against nil reports // Non-diff files are handled on the fly by File() - if rep.Diff == nil { + if rep == nil || rep.Diff == nil { return nil } @@ -187,8 +188,13 @@ func nsLongName(s string) string { // split rule into namespace + resource/technique. func splitRuleID(s string) (string, string) { parts := strings.Split(s, "/") - rest := strings.Join(parts[1:], "/") - return parts[0], rest + id := "" + rest := "" + if len(parts) > 0 { + id = parts[0] + rest = strings.Join(parts[1:], "/") + } + return id, rest } // suggestedWidth calculates a maximum terminal width to render against. diff --git a/pkg/render/terminal_brief.go b/pkg/render/terminal_brief.go index 91ebfa374..8b31ecac3 100644 --- a/pkg/render/terminal_brief.go +++ b/pkg/render/terminal_brief.go @@ -78,8 +78,9 @@ func (r TerminalBrief) Full(ctx context.Context, _ *malcontent.Config, rep *malc return ctx.Err() } + // guard against nil reports // Non-diff files are handled on the fly by File() - if rep.Diff == nil { + if rep == nil || rep.Diff == nil { return nil } diff --git a/pkg/render/yaml.go b/pkg/render/yaml.go index 1242d3ee8..eb32b3113 100644 --- a/pkg/render/yaml.go +++ b/pkg/render/yaml.go @@ -33,6 +33,11 @@ func (r YAML) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re return ctx.Err() } + // guard against nil reports + if rep == nil { + return nil + } + // Make the sync.Map YAML-friendly yr := Report{ Diff: rep.Diff, @@ -60,7 +65,9 @@ func (r YAML) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re }) if c != nil && c.Stats && yr.Diff == nil { - yr.Stats = serializedStats(c, rep) + if s := serializedStats(c, rep); s != nil { + yr.Stats = s + } } yaml, err := yaml.Marshal(yr) diff --git a/pkg/report/report.go b/pkg/report/report.go index a116ce101..9e1c74c35 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -115,13 +115,16 @@ func thirdPartyKey(path string, rule string) string { words = append(words, strings.Split(strings.ToLower(rule), "_")...) // strip off the last word if it's a hex key - lastWord := words[len(words)-1] - _, err := strconv.ParseUint(lastWord, 16, 64) - if err == nil { - words = words[0 : len(words)-1] + lastWord := "" + if len(words) > 0 { + lastWord = words[len(words)-1] + _, err := strconv.ParseUint(lastWord, 16, 64) + if err == nil { + words = words[0 : len(words)-1] + } } - var keepWords []string + keepWords := make([]string, 0, len(words)) for x, w := range words { // ends with a date if x == len(words)-1 && dateRe.MatchString(w) { @@ -139,7 +142,10 @@ func thirdPartyKey(path string, rule string) string { keepWords = keepWords[0:4] } - src := keepWords[0] + src := "" + if len(keepWords) > 0 { + src = keepWords[0] + } // Fix name for https://github.com/Neo23x0/signature-base within YARAForge if src == "signature" { @@ -173,18 +179,21 @@ func generateKey(src string, rule string) string { dirParts := strings.Split(key, "/") // ID's generally follow: `//` - ns := dirParts[0] - // namespaces can have dashes, like 'anti-static' - ns = strings.ReplaceAll(ns, "_", "-") - rsrc := dirParts[len(dirParts)-2] - tech := dirParts[len(dirParts)-1] - - tech = strings.ReplaceAll(tech, rsrc, "") - tech = strings.ReplaceAll(tech, "__", "_") - tech = strings.Trim(tech, "_") - - dirParts[0] = ns - dirParts[len(dirParts)-1] = tech + if len(dirParts) >= 1 { + ns := dirParts[0] + // namespaces can have dashes, like 'anti-static' + ns = strings.ReplaceAll(ns, "_", "-") + rsrc := dirParts[len(dirParts)-2] + tech := dirParts[len(dirParts)-1] + + tech = strings.ReplaceAll(tech, rsrc, "") + tech = strings.ReplaceAll(tech, "__", "_") + tech = strings.Trim(tech, "_") + + dirParts[0] = ns + dirParts[len(dirParts)-1] = tech + } + return strings.TrimSuffix(strings.Join(dirParts, "/"), "/") } @@ -463,7 +472,10 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon key = generateKey(m.Namespace(), m.Identifier()) ruleURL := generateRuleURL(m.Namespace(), m.Identifier()) - matchedStrings := processMatchedStrings(fc, m) + var matchedStrings []string + if fc != nil { + matchedStrings = processMatchedStrings(fc, m) + } b := buildBehavior(m, matchedStrings, key, ruleURL, risk) diff --git a/pkg/report/strings.go b/pkg/report/strings.go index 6f6bd559e..175004015 100644 --- a/pkg/report/strings.go +++ b/pkg/report/strings.go @@ -107,7 +107,7 @@ func (mp *matchProcessor) process() []string { matchPool = pool.NewBufferPool(len(mp.matches)) }) - buffer := matchPool.Get(8) + buffer := matchPool.Get(8) //nolint:nilaway // the buffer pool is created above defer matchPool.Put(buffer) patternsCap := len(mp.patterns) @@ -118,11 +118,15 @@ func (mp *matchProcessor) process() []string { l := int(match.Length()) o := int(match.Offset()) - if o < 0 || o+l > len(mp.fc) { + // if the match processor's file content is nil, + // or the offset is less than zero, + // or the match length + offset exceeds the size of the file, + // avoid any processing and continue + if len(mp.fc) == 0 || o < 0 || o+l > len(mp.fc) { continue } - matchBytes := mp.fc[o : o+l] + matchBytes := (mp.fc)[o : o+l] if !containsUnprintable(matchBytes) { if l <= cap(buffer) { From b7b354186f588a8e8c5a628464e0d4bdc847a98b Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 12:33:21 -0500 Subject: [PATCH 2/8] More improvements to report and strings.go Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/report/report.go | 28 +++++++++++------------- pkg/report/strings.go | 51 ++++++++++++++----------------------------- 2 files changed, 29 insertions(+), 50 deletions(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 9e1c74c35..bd45b8691 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -179,19 +179,20 @@ func generateKey(src string, rule string) string { dirParts := strings.Split(key, "/") // ID's generally follow: `//` - if len(dirParts) >= 1 { - ns := dirParts[0] + if len(dirParts) > 1 { // namespaces can have dashes, like 'anti-static' - ns = strings.ReplaceAll(ns, "_", "-") - rsrc := dirParts[len(dirParts)-2] - tech := dirParts[len(dirParts)-1] + dirParts[0] = strings.ReplaceAll(dirParts[0], "_", "-") - tech = strings.ReplaceAll(tech, rsrc, "") - tech = strings.ReplaceAll(tech, "__", "_") - tech = strings.Trim(tech, "_") - - dirParts[0] = ns - dirParts[len(dirParts)-1] = tech + var rsrc, tech string + // we need at least two parts to pull out resources and technique (potentially one in the same) + if len(dirParts) >= 2 { + rsrc = dirParts[len(dirParts)-2] + tech = dirParts[len(dirParts)-1] + tech = strings.ReplaceAll(tech, rsrc, "") + tech = strings.ReplaceAll(tech, "__", "_") + tech = strings.Trim(tech, "_") + dirParts[len(dirParts)-1] = tech + } } return strings.TrimSuffix(strings.Join(dirParts, "/"), "/") @@ -472,10 +473,7 @@ func Generate(ctx context.Context, path string, mrs *yarax.ScanResults, c malcon key = generateKey(m.Namespace(), m.Identifier()) ruleURL := generateRuleURL(m.Namespace(), m.Identifier()) - var matchedStrings []string - if fc != nil { - matchedStrings = processMatchedStrings(fc, m) - } + matchedStrings := processMatchedStrings(fc, m) b := buildBehavior(m, matchedStrings, key, ruleURL, risk) diff --git a/pkg/report/strings.go b/pkg/report/strings.go index 175004015..75e298792 100644 --- a/pkg/report/strings.go +++ b/pkg/report/strings.go @@ -5,12 +5,6 @@ import ( "sync" yarax "github.com/VirusTotal/yara-x/go" - "github.com/chainguard-dev/malcontent/pkg/pool" -) - -var ( - initializeOnce sync.Once - matchPool *pool.BufferPool ) // StringPool holds data to handle string interning. @@ -103,60 +97,47 @@ func (mp *matchProcessor) process() []string { } defer matchResultPool.Put(result) - initializeOnce.Do(func() { - matchPool = pool.NewBufferPool(len(mp.matches)) - }) - - buffer := matchPool.Get(8) //nolint:nilaway // the buffer pool is created above - defer matchPool.Put(buffer) - - patternsCap := len(mp.patterns) - var patterns []string - // #nosec G115 // ignore Type conversion which leads to integer overflow for _, match := range mp.matches { l := int(match.Length()) o := int(match.Offset()) - // if the match processor's file content is nil, + // if the match processor's file content is empty, // or the offset is less than zero, // or the match length + offset exceeds the size of the file, - // avoid any processing and continue + // then avoid any processing and continue if len(mp.fc) == 0 || o < 0 || o+l > len(mp.fc) { continue } matchBytes := (mp.fc)[o : o+l] - if !containsUnprintable(matchBytes) { - if l <= cap(buffer) { - buffer = buffer[:l] - copy(buffer, matchBytes) - matchStr := string(buffer) - *result = append(*result, mp.pool.Intern(string([]byte(matchStr)))) + switch !containsUnprintable(matchBytes) { + case true: + var matchStr string + if l <= cap(matchBytes) { + matchStr = string(append([]byte(nil), matchBytes[:l]...)) } else { - matchStr := string(matchBytes) - *result = append(*result, mp.pool.Intern(string([]byte(matchStr)))) + matchStr = string(matchBytes) } - } else { - if patterns == nil || cap(patterns) < patternsCap { + *result = append(*result, mp.pool.Intern(matchStr)) + default: + patternsCap := len(mp.patterns) + var patterns []string + if len(patterns) == 0 || cap(patterns) < patternsCap { patterns = make([]string, 0, patternsCap) } else { clear(patterns) patterns = patterns[:0] } for _, p := range mp.patterns { - patterns = append(patterns, p.Identifier()) + patterns = append(patterns, mp.pool.Intern(p.Identifier())) } - compacted := slices.Compact(patterns) - *result = append(*result, compacted...) + *result = append(*result, slices.Compact(patterns)...) } } - finalResult := make([]string, len(*result)) - copy(finalResult, *result) - - return finalResult + return append([]string{}, *result...) } // containsUnprintable determines if a byte is a valid character. From 6d508510304ff13fea053afc72ed06fafe2bb302 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 12:36:22 -0500 Subject: [PATCH 3/8] Add another length check for terminal output Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/render/render.go | 2 +- pkg/render/terminal.go | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/render/render.go b/pkg/render/render.go index 9499219e7..933aea990 100644 --- a/pkg/render/render.go +++ b/pkg/render/render.go @@ -72,7 +72,7 @@ func riskEmoji(score int) string { func serializedStats(c *malcontent.Config, r *malcontent.Report) *Stats { // guard against nil reports if r == nil { - return &Stats{} + return nil } pkgStats, _, totalBehaviors := PkgStatistics(c, &r.Files) diff --git a/pkg/render/terminal.go b/pkg/render/terminal.go index 2f1a39f96..5820edb16 100644 --- a/pkg/render/terminal.go +++ b/pkg/render/terminal.go @@ -192,7 +192,9 @@ func splitRuleID(s string) (string, string) { rest := "" if len(parts) > 0 { id = parts[0] - rest = strings.Join(parts[1:], "/") + if len(parts) >= 1 { + rest = strings.Join(parts[1:], "/") + } } return id, rest } From 6468ef26cd06c62af039b20090bde52d69adda14 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:31:45 -0500 Subject: [PATCH 4/8] More cleanup Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/report/strings.go | 55 +++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/pkg/report/strings.go b/pkg/report/strings.go index 75e298792..9755db478 100644 --- a/pkg/report/strings.go +++ b/pkg/report/strings.go @@ -30,15 +30,7 @@ func NewStringPool(length int) *StringPool { // Intern returns an interned version of the input string. func (sp *StringPool) Intern(s string) string { sp.RLock() - if interned, ok := sp.strings[s]; ok { - sp.RUnlock() - return interned - } - sp.RUnlock() - - sp.Lock() - defer sp.Unlock() - + defer sp.RUnlock() if interned, ok := sp.strings[s]; ok { return interned } @@ -66,7 +58,7 @@ func newMatchProcessor(fc []byte, matches []yarax.Match, mp []yarax.Pattern) *ma var matchResultPool = sync.Pool{ New: func() any { - s := make([]string, 0, 32) + s := make([]string, 0) return &s }, } @@ -80,7 +72,7 @@ func (mp *matchProcessor) clearFileContent() { // process performantly handles the conversion of matched data to strings. // yara-x does not expose the rendered string via the API due to performance overhead. func (mp *matchProcessor) process() []string { - if len(mp.matches) == 0 { + if len(mp.matches) == 0 || len(mp.fc) == 0 { return nil } @@ -89,24 +81,26 @@ func (mp *matchProcessor) process() []string { var result *[]string var ok bool - if result, ok = matchResultPool.Get().(*[]string); ok { - *result = (*result)[:0] - } else { - slice := make([]string, 0, 32) - result = &slice + if result, ok = matchResultPool.Get().(*[]string); !ok { + s := make([]string, 0, len(mp.matches)) + result = &s } - defer matchResultPool.Put(result) + + // Return early if neither the pool nor the make result in a usable slice + if result == nil { + return nil + } + + ret := (*result)[:0] + defer matchResultPool.Put(&ret) // #nosec G115 // ignore Type conversion which leads to integer overflow for _, match := range mp.matches { - l := int(match.Length()) - o := int(match.Offset()) - - // if the match processor's file content is empty, - // or the offset is less than zero, - // or the match length + offset exceeds the size of the file, - // then avoid any processing and continue - if len(mp.fc) == 0 || o < 0 || o+l > len(mp.fc) { + l := match.Length() + o := match.Offset() + + // avoid any processing if the match offset and match length exceed the size of the file + if o+l > uint64(len(mp.fc)) { continue } @@ -115,21 +109,14 @@ func (mp *matchProcessor) process() []string { switch !containsUnprintable(matchBytes) { case true: var matchStr string - if l <= cap(matchBytes) { + if l <= uint64(cap(matchBytes)) { matchStr = string(append([]byte(nil), matchBytes[:l]...)) } else { matchStr = string(matchBytes) } *result = append(*result, mp.pool.Intern(matchStr)) default: - patternsCap := len(mp.patterns) - var patterns []string - if len(patterns) == 0 || cap(patterns) < patternsCap { - patterns = make([]string, 0, patternsCap) - } else { - clear(patterns) - patterns = patterns[:0] - } + patterns := make([]string, 0, len(mp.patterns)) for _, p := range mp.patterns { patterns = append(patterns, mp.pool.Intern(p.Identifier())) } From 9cca8d80a24f386c9ed29ddedc8c401c6cf41a4f Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:45:42 -0500 Subject: [PATCH 5/8] More cleaup and consolidation Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/render/terminal.go | 3 +-- pkg/report/report.go | 32 +++++++++++++++----------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/pkg/render/terminal.go b/pkg/render/terminal.go index 5820edb16..7d4fd5b4b 100644 --- a/pkg/render/terminal.go +++ b/pkg/render/terminal.go @@ -188,8 +188,7 @@ func nsLongName(s string) string { // split rule into namespace + resource/technique. func splitRuleID(s string) (string, string) { parts := strings.Split(s, "/") - id := "" - rest := "" + var id, rest string if len(parts) > 0 { id = parts[0] if len(parts) >= 1 { diff --git a/pkg/report/report.go b/pkg/report/report.go index bd45b8691..5cdf28980 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -114,9 +114,11 @@ func thirdPartyKey(path string, rule string) string { // ELASTIC_Linux_Trojan_Gafgyt_E4A1982B words = append(words, strings.Split(strings.ToLower(rule), "_")...) - // strip off the last word if it's a hex key - lastWord := "" + var lastWord string + // creating a slice with subDir initially should usually ensure this is at least one, + // but the subDir assignment may not result in a non-empty string if len(words) > 0 { + // strip off the last word if it's a hex key lastWord = words[len(words)-1] _, err := strconv.ParseUint(lastWord, 16, 64) if err == nil { @@ -126,11 +128,8 @@ func thirdPartyKey(path string, rule string) string { keepWords := make([]string, 0, len(words)) for x, w := range words { - // ends with a date - if x == len(words)-1 && dateRe.MatchString(w) { - continue - } - if w == "" { + // ends with a date or empty + if (x == len(words)-1 && dateRe.MatchString(w)) || w == "" { continue } @@ -142,19 +141,18 @@ func thirdPartyKey(path string, rule string) string { keepWords = keepWords[0:4] } - src := "" + var src string + // the rule name is equivalent to the words we're keeping minus one to account for the source + ruleName := make([]string, 0, len(keepWords)-1) if len(keepWords) > 0 { - src = keepWords[0] - } - - // Fix name for https://github.com/Neo23x0/signature-base within YARAForge - if src == "signature" { - src = "sig_base" + // Fix name for https://github.com/Neo23x0/signature-base within YARAForge + src = strings.Replace(keepWords[0], "signature", "sig_base", 1) + if len(keepWords) > 1 { + ruleName = keepWords[1:] + } } - rulename := keepWords[1:] - key := fmt.Sprintf("3P/%s/%s", src, strings.Join(rulename, "_")) - return key + return fmt.Sprintf("3P/%s/%s", src, strings.Join(ruleName, "_")) } // thirdParty returns whether the rule is sourced from a 3rd party. From 14f32ab6b49f732335486535c68fe9bc7853f522 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:50:41 -0500 Subject: [PATCH 6/8] Small tweaks Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/render/terminal.go | 2 +- pkg/report/strings.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/render/terminal.go b/pkg/render/terminal.go index 7d4fd5b4b..438088db4 100644 --- a/pkg/render/terminal.go +++ b/pkg/render/terminal.go @@ -191,7 +191,7 @@ func splitRuleID(s string) (string, string) { var id, rest string if len(parts) > 0 { id = parts[0] - if len(parts) >= 1 { + if len(parts) > 1 { rest = strings.Join(parts[1:], "/") } } diff --git a/pkg/report/strings.go b/pkg/report/strings.go index 9755db478..029e2b9f4 100644 --- a/pkg/report/strings.go +++ b/pkg/report/strings.go @@ -58,7 +58,7 @@ func newMatchProcessor(fc []byte, matches []yarax.Match, mp []yarax.Pattern) *ma var matchResultPool = sync.Pool{ New: func() any { - s := make([]string, 0) + s := make([]string, 0, 32) return &s }, } From a120a230ea1870c8a0668906ec7dc4952dbf7d4c Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 13:56:14 -0500 Subject: [PATCH 7/8] Update third-party src assignment Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/report/report.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 5cdf28980..5de926e7d 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -216,14 +216,16 @@ func behaviorRisk(ns string, rule string, tags []string) int { if thirdParty(ns) { risk = HIGH - src := strings.Split(ns, "/")[1] - - switch src { - case "JPCERT", "YARAForge", "bartblaze", "huntress", "elastic": - risk = CRITICAL - if strings.Contains(strings.ToLower(ns), "generic") || - strings.Contains(strings.ToLower(rule), "generic") { - risk = HIGH + src := strings.Split(ns, "/") + + if len(src) > 1 { + switch src[1] { + case "JPCERT", "YARAForge", "bartblaze", "huntress", "elastic": + risk = CRITICAL + if strings.Contains(strings.ToLower(ns), "generic") || + strings.Contains(strings.ToLower(rule), "generic") { + risk = HIGH + } } } From 8a436e29dd548f1bd87eebdefac1d802450d7016 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Mon, 28 Jul 2025 14:10:12 -0500 Subject: [PATCH 8/8] Clean up hex key conditional Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/report/report.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/report/report.go b/pkg/report/report.go index 5de926e7d..f5c4765ff 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -120,8 +120,7 @@ func thirdPartyKey(path string, rule string) string { if len(words) > 0 { // strip off the last word if it's a hex key lastWord = words[len(words)-1] - _, err := strconv.ParseUint(lastWord, 16, 64) - if err == nil { + if _, err := strconv.ParseUint(lastWord, 16, 64); err == nil { words = words[0 : len(words)-1] } }