From 6f9ee14de689121c77b28a674842e1eb3674abd5 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Fri, 6 Feb 2026 13:22:13 -0600 Subject: [PATCH 1/2] fix: address more fuzzing errors; miscellaneous improvements Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/action/diff.go | 24 ++++++------ pkg/action/diff_test.go | 4 +- pkg/archive/archive.go | 41 +++++++++++++------- pkg/archive/fuzz_test.go | 2 +- pkg/archive/symlink_test.go | 53 ++++++++++++++++++++++++++ pkg/programkind/fuzz_test.go | 5 +-- pkg/render/fuzz_test.go | 1 + pkg/render/json.go | 26 +------------ pkg/render/render.go | 40 ++++++++++++++++++++ pkg/render/yaml.go | 25 +------------ pkg/report/report.go | 72 ++++++++++++++++++++++++++++-------- 11 files changed, 195 insertions(+), 98 deletions(-) diff --git a/pkg/action/diff.go b/pkg/action/diff.go index b15d6b671..64156f54b 100644 --- a/pkg/action/diff.go +++ b/pkg/action/diff.go @@ -519,15 +519,6 @@ func formatReportKey(res ScanResult, fr *malcontent.FileReport, isReport bool) s return formatKey(res, CleanPath(fr.Path, res.tmpRoot)) } -func behaviorExists(b *malcontent.Behavior, behaviors []*malcontent.Behavior) bool { - for _, tb := range behaviors { - if b.ID == tb.ID { - return true - } - } - return false -} - // fileDiff handles files that exist in both source and destination. func fileDiff(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileReport, rpath, apath string, d *malcontent.DiffReport, src ScanResult, dest ScanResult, archiveOrImage, isReport, isMoved bool) { if ctx.Err() != nil { @@ -563,9 +554,18 @@ func fileDiff(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR abs.PreviousPath = fr.Path } + srcBehaviorIDs := make(map[string]struct{}, len(fr.Behaviors)) + for _, b := range fr.Behaviors { + srcBehaviorIDs[b.ID] = struct{}{} + } + destBehaviorIDs := make(map[string]struct{}, len(tr.Behaviors)) + for _, b := range tr.Behaviors { + destBehaviorIDs[b.ID] = struct{}{} + } + // if destination behavior is not in the source for _, tb := range tr.Behaviors { - if !behaviorExists(tb, fr.Behaviors) { + if _, ok := srcBehaviorIDs[tb.ID]; !ok { tb.DiffAdded = true abs.Behaviors = append(abs.Behaviors, tb) } @@ -573,7 +573,7 @@ func fileDiff(ctx context.Context, c malcontent.Config, fr, tr *malcontent.FileR // if source behavior is not in the destination for _, fb := range fr.Behaviors { - if !behaviorExists(fb, tr.Behaviors) { + if _, ok := destBehaviorIDs[fb.ID]; !ok { fb.DiffRemoved = true abs.Behaviors = append(abs.Behaviors, fb) } @@ -758,8 +758,6 @@ func formatKey(res ScanResult, name string) string { switch { case res.imageURI != "": return fmt.Sprintf("%s ∴ %s", res.imageURI, name) - case res.tmpRoot != "": - return fmt.Sprintf("%s ∴ %s", res.tmpRoot, name) case res.base != "": return fmt.Sprintf("%s ∴ %s", res.base, name) default: diff --git a/pkg/action/diff_test.go b/pkg/action/diff_test.go index fc0c8dc63..257bb25b6 100644 --- a/pkg/action/diff_test.go +++ b/pkg/action/diff_test.go @@ -233,10 +233,10 @@ func TestFormatKey(t *testing.T) { want: "/bin/ls", }, { - name: "with tmpRoot prepends tmpRoot", + name: "with tmpRoot falls through to plain path", res: ScanResult{tmpRoot: "/tmp/extract", imageURI: ""}, file: "/tmp/extract/bin/ls", - want: "/tmp/extract ∴ /tmp/extract/bin/ls", + want: "/tmp/extract/bin/ls", }, { name: "with image URI prepends imageURI", diff --git a/pkg/archive/archive.go b/pkg/archive/archive.go index 27765388b..4cc6e0e17 100644 --- a/pkg/archive/archive.go +++ b/pkg/archive/archive.go @@ -60,6 +60,31 @@ func evalSymlinks(path string) (string, bool) { return resolved, true } +// symlinkEscapesDir checks whether a symlink at target resolves outside dir. +func symlinkEscapesDir(target, dir string) bool { + fi, err := os.Lstat(target) + if err != nil || fi.Mode()&os.ModeSymlink == 0 { + return false + } + + evalTarget, err := filepath.EvalSymlinks(target) + if err != nil { + // Dangling symlinks (target doesn't exist) are not path traversals. + return !errors.Is(err, fs.ErrNotExist) + } + + evalDir, err := filepath.EvalSymlinks(dir) + if err != nil { + return true + } + + rel, err := filepath.Rel(evalDir, evalTarget) + if err != nil { + return false + } + return rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) +} + // isValidPath checks if the target file is within the given directory. func IsValidPath(target, dir string) bool { if strings.Contains(target, "\x00") || strings.Contains(dir, "\x00") { @@ -69,20 +94,8 @@ func IsValidPath(target, dir string) bool { cleanTarget := filepath.Clean(target) cleanDir := filepath.Clean(dir) - // avoid evaluating symlinks if the target is not a symlink - if fi, err := os.Lstat(cleanTarget); err == nil && fi.Mode()&os.ModeSymlink == os.ModeSymlink { - var evalTarget, evalDir, rel string - - if evalTarget, err = filepath.EvalSymlinks(cleanTarget); err != nil { - return false - } - if evalDir, err = filepath.EvalSymlinks(cleanDir); err != nil { - return false - } - if rel, err = filepath.Rel(evalDir, evalTarget); err == nil && - (rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator))) { - return false - } + if symlinkEscapesDir(cleanTarget, cleanDir) { + return false } switch { diff --git a/pkg/archive/fuzz_test.go b/pkg/archive/fuzz_test.go index f65f4ca5e..0a8ad2452 100644 --- a/pkg/archive/fuzz_test.go +++ b/pkg/archive/fuzz_test.go @@ -497,7 +497,7 @@ func FuzzExtractRPM(f *testing.F) { f.Add([]byte("not rpm")) // invalid f.Fuzz(func(t *testing.T, data []byte) { - if len(data) < 96 || !bytes.Equal(data[:4], rpmMagic) { + if len(data) < 96 || len(data) > 10*1024*1024 || !bytes.Equal(data[:4], rpmMagic) { return } diff --git a/pkg/archive/symlink_test.go b/pkg/archive/symlink_test.go index 7d272388b..07ca6aefc 100644 --- a/pkg/archive/symlink_test.go +++ b/pkg/archive/symlink_test.go @@ -4,6 +4,8 @@ package archive import ( + "archive/tar" + "bytes" "context" "os" "path/filepath" @@ -195,6 +197,57 @@ func TestExtractNestedArchiveCollision(t *testing.T) { } } +// TestDanglingSymlinkExtraction verifies that a tar containing a dangling symlink +// (target doesn't exist) extracts without error and all extracted paths pass IsValidPath. +func TestDanglingSymlinkExtraction(t *testing.T) { + t.Parallel() + + // Build a tar in memory with a dangling symlink (points to nonexistent file within dir) + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + if err := tw.WriteHeader(&tar.Header{ + Name: "link", + Typeflag: tar.TypeSymlink, + Linkname: "nonexistent", + }); err != nil { + t.Fatal(err) + } + tw.Close() + + tmpFile, err := os.CreateTemp("", "dangling-symlink-*.tar") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpFile.Name()) + tmpFile.Write(buf.Bytes()) + tmpFile.Close() + + tmpDir, err := os.MkdirTemp("", "dangling-symlink-extract-*") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + // Extraction should succeed + if err := ExtractTar(context.Background(), tmpDir, tmpFile.Name()); err != nil { + t.Fatalf("ExtractTar failed on dangling symlink: %v", err) + } + + // Every extracted path must pass IsValidPath (this is what the fuzzer checks) + err = filepath.WalkDir(tmpDir, func(path string, _ os.DirEntry, err error) error { + if err != nil { + return err + } + if !IsValidPath(path, tmpDir) { + t.Errorf("IsValidPath returned false for dangling symlink: %s", path) + } + return nil + }) + if err != nil { + t.Fatalf("WalkDir failed: %v", err) + } +} + func TestHandleSymlink(t *testing.T) { t.Parallel() tmpDir, err := os.MkdirTemp("", "symlink-test-*") diff --git a/pkg/programkind/fuzz_test.go b/pkg/programkind/fuzz_test.go index e92a4e602..01c5dce0b 100644 --- a/pkg/programkind/fuzz_test.go +++ b/pkg/programkind/fuzz_test.go @@ -15,8 +15,7 @@ import ( // FuzzFile tests file type detection with random inputs. func FuzzFile(f *testing.F) { - // Limit seed file size to avoid excessive memory usage during fuzzing. - const maxSeedSize int64 = 50 * 1024 * 1024 // 50MB + const maxSeedSize int64 = 10 * 1024 * 1024 // 10MB samplesDir := "../../out/chainguard-sandbox/malcontent-samples" err := filepath.WalkDir(samplesDir, func(path string, d os.DirEntry, _ error) error { @@ -64,7 +63,7 @@ func FuzzFile(f *testing.F) { f.Add([]byte("UPX!"), "test.upx") // UPX magic f.Fuzz(func(t *testing.T, data []byte, filename string) { - if len(data) > 50*1024*1024 { + if len(data) > 10*1024*1024 { return } if len(filename) > 255 || filepath.Clean(filename) != filename { diff --git a/pkg/render/fuzz_test.go b/pkg/render/fuzz_test.go index cb3877801..07de257d8 100644 --- a/pkg/render/fuzz_test.go +++ b/pkg/render/fuzz_test.go @@ -47,6 +47,7 @@ func FuzzRenderDifferential(f *testing.F) { } f.Fuzz(func(t *testing.T, riskLevel int8, filePath, behaviorName, behaviorDesc string, hasDiff bool) { + filePath = sanitizeUTF8(filePath) if filePath == "" || yamlIgnore[filePath] { return } diff --git a/pkg/render/json.go b/pkg/render/json.go index f5f4bb114..ad0bad4bc 100644 --- a/pkg/render/json.go +++ b/pkg/render/json.go @@ -48,31 +48,7 @@ func (r JSON) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re if ctx.Err() != nil { return false } - - if key == nil || value == nil { - return true - } - if path, ok := key.(string); ok { - if r, ok := value.(*malcontent.FileReport); ok { - if r.Skipped == "" { - r.ArchiveRoot = "" - r.FullPath = "" - - cleanPath := sanitizeUTF8(path) - - r.Path = sanitizeUTF8(r.Path) - - for _, b := range r.Behaviors { - if b != nil { - b.ID = sanitizeUTF8(b.ID) - b.Description = sanitizeUTF8(b.Description) - } - } - - jr.Files[cleanPath] = r - } - } - } + sanitizeFileReport(key, value, jr.Files) return true }) diff --git a/pkg/render/render.go b/pkg/render/render.go index 16ee549f7..61964031e 100644 --- a/pkg/render/render.go +++ b/pkg/render/render.go @@ -38,6 +38,19 @@ func sanitizeUTF8(s string) string { if !utf8.ValidString(s) { s = strings.ToValidUTF8(s, string(utf8.RuneError)) } + // Strip BiDi override characters that can confuse visual display + s = strings.Map(func(r rune) rune { + switch { + case r >= 0x202A && r <= 0x202E: // LRE, RLE, PDF, LRO, RLO + return -1 + case r >= 0x2066 && r <= 0x2069: // LRI, RLI, FSI, PDI + return -1 + case r == 0x200E || r == 0x200F: // LRM, RLM + return -1 + default: + return r + } + }, s) // Replace newlines and carriage returns with spaces to avoid YAML complex key issues s = strings.ReplaceAll(s, "\n", " ") s = strings.ReplaceAll(s, "\r", " ") @@ -70,6 +83,33 @@ func New(kind string, w io.Writer) (malcontent.Renderer, error) { } } +// sanitizeFileReport sanitizes a sync.Map entry and stores it in files. +// Returns false only if iteration should stop (never in this implementation). +func sanitizeFileReport(key, value any, files map[string]*malcontent.FileReport) { + path, ok := key.(string) + if !ok { + return + } + + r, ok := value.(*malcontent.FileReport) + if !ok || r.Skipped != "" { + return + } + + r.ArchiveRoot = "" + r.FullPath = "" + r.Path = sanitizeUTF8(r.Path) + + for _, b := range r.Behaviors { + if b != nil { + b.ID = sanitizeUTF8(b.ID) + b.Description = sanitizeUTF8(b.Description) + } + } + + files[sanitizeUTF8(path)] = r +} + func riskEmoji(score int) string { symbol := "🔵" switch score { diff --git a/pkg/render/yaml.go b/pkg/render/yaml.go index 2cbd5b8b2..a62f5da8a 100644 --- a/pkg/render/yaml.go +++ b/pkg/render/yaml.go @@ -49,30 +49,7 @@ func (r YAML) Full(ctx context.Context, c *malcontent.Config, rep *malcontent.Re if ctx.Err() != nil { return false } - if key == nil || value == nil { - return true - } - if path, ok := key.(string); ok { - if r, ok := value.(*malcontent.FileReport); ok { - if r.Skipped == "" { - r.ArchiveRoot = "" - r.FullPath = "" - - cleanPath := sanitizeUTF8(path) - - r.Path = sanitizeUTF8(r.Path) - - for _, b := range r.Behaviors { - if b != nil { - b.ID = sanitizeUTF8(b.ID) - b.Description = sanitizeUTF8(b.Description) - } - } - - yr.Files[cleanPath] = r - } - } - } + sanitizeFileReport(key, value, yr.Files) return true }) diff --git a/pkg/report/report.go b/pkg/report/report.go index 0f47a5487..795b8a748 100644 --- a/pkg/report/report.go +++ b/pkg/report/report.go @@ -6,6 +6,7 @@ package report import ( "context" "fmt" + "index/suffixarray" "maps" "net/url" "path/filepath" @@ -281,38 +282,74 @@ func longestUnique(raw []string) []string { } keys := slices.Sorted(maps.Keys(unique)) - slices.SortFunc(keys, func(a, b string) int { - diff := len(b) - len(a) - switch { - case diff != 0: - return diff - case a < b: + + // Find a byte value not present in any string to use as separator + sep := findSeparator(keys) + + totalLen := 0 + for _, s := range keys { + totalLen += len(s) + 1 + } + combined := make([]byte, 0, totalLen) + offsets := make([]int, len(keys)) + for i, s := range keys { + offsets[i] = len(combined) + combined = append(combined, s...) + combined = append(combined, sep) + } + + sa := suffixarray.New(combined) + + parentOf := func(pos int) int { + idx := sort.Search(len(offsets), func(i int) bool { + return offsets[i] > pos + }) - 1 + if idx < 0 || pos >= offsets[idx]+len(keys[idx]) { return -1 - case a > b: - return 1 - default: - return 0 } - }) + return idx + } longest := make([]string, 0, len(keys)) - - for _, s := range keys { + for i, k := range keys { + positions := sa.Lookup([]byte(k), -1) contained := false - for _, o := range longest { - if strings.Contains(o, s) { + for _, pos := range positions { + if p := parentOf(pos); p >= 0 && p != i { contained = true break } } if !contained { - longest = append(longest, s) + longest = append(longest, k) } } + slices.SortFunc(longest, func(a, b string) int { + if diff := len(b) - len(a); diff != 0 { + return diff + } + return strings.Compare(a, b) + }) return longest } +// findSeparator returns a byte value not present in any of the input strings. +func findSeparator(strs []string) byte { + var used [256]bool + for _, s := range strs { + for _, b := range []byte(s) { + used[b] = true + } + } + for b := range used { + if !used[b] { + return byte(b) + } + } + return 0 +} + func matchToString(ruleName string, m string) string { if containsUnprintable([]byte(m)) { return ruleName @@ -740,6 +777,9 @@ func parseMetadata(m *yarax.Rule, b *malcontent.Behavior, fr *malcontent.FileRep _, exists := mrsMap[k] switch { case exists && override: + if thirdParty(m.Namespace()) { + continue + } var overrideSev int if sev, ok := Levels[v]; ok { overrideSev = sev From 6d1ec63dc3892b7357b047afae5d5f2bb812a800 Mon Sep 17 00:00:00 2001 From: egibs <20933572+egibs@users.noreply.github.com> Date: Fri, 6 Feb 2026 13:51:27 -0600 Subject: [PATCH 2/2] limit all fuzzing sizes to 10MB Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --- pkg/archive/fuzz_test.go | 33 ++++++++++++++++++++++++++++++++- pkg/compile/fuzz_test.go | 9 ++++++++- pkg/programkind/fuzz_test.go | 8 ++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/pkg/archive/fuzz_test.go b/pkg/archive/fuzz_test.go index 0a8ad2452..7ee4b9889 100644 --- a/pkg/archive/fuzz_test.go +++ b/pkg/archive/fuzz_test.go @@ -17,6 +17,10 @@ import ( "github.com/chainguard-dev/malcontent/pkg/programkind" ) +// maxFuzzSize is the maximum input size for fuzz tests to stay well under +// Go's 100MB fuzzer shared memory capacity and avoid OOM in parsers. +const maxFuzzSize = 10 * 1024 * 1024 + // readTestFile reads a file using file.GetContents for consistency with production code. func readTestFile(path string) ([]byte, error) { f, err := os.Open(path) @@ -52,6 +56,9 @@ func FuzzExtractTar(f *testing.F) { f.Add([]byte{0x1f, 0x8b, 0x08, 0x00}) // gzip magic bytes only f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-tar-*.tar.gz") if err != nil { t.Skip("failed to create temp file") @@ -107,6 +114,9 @@ func FuzzExtractZip(f *testing.F) { f.Add([]byte{0x50, 0x4b, 0x03, 0x04}) // full zip signature f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-zip-*.zip") if err != nil { t.Skip("failed to create temp file") @@ -196,6 +206,9 @@ func FuzzExtractArchive(f *testing.F) { f.Add([]byte{0x1f, 0x8b, 0x08, 0x00}, ".gz") // gzip header f.Fuzz(func(t *testing.T, data []byte, ext string) { + if len(data) > maxFuzzSize { + return + } if _, ok := programkind.ArchiveMap[ext]; !ok { return } @@ -294,6 +307,9 @@ func FuzzExtractGzip(f *testing.F) { f.Add(make([]byte, 1024*1024)) // large zeros (compression bomb test) f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-gz-*.gz") if err != nil { t.Skip() @@ -338,6 +354,9 @@ func FuzzExtractBz2(f *testing.F) { f.Add(make([]byte, 1024*1024)) // large zeros f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-bz2-*.bz2") if err != nil { t.Skip() @@ -390,6 +409,9 @@ func FuzzExtractZstd(f *testing.F) { f.Add(make([]byte, 1024*1024)) // large zeros f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-zst-*.zst") if err != nil { t.Skip() @@ -444,6 +466,9 @@ func FuzzExtractZlib(f *testing.F) { f.Add(make([]byte, 1024*1024)) // large zeros f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-zlib-*.zlib") if err != nil { t.Skip() @@ -497,7 +522,7 @@ func FuzzExtractRPM(f *testing.F) { f.Add([]byte("not rpm")) // invalid f.Fuzz(func(t *testing.T, data []byte) { - if len(data) < 96 || len(data) > 10*1024*1024 || !bytes.Equal(data[:4], rpmMagic) { + if len(data) < 96 || len(data) > maxFuzzSize || !bytes.Equal(data[:4], rpmMagic) { return } @@ -552,6 +577,9 @@ func FuzzExtractDeb(f *testing.F) { f.Add([]byte("not deb")) // invalid f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-deb-*.deb") if err != nil { t.Skip() @@ -593,6 +621,9 @@ func FuzzExtractUPX(f *testing.F) { f.Add([]byte("not upx")) // invalid f.Fuzz(func(t *testing.T, data []byte) { + if len(data) > maxFuzzSize { + return + } tmpFile, err := os.CreateTemp("", "fuzz-upx-*") if err != nil { t.Skip() diff --git a/pkg/compile/fuzz_test.go b/pkg/compile/fuzz_test.go index fb1992692..8beababdb 100644 --- a/pkg/compile/fuzz_test.go +++ b/pkg/compile/fuzz_test.go @@ -14,6 +14,10 @@ import ( "time" ) +// maxFuzzSize is the maximum input size for fuzz tests to stay well under +// Go's 100MB fuzzer shared memory capacity and avoid OOM in parsers. +const maxFuzzSize = 10 * 1024 * 1024 + // FuzzRemoveRules tests the removeRules function with random inputs. func FuzzRemoveRules(f *testing.F) { for _, root := range getAllRuleFS() { @@ -75,6 +79,9 @@ rule keep_me { condition: true } `), "remove_me_1,remove_me_2") f.Fuzz(func(t *testing.T, data []byte, rulesToRemove string) { + if len(data) > maxFuzzSize { + return + } var rules []string if rulesToRemove != "" { rules = strings.Split(rulesToRemove, ",") @@ -138,7 +145,7 @@ func FuzzRecursiveCompile(f *testing.F) { floatPattern := regexp.MustCompile(`\d+\.\d+`) f.Fuzz(func(_ *testing.T, data []byte) { - if len(data) > 50*1024*1024 { + if len(data) > maxFuzzSize { return } diff --git a/pkg/programkind/fuzz_test.go b/pkg/programkind/fuzz_test.go index 01c5dce0b..de5770a45 100644 --- a/pkg/programkind/fuzz_test.go +++ b/pkg/programkind/fuzz_test.go @@ -13,9 +13,13 @@ import ( "github.com/chainguard-dev/malcontent/pkg/file" ) +// maxFuzzSize is the maximum input size for fuzz tests to stay well under +// Go's 100MB fuzzer shared memory capacity and avoid OOM in parsers. +const maxFuzzSize = 10 * 1024 * 1024 + // FuzzFile tests file type detection with random inputs. func FuzzFile(f *testing.F) { - const maxSeedSize int64 = 10 * 1024 * 1024 // 10MB + const maxSeedSize int64 = maxFuzzSize samplesDir := "../../out/chainguard-sandbox/malcontent-samples" err := filepath.WalkDir(samplesDir, func(path string, d os.DirEntry, _ error) error { @@ -63,7 +67,7 @@ func FuzzFile(f *testing.F) { f.Add([]byte("UPX!"), "test.upx") // UPX magic f.Fuzz(func(t *testing.T, data []byte, filename string) { - if len(data) > 10*1024*1024 { + if len(data) > maxFuzzSize { return } if len(filename) > 255 || filepath.Clean(filename) != filename {