77 "context"
88 "errors"
99 "fmt"
10- "io"
1110 "io/fs"
1211 "log/slog"
1312 "os"
@@ -17,6 +16,7 @@ import (
1716 "strings"
1817 "sync"
1918 "sync/atomic"
19+ "syscall"
2020
2121 "github.com/chainguard-dev/clog"
2222 "github.com/chainguard-dev/malcontent/pkg/archive"
@@ -43,13 +43,59 @@ var (
4343 ErrMatchedCondition = errors .New ("matched exit criteria" )
4444 // initializeOnce ensures that the scanner pool is only initialized once.
4545 initializeOnce sync.Once
46- filePool * pool.BufferPool
4746 scannerPool * pool.ScannerPool
47+ maxMmapSize int64 = 1 << 31
4848)
4949
50+ // scanFD scans a file descriptor using memory mapping for efficient large file handling.
51+ // This avoids loading the entire file into memory while still using yara-x's byte slice scanning.
52+ func scanFD (scanner * yarax.Scanner , fd uintptr , logger * clog.Logger ) ([]byte , * yarax.ScanResults , error ) {
53+ var stat syscall.Stat_t
54+ if err := syscall .Fstat (int (fd ), & stat ); err != nil {
55+ return nil , nil , fmt .Errorf ("fstat failed: %w" , err )
56+ }
57+
58+ size := stat .Size
59+ if size == 0 {
60+ mrs , err := scanner .Scan ([]byte {})
61+ return nil , mrs , err
62+ }
63+
64+ if size < 0 {
65+ return nil , nil , fmt .Errorf ("invalid file size: %d" , size )
66+ }
67+
68+ if size > maxMmapSize {
69+ logger .Warn ("file exceeds mmap limit, scanning first portion only" ,
70+ "size" , size , "limit" , maxMmapSize )
71+ size = maxMmapSize
72+ }
73+
74+ data , err := syscall .Mmap (int (fd ), 0 , int (size ), syscall .PROT_READ , syscall .MAP_PRIVATE )
75+ if err != nil {
76+ return nil , nil , fmt .Errorf ("mmap failed: %w" , err )
77+ }
78+ defer func () {
79+ if unmapErr := syscall .Munmap (data ); unmapErr != nil {
80+ logger .Error ("failed to unmap memory" , "error" , unmapErr )
81+ }
82+ }()
83+
84+ mrs , err := scanner .Scan (data )
85+ if err != nil {
86+ return nil , nil , err
87+ }
88+
89+ // Create a copy of the data to return since the mmap will be unmapped
90+ // This is necessary because report generation needs access to file content
91+ // for checksum calculation and match string extraction
92+ fc := make ([]byte , len (data ))
93+ copy (fc , data )
94+
95+ return fc , mrs , err
96+ }
97+
5098// scanSinglePath YARA scans a single path and converts it to a fileReport.
51- //
52- //nolint:cyclop // ignore complexity of 38
5399func scanSinglePath (ctx context.Context , c malcontent.Config , path string , ruleFS []fs.FS , absPath string , archiveRoot string ) (* malcontent.FileReport , error ) {
54100 if ctx .Err () != nil {
55101 return & malcontent.FileReport {}, ctx .Err ()
@@ -60,7 +106,14 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
60106
61107 isArchive := archiveRoot != ""
62108
63- fi , err := os .Stat (path )
109+ f , err := os .Open (path )
110+ if err != nil {
111+ return nil , err
112+ }
113+ fd := f .Fd ()
114+ defer f .Close ()
115+
116+ fi , err := f .Stat ()
64117 if err != nil {
65118 return nil , err
66119 }
@@ -105,43 +158,13 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
105158 }
106159
107160 initializeOnce .Do (func () {
108- filePool = pool .NewBufferPool (c .Concurrency + 1 )
109- scannerPool = pool .NewScannerPool (yrs , c .Concurrency + 1 )
161+ scannerPool = pool .NewScannerPool (yrs , c .Concurrency )
110162 })
111163
112164 scanner := scannerPool .Get ()
113- if scanner == nil {
114- scanner = yarax .NewScanner (yrs )
115- }
116165 defer scannerPool .Put (scanner )
117166
118- f , err := os .Open (path )
119- if err != nil {
120- return nil , err
121- }
122- defer f .Close ()
123-
124- fc := filePool .Get (size )
125- defer filePool .Put (fc )
126-
127- var bytesRead int
128- var totalRead int64
129- for totalRead < size {
130- bytesRead , err = f .Read (fc [totalRead :])
131- if errors .Is (err , io .EOF ) {
132- break
133- }
134- if err != nil {
135- return nil , err
136- }
137- totalRead += int64 (bytesRead )
138- }
139-
140- if totalRead < size && err != nil {
141- return nil , fmt .Errorf ("incomplete read: got %d bytes, expected %d: %w" , totalRead , size , err )
142- }
143-
144- mrs , err := scanner .Scan (fc )
167+ fc , mrs , err := scanFD (scanner , fd , logger )
145168 if err != nil {
146169 logger .Debug ("skipping" , slog .Any ("error" , err ))
147170 return nil , err
@@ -164,6 +187,11 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
164187 return nil , NewFileReportError (err , path , TypeGenerateError )
165188 }
166189
190+ defer func () {
191+ fc = nil
192+ mrs = nil
193+ }()
194+
167195 // Clean up the path if scanning an archive
168196 var clean string
169197 if isArchive || c .OCI {
@@ -427,6 +455,12 @@ func processPaths(ctx context.Context, paths []string, scanInfo scanPathInfo, c
427455 }
428456 }()
429457
458+ // Zero-out the path strings and empty the slice once read into the path channel
459+ defer func () {
460+ clear (paths )
461+ paths = paths [:0 ]
462+ }()
463+
430464 for path := range pc {
431465 g .Go (func () error {
432466 if gCtx .Err () != nil {
0 commit comments