4848)
4949
5050// scanSinglePath YARA scans a single path and converts it to a fileReport.
51+ //
52+ //nolint:cyclop // ignore complexity of 38
5153func scanSinglePath (ctx context.Context , c malcontent.Config , path string , ruleFS []fs.FS , absPath string , archiveRoot string ) (* malcontent.FileReport , error ) {
5254 if ctx .Err () != nil {
5355 return & malcontent.FileReport {}, ctx .Err ()
@@ -56,30 +58,22 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
5658 logger := clog .FromContext (ctx )
5759 logger = logger .With ("path" , path )
5860
59- var yrs * yarax.Rules
60- var err error
61- if c .Rules == nil {
62- yrs , err = CachedRules (ctx , ruleFS )
63- if err != nil {
64- return nil , fmt .Errorf ("rules: %w" , err )
65- }
66- } else {
67- yrs = c .Rules
68- }
61+ isArchive := archiveRoot != ""
6962
70- initializeOnce . Do ( func () {
71- filePool = pool . NewBufferPool ()
72- scannerPool = pool . NewScannerPool ( yrs , c . Concurrency )
73- })
63+ fi , err := os . Stat ( path )
64+ if err != nil {
65+ return nil , err
66+ }
7467
75- scanner := scannerPool .Get ()
76- // Scanner should not be nil here, but guard against it anyway
77- if scanner == nil {
78- scanner = yarax .NewScanner (yrs )
68+ size := fi .Size ()
69+ if size == 0 {
70+ fr := & malcontent.FileReport {Skipped : "zero-sized file" , Path : path }
71+ if isArchive {
72+ defer os .RemoveAll (path )
73+ }
74+ return fr , nil
7975 }
80- defer scannerPool .Put (scanner )
8176
82- isArchive := archiveRoot != ""
8377 mime := "<unknown>"
8478 kind , err := programkind .File (path )
8579 if err != nil && ! interactive (c ) {
@@ -88,6 +82,7 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
8882 if kind != nil {
8983 mime = kind .MIME
9084 }
85+
9186 if ! c .IncludeDataFiles && kind == nil {
9287 logger .Debugf ("skipping %s [%s]: data file or empty" , path , mime )
9388 fr := & malcontent.FileReport {Skipped : "data file or empty" , Path : path }
@@ -99,33 +94,51 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
9994 }
10095 logger = logger .With ("mime" , mime )
10196
102- f , err := os .Open (path )
103- if err != nil {
104- return nil , err
97+ var yrs * yarax.Rules
98+ if c .Rules == nil {
99+ yrs , err = CachedRules (ctx , ruleFS )
100+ if err != nil {
101+ return nil , fmt .Errorf ("rules: %w" , err )
102+ }
103+ } else {
104+ yrs = c .Rules
105105 }
106- defer f .Close ()
107106
108- fi , err := f .Stat ()
109- if err != nil {
110- return nil , err
107+ initializeOnce .Do (func () {
108+ filePool = pool .NewBufferPool (c .Concurrency + 1 )
109+ scannerPool = pool .NewScannerPool (yrs , c .Concurrency + 1 )
110+ })
111+
112+ scanner := scannerPool .Get ()
113+ if scanner == nil {
114+ scanner = yarax .NewScanner (yrs )
111115 }
112- size := fi . Size ( )
116+ defer scannerPool . Put ( scanner )
113117
114- if size == 0 {
115- fr := & malcontent.FileReport {Skipped : "zero-sized file" , Path : path }
116- defer os .RemoveAll (path )
117- return fr , nil
118+ f , err := os .Open (path )
119+ if err != nil {
120+ return nil , err
118121 }
122+ defer f .Close ()
119123
120124 fc := filePool .Get (size )
121125 defer filePool .Put (fc )
122- if _ , err := io .ReadFull (f , fc ); err != nil {
123- return nil , err
126+
127+ var bytesRead int
128+ var totalRead int64
129+ for totalRead < size {
130+ bytesRead , err = f .Read (fc [totalRead :])
131+ if errors .Is (err , io .EOF ) {
132+ break
133+ }
134+ if err != nil {
135+ return nil , err
136+ }
137+ totalRead += int64 (bytesRead )
124138 }
125139
126- // Immediately remove archive files read into memory
127- if isArchive {
128- defer os .RemoveAll (path )
140+ if totalRead < size && err != nil {
141+ return nil , fmt .Errorf ("incomplete read: got %d bytes, expected %d: %w" , totalRead , size , err )
129142 }
130143
131144 mrs , err := scanner .Scan (fc )
@@ -137,9 +150,12 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
137150 // If running a scan, only generate reports for mrs that satisfy the risk threshold of 3
138151 // This is a short-circuit that avoids any report generation logic
139152 risk := report .HighestMatchRisk (mrs )
140- if c .Scan && risk < 3 {
153+ threshold := max (3 , c .MinFileRisk , c .MinRisk )
154+ if c .Scan && risk < threshold {
141155 fr := & malcontent.FileReport {Skipped : "overall risk too low for scan" , Path : path }
142- os .RemoveAll (path )
156+ if isArchive {
157+ os .RemoveAll (path )
158+ }
143159 return fr , nil
144160 }
145161
@@ -166,15 +182,13 @@ func scanSinglePath(ctx context.Context, c malcontent.Config, path string, ruleF
166182 fr .ArchiveRoot = archiveRootAbs
167183 fr .FullPath = pathAbs
168184 clean = formatPath (cleanPath (pathAbs , archiveRootAbs ))
169- }
170185
171- // If absPath is provided, use it instead of the path if they are different.
172- // This is useful when scanning images and archives.
173- if absPath != "" && absPath != path && ( isArchive || c . OCI ) {
174- if len ( c . TrimPrefixes ) > 0 {
175- absPath = report . TrimPrefixes ( absPath , c . TrimPrefixes )
186+ if absPath != "" && absPath != path && ( isArchive || c . OCI ) {
187+ if len ( c . TrimPrefixes ) > 0 {
188+ absPath = report . TrimPrefixes ( absPath , c . TrimPrefixes )
189+ }
190+ fr . Path = fmt . Sprintf ( "%s ∴ %s" , absPath , clean )
176191 }
177- fr .Path = fmt .Sprintf ("%s ∴ %s" , absPath , clean )
178192 }
179193
180194 if len (fr .Behaviors ) == 0 {
@@ -396,17 +410,17 @@ func processPaths(ctx context.Context, paths []string, scanInfo scanPathInfo, c
396410 cancel ()
397411 }()
398412
399- g := setupErrorGroup (maxConcurrency )
413+ g , gCtx := errgroup .WithContext (scanCtx )
414+ g .SetLimit (maxConcurrency )
400415
401- setupMatchHandler (scanCtx , matchChan , c , cancel , logger )
416+ setupMatchHandler (gCtx , matchChan , c , cancel , logger )
402417
403418 pc := make (chan string , len (paths ))
404419 go func () {
405420 defer close (pc )
406-
407421 for _ , path := range paths {
408422 select {
409- case <- scanCtx .Done ():
423+ case <- gCtx .Done ():
410424 return
411425 case pc <- path :
412426 }
@@ -415,10 +429,10 @@ func processPaths(ctx context.Context, paths []string, scanInfo scanPathInfo, c
415429
416430 for path := range pc {
417431 g .Go (func () error {
418- if scanCtx .Err () != nil {
432+ if gCtx .Err () != nil {
419433 return scanCtx .Err ()
420434 }
421- return processPath (scanCtx , path , scanInfo , c , r , matchChan , matchOnce , logger )
435+ return processPath (gCtx , path , scanInfo , c , r , matchChan , matchOnce , logger )
422436 })
423437 }
424438
@@ -447,21 +461,6 @@ func getMaxConcurrency(configured int) int {
447461 return configured
448462}
449463
450- func createPathChannel (paths []string ) chan string {
451- pc := make (chan string , len (paths ))
452- for _ , path := range paths {
453- pc <- path
454- }
455- close (pc )
456- return pc
457- }
458-
459- func setupErrorGroup (maxConcurrency int ) * errgroup.Group {
460- g := & errgroup.Group {}
461- g .SetLimit (maxConcurrency )
462- return g
463- }
464-
465464func setupMatchHandler (ctx context.Context , matchChan chan matchResult , c malcontent.Config , cancel context.CancelFunc , logger * clog.Logger ) {
466465 if ctx .Err () != nil {
467466 return
@@ -634,13 +633,12 @@ func processArchive(ctx context.Context, c malcontent.Config, rfs []fs.FS, archi
634633 return nil , fmt .Errorf ("extract to temp: %w" , err )
635634 }
636635 // Ensure that tmpRoot is removed before returning if created successfully
637- if tmpRoot != "" {
638- defer func () {
639- if err := os .RemoveAll (tmpRoot ); err != nil {
640- logger .Errorf ("remove %s: %v" , tmpRoot , err )
641- }
642- }()
643- }
636+ defer func () {
637+ if err := os .RemoveAll (tmpRoot ); err != nil {
638+ logger .Errorf ("remove %s: %v" , tmpRoot , err )
639+ }
640+ }()
641+
644642 // macOS will prefix temporary directories with `/private`
645643 // update tmpRoot with this prefix to allow strings.TrimPrefix to work
646644 if runtime .GOOS == "darwin" {
@@ -652,16 +650,28 @@ func processArchive(ctx context.Context, c malcontent.Config, rfs []fs.FS, archi
652650 return nil , fmt .Errorf ("find: %w" , err )
653651 }
654652
653+ ep := make (chan string , len (extractedPaths ))
654+ go func () {
655+ defer close (ep )
656+ for _ , path := range extractedPaths {
657+ select {
658+ case <- ctx .Done ():
659+ return
660+ case ep <- path :
661+ }
662+ }
663+ }()
664+
655665 maxConcurrency := getMaxConcurrency (c .Concurrency )
656666 scanCtx , cancel := context .WithCancel (ctx )
657667 defer cancel ()
658668
659- g := setupErrorGroup (maxConcurrency )
669+ g , gCtx := errgroup .WithContext (scanCtx )
670+ g .SetLimit (maxConcurrency )
660671
661- ep := createPathChannel (extractedPaths )
662672 for path := range ep {
663673 g .Go (func () error {
664- fr , err := processFile (scanCtx , c , rfs , path , archivePath , tmpRoot , logger )
674+ fr , err := processFile (gCtx , c , rfs , path , archivePath , tmpRoot , logger )
665675 if err != nil {
666676 return err
667677 }
0 commit comments