Skip to content

Commit b767561

Browse files
committed
Avoid failing scans outright when encountering extraction failures
Signed-off-by: egibs <20933572+egibs@users.noreply.github.com>
1 parent 5a4eed8 commit b767561

5 files changed

Lines changed: 98 additions & 1 deletion

File tree

cmd/mal/mal.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ var (
5050
allFlag bool
5151
concurrencyFlag int
5252
diffImageFlag bool
53+
exitExtractionFlag bool
5354
exitFirstHitFlag bool
5455
exitFirstMissFlag bool
5556
fileRiskChangeFlag bool
@@ -262,6 +263,7 @@ func main() {
262263

263264
mc = malcontent.Config{
264265
Concurrency: concurrency,
266+
ExitExtraction: exitExtractionFlag,
265267
ExitFirstHit: exitFirstHitFlag,
266268
ExitFirstMiss: exitFirstMissFlag,
267269
IgnoreSelf: ignoreSelfFlag,
@@ -287,6 +289,12 @@ func main() {
287289
Usage: "Ignore nothing within a provided scan path",
288290
Destination: &allFlag,
289291
},
292+
&cli.BoolFlag{
293+
Name: "exit-extraction",
294+
Value: true,
295+
Usage: "Exit when encountering file extraction errors",
296+
Destination: &exitExtractionFlag,
297+
},
290298
&cli.BoolFlag{
291299
Name: "exit-first-miss",
292300
Value: false,

pkg/action/archive_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,89 @@ func TestScanArchive(t *testing.T) {
260260
}
261261
}
262262

263+
// extractError filters out the one failure these tests expect: it returns nil
// when e's message contains "not a valid gzip archive" and returns e unchanged
// for any other message.
//
// e must be non-nil; e.Error() is called unconditionally.
func extractError(e error) error {
	if msg := e.Error(); !strings.Contains(msg, "not a valid gzip archive") {
		return e
	}
	return nil
}
269+
270+
func TestScanInvalidArchive(t *testing.T) {
271+
t.Parallel()
272+
ctx := slogtest.Context(t)
273+
clog.FromContext(ctx).With("test", "scan_archive")
274+
275+
var out bytes.Buffer
276+
r, err := render.New("json", &out)
277+
if err != nil {
278+
t.Fatalf("render: %v", err)
279+
}
280+
281+
rfs := []fs.FS{rules.FS, thirdparty.FS}
282+
yrs, err := CachedRules(ctx, rfs)
283+
if err != nil {
284+
t.Fatalf("rules: %v", err)
285+
}
286+
287+
mc := malcontent.Config{
288+
Concurrency: runtime.NumCPU(),
289+
ExitExtraction: true,
290+
IgnoreSelf: false,
291+
MinFileRisk: 0,
292+
MinRisk: 0,
293+
Renderer: r,
294+
Rules: yrs,
295+
ScanPaths: []string{"testdata/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz"},
296+
}
297+
_, err = Scan(ctx, mc)
298+
err = extractError(err)
299+
if err != nil {
300+
t.Fatal(err)
301+
}
302+
}
303+
304+
func TestScanInvalidArchiveIgnore(t *testing.T) {
305+
t.Parallel()
306+
ctx := slogtest.Context(t)
307+
clog.FromContext(ctx).With("test", "scan_archive")
308+
309+
var out bytes.Buffer
310+
r, err := render.New("json", &out)
311+
if err != nil {
312+
t.Fatalf("render: %v", err)
313+
}
314+
315+
rfs := []fs.FS{rules.FS, thirdparty.FS}
316+
yrs, err := CachedRules(ctx, rfs)
317+
if err != nil {
318+
t.Fatalf("rules: %v", err)
319+
}
320+
321+
mc := malcontent.Config{
322+
Concurrency: runtime.NumCPU(),
323+
ExitExtraction: false,
324+
IgnoreSelf: false,
325+
MinFileRisk: 0,
326+
MinRisk: 0,
327+
Renderer: r,
328+
Rules: yrs,
329+
ScanPaths: []string{"testdata/joblib_0.9.4.dev0_compressed_cache_size_pickle_py35_np19.gz"},
330+
}
331+
res, err := Scan(ctx, mc)
332+
if err != nil {
333+
t.Fatal(err)
334+
}
335+
if err := r.Full(ctx, nil, res); err != nil {
336+
t.Fatalf("full: %v", err)
337+
}
338+
339+
got := out.String()
340+
want := "{}\n"
341+
if diff := cmp.Diff(want, got); diff != "" {
342+
t.Errorf("output mismatch: (-want +got):\n%s", diff)
343+
}
344+
}
345+
263346
func TestGetExt(t *testing.T) {
264347
tests := []struct {
265348
path string

pkg/action/scan.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -630,7 +630,12 @@ func processArchive(ctx context.Context, c malcontent.Config, rfs []fs.FS, archi
630630

631631
tmpRoot, err := archive.ExtractArchiveToTempDir(ctx, archivePath)
632632
if err != nil {
633-
return nil, fmt.Errorf("extract to temp: %w", err)
633+
// Avoid failing an entire scan when encountering problematic archives
634+
// e.g., joblib_0.8.4_compressed_pickle_py27_np17.gz: not a valid gzip archive
635+
if c.ExitExtraction {
636+
return nil, fmt.Errorf("extract to temp: %w", err)
637+
}
638+
return nil, nil
634639
}
635640
// Ensure that tmpRoot is removed before returning if created successfully
636641
defer func() {
Binary file not shown.

pkg/malcontent/malcontent.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ type Renderer interface {
2323

2424
type Config struct {
2525
Concurrency int
26+
ExitExtraction bool
2627
ExitFirstHit bool
2728
ExitFirstMiss bool
2829
FileRiskChange bool

0 commit comments

Comments
 (0)