Skip to content

Commit 1971699

Browse files
authored
Fix partial read edge cases (#969)
* Fix partial read edge cases Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Preserve directory structure when extracting nested archives Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Avoid extracting archives that will collide with existing files Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Remove archive files we aren't going to extract Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Append the timestamp to duplicate macOS files instead of _file Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Modify overlapping directory names to extract everything Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> * Use archivePath in logs Signed-off-by: egibs <20933572+egibs@users.noreply.github.com> --------- Signed-off-by: egibs <20933572+egibs@users.noreply.github.com>
1 parent f3c4fcb commit 1971699

13 files changed

Lines changed: 187 additions & 88 deletions

File tree

pkg/action/archive_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,55 @@ func TestScanInvalidArchiveIgnore(t *testing.T) {
348348
}
349349
}
350350

351+
func TestScanConflictingArchiveFiles(t *testing.T) {
352+
t.Parallel()
353+
ctx := context.Background()
354+
clog.FromContext(ctx).With("test", "scan_conflicting_archive_files")
355+
356+
var out bytes.Buffer
357+
r, err := render.New("json", &out)
358+
if err != nil {
359+
t.Fatalf("render: %v", err)
360+
}
361+
362+
rfs := []fs.FS{rules.FS, thirdparty.FS}
363+
yrs, err := CachedRules(ctx, rfs)
364+
if err != nil {
365+
t.Fatalf("rules: %v", err)
366+
}
367+
368+
mc := malcontent.Config{
369+
Concurrency: runtime.NumCPU(),
370+
ExitExtraction: false,
371+
IgnoreSelf: false,
372+
MinFileRisk: 0,
373+
MinRisk: 0,
374+
Renderer: r,
375+
Rules: yrs,
376+
ScanPaths: []string{
377+
"testdata/conflict.zip",
378+
},
379+
}
380+
res, err := Scan(ctx, mc)
381+
if err != nil {
382+
t.Fatal(err)
383+
}
384+
if err := r.Full(ctx, nil, res); err != nil {
385+
t.Fatalf("full: %v", err)
386+
}
387+
388+
got := out.String()
389+
td, err := os.ReadFile("testdata/scan_conflict")
390+
if err != nil {
391+
t.Fatalf("testdata read failed: %v", err)
392+
}
393+
want := string(td)
394+
395+
if diff := cmp.Diff(want, got); diff != "" {
396+
t.Errorf("output mismatch: (-want +got):\n%s", diff)
397+
}
398+
}
399+
351400
func TestGetExt(t *testing.T) {
352401
tests := []struct {
353402
path string

pkg/action/testdata/conflict.zip

607 Bytes
Binary file not shown.

pkg/action/testdata/scan_archive

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"Files": {
3-
"/apko_0.13.2_linux_arm64/apko": {
4-
"Path": "testdata/apko_nested.tar.gz ∴ /apko_0.13.2_linux_arm64/apko",
3+
"/apko_0.13.2_linux_arm64/apko_0.13.2_linux_arm64/apko": {
4+
"Path": "testdata/apko_nested.tar.gz ∴ /apko_0.13.2_linux_arm64/apko_0.13.2_linux_arm64/apko",
55
"SHA256": "ad237dc65d25cfe673b4891e189e9ff1fd041ec817133ac6c565120a6a189189",
66
"Size": 26400952,
77
"Syscalls": [

pkg/action/testdata/scan_conflict

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

pkg/archive/archive.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"runtime"
1111
"strings"
1212
"sync"
13+
"time"
1314

1415
"github.com/chainguard-dev/clog"
1516
"github.com/chainguard-dev/malcontent/pkg/pool"
@@ -32,7 +33,7 @@ func IsValidPath(target, dir string) bool {
3233
return strings.HasPrefix(filepath.Clean(target), filepath.Clean(dir))
3334
}
3435

35-
func extractNestedArchive(ctx context.Context, d string, f string, extracted *sync.Map) error {
36+
func extractNestedArchive(ctx context.Context, d string, f string, extracted *sync.Map, logger *clog.Logger) error {
3637
if ctx.Err() != nil {
3738
return ctx.Err()
3839
}
@@ -87,14 +88,28 @@ func extractNestedArchive(ctx context.Context, d string, f string, extracted *sy
8788
return nil
8889
}
8990

90-
err = extract(ctx, d, fullPath)
91+
archivePath := filepath.Join(d, strings.TrimSuffix(f, programkind.GetExt(f)))
92+
// Some packages may have archives and files with colliding names
93+
// e.g., demo_page.css and demo_page.css.gz
94+
// the former is the uncompressed version of the latter
95+
// if we encounter this, replace the name with something that won't collide
96+
if _, err := os.Stat(archivePath); err == nil {
97+
logger.Debugf("duplicate file name already exists, modifying directory name for %s", archivePath)
98+
archivePath = fmt.Sprintf("%s%d", archivePath, time.Now().UnixNano())
99+
}
100+
101+
if err := os.MkdirAll(archivePath, 0o755); err != nil {
102+
return fmt.Errorf("failed to create extraction directory: %w", err)
103+
}
104+
105+
err = extract(ctx, archivePath, fullPath)
91106
if err != nil {
92107
return fmt.Errorf("failed to extract archive: %w", err)
93108
}
94109

95110
extracted.Store(f, true)
96111

97-
if err := os.RemoveAll(fullPath); err != nil {
112+
if err := os.Remove(fullPath); err != nil {
98113
return fmt.Errorf("failed to remove archive file: %w", err)
99114
}
100115

@@ -109,7 +124,7 @@ func extractNestedArchive(ctx context.Context, d string, f string, extracted *sy
109124
}
110125
rel := file.Name()
111126
if _, alreadyProcessed := extracted.Load(rel); !alreadyProcessed {
112-
if err := extractNestedArchive(ctx, d, rel, extracted); err != nil {
127+
if err := extractNestedArchive(ctx, d, rel, extracted, logger); err != nil {
113128
return fmt.Errorf("process nested file %s: %w", rel, err)
114129
}
115130
}
@@ -187,7 +202,7 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) {
187202

188203
ext := programkind.GetExt(path)
189204
if _, ok := programkind.ArchiveMap[ext]; ok {
190-
if err := extractNestedArchive(ctx, tmpDir, rel, &extractedFiles); err != nil {
205+
if err := extractNestedArchive(ctx, tmpDir, rel, &extractedFiles, logger); err != nil {
191206
return err
192207
}
193208
}

pkg/archive/bz2.go

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package archive
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"io"
78
"os"
@@ -49,7 +50,7 @@ func ExtractBz2(ctx context.Context, d, f string) error {
4950
if !IsValidPath(target, d) {
5051
return fmt.Errorf("invalid file path: %s", target)
5152
}
52-
if err := os.MkdirAll(d, 0o700); err != nil {
53+
if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
5354
return fmt.Errorf("failed to create directory for file: %w", err)
5455
}
5556

@@ -73,20 +74,22 @@ func ExtractBz2(ctx context.Context, d, f string) error {
7374
}
7475

7576
n, err := br.Read(buf)
76-
if err == io.EOF {
77-
break
78-
}
79-
if err != nil {
80-
return fmt.Errorf("failed to read file contents: %w", err)
77+
if n > 0 {
78+
written += int64(n)
79+
if written > maxBytes {
80+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
81+
}
82+
if _, writeErr := out.Write(buf[:n]); writeErr != nil {
83+
return fmt.Errorf("failed to write file contents: %w", writeErr)
84+
}
8185
}
8286

83-
written += int64(n)
84-
if written > maxBytes {
85-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
87+
if errors.Is(err, io.EOF) {
88+
break
8689
}
8790

88-
if _, err := out.Write(buf[:n]); err != nil {
89-
return fmt.Errorf("failed to write file contents: %w", err)
91+
if err != nil {
92+
return fmt.Errorf("failed to read file contents: %w", err)
9093
}
9194
}
9295

pkg/archive/gzip.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -90,21 +90,24 @@ func ExtractGzip(ctx context.Context, d string, f string) error {
9090
}
9191

9292
n, err := gr.Read(buf)
93+
if n > 0 {
94+
written += int64(n)
95+
if written > maxBytes {
96+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
97+
}
98+
99+
if _, writeErr := out.Write(buf[:n]); writeErr != nil {
100+
return fmt.Errorf("failed to write file contents: %w", writeErr)
101+
}
102+
}
103+
93104
if errors.Is(err, io.EOF) {
94105
break
95106
}
107+
96108
if err != nil {
97109
return fmt.Errorf("failed to read file contents: %w", err)
98110
}
99-
100-
written += int64(n)
101-
if written > maxBytes {
102-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
103-
}
104-
105-
if _, err := out.Write(buf[:n]); err != nil {
106-
return fmt.Errorf("failed to write file contents: %w", err)
107-
}
108111
}
109112

110113
return nil

pkg/archive/rpm.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -128,21 +128,23 @@ func ExtractRPM(ctx context.Context, d, f string) error {
128128
}
129129

130130
n, err := cr.Read(buf)
131+
if n > 0 {
132+
written += int64(n)
133+
if written > maxBytes {
134+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
135+
}
136+
if _, writeErr := out.Write(buf[:n]); writeErr != nil {
137+
return fmt.Errorf("failed to write file contents: %w", writeErr)
138+
}
139+
}
140+
131141
if errors.Is(err, io.EOF) {
132142
break
133143
}
144+
134145
if err != nil {
135146
return fmt.Errorf("failed to read file contents: %w", err)
136147
}
137-
138-
written += int64(n)
139-
if written > maxBytes {
140-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
141-
}
142-
143-
if _, err := out.Write(buf[:n]); err != nil {
144-
return fmt.Errorf("failed to write file contents: %w", err)
145-
}
146148
}
147149

148150
if err := out.Close(); err != nil {

pkg/archive/tar.go

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -114,23 +114,22 @@ func ExtractTar(ctx context.Context, d string, f string) error {
114114
}
115115

116116
n, err := xzStream.Read(buf)
117-
if errors.Is(err, io.EOF) {
118-
break
119-
}
120-
if err != nil {
121-
if strings.Contains(err.Error(), "unexpected EOF") && n > 0 {
122-
break
117+
if n > 0 {
118+
written += int64(n)
119+
if written > maxBytes {
120+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
121+
}
122+
if _, writeErr := out.Write(buf[:n]); writeErr != nil {
123+
return fmt.Errorf("failed to write file contents: %w", writeErr)
123124
}
124-
return fmt.Errorf("failed to read file contents: %w", err)
125125
}
126126

127-
written += int64(n)
128-
if written > maxBytes {
129-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
127+
if errors.Is(err, io.EOF) {
128+
break
130129
}
131130

132-
if _, err := out.Write(buf[:n]); err != nil {
133-
return fmt.Errorf("failed to write file contents: %w", err)
131+
if err != nil {
132+
return fmt.Errorf("failed to read file contents: %w", err)
134133
}
135134
}
136135
return nil
@@ -152,20 +151,23 @@ func ExtractTar(ctx context.Context, d string, f string) error {
152151
}
153152

154153
n, err := br.Read(buf)
155-
if err == io.EOF {
156-
break
157-
}
158-
if err != nil {
159-
return fmt.Errorf("failed to read file contents: %w", err)
154+
if n > 0 {
155+
written += int64(n)
156+
if written > maxBytes {
157+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
158+
}
159+
160+
if _, writeErr := out.Write(buf[:n]); writeErr != nil {
161+
return fmt.Errorf("failed to write file contents: %w", writeErr)
162+
}
160163
}
161164

162-
written += int64(n)
163-
if written > maxBytes {
164-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
165+
if errors.Is(err, io.EOF) {
166+
break
165167
}
166168

167-
if _, err := out.Write(buf[:n]); err != nil {
168-
return fmt.Errorf("failed to write file contents: %w", err)
169+
if err != nil {
170+
return fmt.Errorf("failed to read file contents: %w", err)
169171
}
170172
}
171173
return nil

pkg/archive/zip.go

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ package archive
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"io"
78
"os"
89
"path/filepath"
910
"runtime"
1011
"strings"
1112
"sync"
13+
"time"
1214

1315
"github.com/chainguard-dev/clog"
1416
"github.com/chainguard-dev/malcontent/pkg/pool"
@@ -113,6 +115,14 @@ func extractFile(ctx context.Context, file *zip.File, destDir string, logger *cl
113115
return ctx.Err()
114116
}
115117

118+
// macOS will encounter issues with paths like META-INF/LICENSE and META-INF/license/foo
119+
// this case insensitivity will break scans, so rename files that collide with existing directories
120+
if runtime.GOOS == "darwin" {
121+
if _, err := os.Stat(filepath.Join(destDir, file.Name)); err == nil {
122+
file.Name = fmt.Sprintf("%s%d", file.Name, time.Now().UnixNano())
123+
}
124+
}
125+
116126
buf := zipPool.Get(zipBuffer)
117127

118128
clean := filepath.Clean(filepath.ToSlash(file.Name))
@@ -154,20 +164,23 @@ func extractFile(ctx context.Context, file *zip.File, destDir string, logger *cl
154164
}
155165

156166
n, err := src.Read(buf)
157-
if err == io.EOF {
158-
break
159-
}
160-
if err != nil {
161-
return fmt.Errorf("failed to read file contents: %w", err)
167+
if n > 0 {
168+
written += int64(n)
169+
if written > maxBytes {
170+
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
171+
}
172+
173+
if _, writeErr := dst.Write(buf[:n]); writeErr != nil {
174+
return fmt.Errorf("failed to write file contents: %w", writeErr)
175+
}
162176
}
163177

164-
written += int64(n)
165-
if written > maxBytes {
166-
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)
178+
if errors.Is(err, io.EOF) {
179+
break
167180
}
168181

169-
if _, err := dst.Write(buf[:n]); err != nil {
170-
return fmt.Errorf("failed to write file contents: %w", err)
182+
if err != nil {
183+
return fmt.Errorf("failed to read file contents: %w", err)
171184
}
172185
}
173186

0 commit comments

Comments
 (0)