Skip to content

Commit 6a43323

Browse files
authored
Address extraction edge cases re: duplicate file names (#967)
* Address extraction edge cases re: duplicate file names Signed-off-by: egibs <20933572+egibs@users.noreply.github.com>
1 parent 1e54771 commit 6a43323

12 files changed

Lines changed: 49 additions & 751 deletions

File tree

pkg/action/archive_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ func TestScanArchive(t *testing.T) {
260260
}
261261

262262
func extractError(e error) error {
263-
if strings.Contains(e.Error(), "not a valid gzip archive") || strings.Contains(e.Error(), "not a valid zip file") {
263+
if strings.Contains(e.Error(), "not a valid gzip archive") || strings.Contains(e.Error(), "not a valid zip archive") {
264264
return nil
265265
}
266266
return e

pkg/archive/archive.go

Lines changed: 30 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -166,62 +166,37 @@ func ExtractArchiveToTempDir(ctx context.Context, path string) (string, error) {
166166
}
167167

168168
var extractedFiles sync.Map
169-
files, err := os.ReadDir(tmpDir)
170-
if err != nil {
171-
return "", fmt.Errorf("failed to read files in directory %s: %w", tmpDir, err)
172-
}
173-
for _, file := range files {
174-
extractedFiles.Store(filepath.Join(tmpDir, file.Name()), false)
175-
}
176169

177-
extractedFiles.Range(func(key, _ any) bool {
178-
if key == nil {
179-
return true
170+
err = filepath.WalkDir(tmpDir, func(path string, d os.DirEntry, err error) error {
171+
if err != nil {
172+
return err
180173
}
181-
//nolint: nestif // ignoring complexity of 11
182-
if file, ok := key.(string); ok {
183-
ext := programkind.GetExt(file)
184-
info, err := os.Stat(file)
185-
if err != nil {
186-
return false
187-
}
188-
switch mode := info.Mode(); {
189-
case mode.IsDir():
190-
err = filepath.WalkDir(file, func(path string, d os.DirEntry, err error) error {
191-
if err != nil {
192-
return err
193-
}
194-
rel, err := filepath.Rel(tmpDir, path)
195-
if err != nil {
196-
return fmt.Errorf("filepath.Rel: %w", err)
197-
}
198-
if !d.IsDir() {
199-
if err := extractNestedArchive(ctx, tmpDir, rel, &extractedFiles); err != nil {
200-
return fmt.Errorf("failed to extract nested archive %s: %w", rel, err)
201-
}
202-
}
203-
204-
return nil
205-
})
206-
if err != nil {
207-
return false
208-
}
209-
return true
210-
case mode.IsRegular():
211-
if _, ok := programkind.ArchiveMap[ext]; ok {
212-
rel, err := filepath.Rel(tmpDir, file)
213-
if err != nil {
214-
return false
215-
}
216-
if err := extractNestedArchive(ctx, tmpDir, rel, &extractedFiles); err != nil {
217-
return false
218-
}
219-
}
220-
return true
174+
175+
if d.IsDir() {
176+
return nil
177+
}
178+
179+
if path == tmpDir {
180+
return nil
181+
}
182+
183+
rel, err := filepath.Rel(tmpDir, path)
184+
if err != nil {
185+
return fmt.Errorf("filepath.Rel: %w", err)
186+
}
187+
188+
ext := programkind.GetExt(path)
189+
if _, ok := programkind.ArchiveMap[ext]; ok {
190+
if err := extractNestedArchive(ctx, tmpDir, rel, &extractedFiles); err != nil {
191+
return err
221192
}
222193
}
223-
return true
194+
195+
return nil
224196
})
197+
if err != nil {
198+
return "", fmt.Errorf("failed to walk directory: %w", err)
199+
}
225200

226201
return tmpDir, nil
227202
}
@@ -275,7 +250,10 @@ func handleFile(target string, tr *tar.Reader) error {
275250

276251
written, err := io.CopyBuffer(out, io.LimitReader(tr, maxBytes), buf)
277252
if err != nil {
278-
return fmt.Errorf("failed to copy file: %w", err)
253+
if (strings.Contains(err.Error(), "unexpected EOF") && written == 0) ||
254+
!strings.Contains(err.Error(), "unexpected EOF") {
255+
return fmt.Errorf("failed to copy file: %w", err)
256+
}
279257
}
280258
if written >= maxBytes {
281259
return fmt.Errorf("file exceeds maximum allowed size (%d bytes): %s", maxBytes, target)

pkg/archive/bz2.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ func ExtractBz2(ctx context.Context, d, f string) error {
4545
br := bzip2.NewReader(ctx, tf)
4646
uncompressed := strings.TrimSuffix(filepath.Base(f), ".bz2")
4747
uncompressed = strings.TrimSuffix(uncompressed, ".bzip2")
48-
target := filepath.Join(d, uncompressed)
48+
target := filepath.Join(d, filepath.Base(filepath.Dir(f)), uncompressed)
4949
if !IsValidPath(target, d) {
5050
return fmt.Errorf("invalid file path: %s", target)
5151
}

pkg/archive/gzip.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313
gzip "github.com/klauspost/pgzip"
1414
)
1515

16-
var gzMIME = map[string]struct{}{
16+
var GzMIME = map[string]struct{}{
1717
"application/gzip": {},
1818
"application/gzip-compressed": {},
1919
"application/gzipped": {},
@@ -32,13 +32,13 @@ func ExtractGzip(ctx context.Context, d string, f string) error {
3232
// Check whether the provided file is a valid gzip archive
3333
var isGzip bool
3434
if ft, err := programkind.File(f); err == nil && ft != nil {
35-
if _, ok := gzMIME[ft.MIME]; ok {
35+
if _, ok := GzMIME[ft.MIME]; ok {
3636
isGzip = true
3737
}
3838
}
3939

4040
if !isGzip {
41-
return nil
41+
return fmt.Errorf("not a valid gzip archive: %s", f)
4242
}
4343

4444
logger := clog.FromContext(ctx).With("dir", d, "file", f)

pkg/archive/tar.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func ExtractTar(ctx context.Context, d string, f string) error {
6464
isTGZ := strings.Contains(f, ".tar.gz") || strings.Contains(f, ".tgz")
6565
var isGzip bool
6666
if ft, err := programkind.File(f); err == nil && ft != nil {
67-
if ft.MIME == "application/gzip" {
67+
if _, ok := GzMIME[ft.MIME]; ok {
6868
isGzip = true
6969
}
7070
}
@@ -96,13 +96,11 @@ func ExtractTar(ctx context.Context, d string, f string) error {
9696
return fmt.Errorf("failed to create xz reader: %w", err)
9797
}
9898
uncompressed := strings.Trim(filepath.Base(f), ".xz")
99-
target := filepath.Join(d, uncompressed)
99+
target := filepath.Join(d, filepath.Base(filepath.Dir(f)), uncompressed)
100100
if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
101101
return fmt.Errorf("failed to create directory for file: %w", err)
102102
}
103103

104-
// #nosec G115 // ignore Type conversion which leads to integer overflow
105-
// header.Mode is int64 and FileMode is uint32
106104
out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600)
107105
if err != nil {
108106
return fmt.Errorf("failed to create file: %w", err)
@@ -120,6 +118,9 @@ func ExtractTar(ctx context.Context, d string, f string) error {
120118
break
121119
}
122120
if err != nil {
121+
if strings.Contains(err.Error(), "unexpected EOF") && n > 0 {
122+
break
123+
}
123124
return fmt.Errorf("failed to read file contents: %w", err)
124125
}
125126

@@ -136,7 +137,7 @@ func ExtractTar(ctx context.Context, d string, f string) error {
136137
case strings.Contains(filename, ".tar.bz2") || strings.Contains(filename, ".tbz"):
137138
br := bzip2.NewReader(ctx, tf)
138139
uncompressed := strings.Trim(filepath.Base(f), programkind.GetExt(filename))
139-
target := filepath.Join(d, uncompressed)
140+
target := filepath.Join(d, filepath.Base(filepath.Dir(f)), uncompressed)
140141
if err := os.MkdirAll(filepath.Dir(target), 0o700); err != nil {
141142
return fmt.Errorf("failed to create directory for file: %w", err)
142143
}

pkg/archive/zip.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func ExtractZip(ctx context.Context, d string, f string) error {
5757
}
5858

5959
if !isZip {
60-
return nil
60+
return fmt.Errorf("not a valid zip archive: %s", f)
6161
}
6262

6363
read, err := zip.OpenReader(f)

pkg/archive/zstd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ func ExtractZstd(ctx context.Context, d string, f string) error {
3434

3535
uncompressed := strings.TrimSuffix(filepath.Base(f), ".zstd")
3636
uncompressed = strings.TrimSuffix(uncompressed, ".zst")
37-
target := filepath.Join(d, uncompressed)
37+
target := filepath.Join(d, filepath.Base(filepath.Dir(f)), uncompressed)
3838

3939
if !IsValidPath(target, d) {
4040
return fmt.Errorf("invalid zstd decompression file path: %s", target)

pkg/programkind/programkind.go

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,11 @@ var ArchiveMap = map[string]bool{
4646
var supportedKind = map[string]string{
4747
"7z": "application/x-7z-compressed",
4848
"Z": "application/zlib",
49-
"apk": "application/gzip",
5049
"asm": "",
5150
"bash": "application/x-bsh",
5251
"bat": "application/bat",
5352
"beam": "application/x-erlang-binary",
5453
"bin": "application/octet-stream",
55-
"bz2": "application/x-bzip2",
56-
"bzip2": "application/x-bzip2",
5754
"c": "text/x-c",
5855
"cc": "text/x-c",
5956
"class": "application/java-vm",
@@ -63,21 +60,16 @@ var supportedKind = map[string]string{
6360
"crontab": "text/x-crontab",
6461
"csh": "application/x-csh",
6562
"cxx": "text/x-c",
66-
"deb": "application/vnd.debian.binary-package",
6763
"dll": "application/octet-stream",
6864
"dylib": "application/x-sharedlib",
6965
"elf": "application/x-elf",
7066
"exe": "application/octet-stream",
7167
"expect": "text/x-expect",
7268
"fish": "text/x-fish",
73-
"gem": "application/octet-stream",
7469
"go": "text/x-go",
75-
"gzip": "application/gzip",
76-
"gz": "application/gzip",
7770
"h": "text/x-h",
7871
"hh": "text/x-h",
7972
"html": "",
80-
"jar": "application/java-archive",
8173
"java": "text/x-java",
8274
"js": "application/javascript",
8375
"ko": "application/x-object",
@@ -97,29 +89,19 @@ var supportedKind = map[string]string{
9789
"py": "text/x-python",
9890
"pyc": "application/x-python-code",
9991
"rb": "text/x-ruby",
100-
"rpm": "application/x-rpm",
10192
"rs": "text/x-rust",
10293
"scpt": "application/x-applescript",
10394
"scptd": "application/x-applescript",
10495
"script": "text/x-generic-script",
10596
"service": "text/x-systemd",
10697
"sh": "application/x-sh",
10798
"so": "application/x-sharedlib",
108-
"tar": "application/x-tar",
109-
"tar.gz": "application/gzip",
110-
"tar.xz": "application/x-xz",
111-
"tgz": "application/gzip",
11299
"ts": "application/typescript",
113100
"upx": "application/x-upx",
114-
"whl": "application/x-wheel+zip",
115-
"xz": "application/x-xz",
116101
"yaml": "",
117102
"yara": "",
118103
"yml": "",
119-
"zip": "application/zip",
120104
"zsh": "application/x-zsh",
121-
"zst": "application/zstd",
122-
"zstd": "application/zstd",
123105
}
124106

125107
type FileType struct {
@@ -213,6 +195,11 @@ func IsValidUPX(header []byte, path string) (bool, error) {
213195
func makeFileType(path string, ext string, mime string) *FileType {
214196
ext = strings.TrimPrefix(ext, ".")
215197

198+
// Archives are supported
199+
if _, ok := ArchiveMap[GetExt(path)]; ok {
200+
return &FileType{Ext: ext, MIME: mime}
201+
}
202+
216203
// the only JSON files we currently scan are NPM package metadata, which ends in *package.json
217204
if strings.HasSuffix(path, "package.json") {
218205
return &FileType{
@@ -339,8 +326,7 @@ func File(path string) (*FileType, error) {
339326

340327
// Path returns a filetype based strictly on file path.
341328
func Path(path string) *FileType {
342-
// Trim the leading '.'
343-
ext := strings.TrimPrefix(GetExt(path), ".")
329+
ext := strings.ReplaceAll(filepath.Ext(path), ".", "")
344330
mime := supportedKind[ext]
345331
return makeFileType(path, ext, mime)
346332
}

0 commit comments

Comments
 (0)