Skip to content

Commit cb78af9

Browse files
authored
Merge pull request #6 from codeclysm/v3
Add Extractor struct
2 parents 90c43fb + 472639d commit cb78af9

5 files changed

Lines changed: 534 additions & 314 deletions

File tree

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,28 @@ If you don't know which archive you're dealing with (life really is always a sur
3636

3737
```go
3838
extract.Archive(ctx, data, "/path/where/to/extract", nil)
39+
```
40+
41+
If you need more control over how your files will be extracted you can use an Extractor.
42+
43+
It needs an FS object that implements the FS interface:
44+
45+
```
46+
type FS interface {
47+
Link(string, string) error
48+
MkdirAll(string, os.FileMode) error
49+
OpenFile(name string, flag int, perm os.FileMode) (*os.File, error)
50+
Symlink(string, string) error
51+
}
52+
```
53+
54+
which contains only the functions required to perform an extraction. This way it's easy to wrap os functions to
55+
chroot the path, or scramble the files, or send an event for each operation, or even reimplement them for an in-memory store.
56+
57+
```go
58+
extractor := extract.Extractor{
59+
FS: fs,
60+
}
61+
62+
extractor.Archive(ctx, data, "path/where/to/extract", nil)
3963
```

extract.go

Lines changed: 26 additions & 256 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,10 @@
2727
package extract
2828

2929
import (
30-
"archive/tar"
31-
"archive/zip"
3230
"bytes"
33-
"compress/bzip2"
34-
"compress/gzip"
3531
"context"
3632
"io"
37-
"io/ioutil"
3833
"os"
39-
"path/filepath"
40-
"strings"
41-
42-
filetype "github.com/h2non/filetype"
43-
"github.com/h2non/filetype/types"
44-
45-
"github.com/juju/errors"
4634
)
4735

4836
// Renamer is a function that can be used to rename the files when you're extracting
@@ -55,67 +43,31 @@ type Renamer func(string) string
5543
// handle the names of the files.
5644
// If the file is not an archive, an error is returned.
5745
func Archive(ctx context.Context, body io.Reader, location string, rename Renamer) error {
58-
body, kind, err := match(body)
59-
if err != nil {
60-
errors.Annotatef(err, "Detect archive type")
46+
extractor := Extractor{
47+
FS: fs{},
6148
}
6249

63-
switch kind.Extension {
64-
case "zip":
65-
return Zip(ctx, body, location, rename)
66-
case "gz":
67-
return Gz(ctx, body, location, rename)
68-
case "bz2":
69-
return Bz2(ctx, body, location, rename)
70-
case "tar":
71-
return Tar(ctx, body, location, rename)
72-
default:
73-
return errors.New("Not a supported archive")
74-
}
50+
return extractor.Archive(ctx, body, location, rename)
7551
}
7652

7753
// Bz2 extracts a .bz2 or .tar.bz2 archived stream of data in the specified location.
7854
// It accepts a rename function to handle the names of the files (see the example)
7955
func Bz2(ctx context.Context, body io.Reader, location string, rename Renamer) error {
80-
reader := bzip2.NewReader(body)
81-
82-
body, kind, err := match(reader)
83-
if err != nil {
84-
return errors.Annotatef(err, "extract bz2: detect")
85-
}
86-
87-
if kind.Extension == "tar" {
88-
return Tar(ctx, body, location, rename)
56+
extractor := Extractor{
57+
FS: fs{},
8958
}
9059

91-
err = copy(ctx, location, 0666, body)
92-
if err != nil {
93-
return err
94-
}
95-
return nil
60+
return extractor.Bz2(ctx, body, location, rename)
9661
}
9762

9863
// Gz extracts a .gz or .tar.gz archived stream of data in the specified location.
9964
// It accepts a rename function to handle the names of the files (see the example)
10065
func Gz(ctx context.Context, body io.Reader, location string, rename Renamer) error {
101-
reader, err := gzip.NewReader(body)
102-
if err != nil {
103-
return errors.Annotatef(err, "Gunzip")
104-
}
105-
106-
body, kind, err := match(reader)
107-
if err != nil {
108-
return err
66+
extractor := Extractor{
67+
FS: fs{},
10968
}
11069

111-
if kind.Extension == "tar" {
112-
return Tar(ctx, body, location, rename)
113-
}
114-
err = copy(ctx, location, 0666, body)
115-
if err != nil {
116-
return err
117-
}
118-
return nil
70+
return extractor.Gz(ctx, body, location, rename)
11971
}
12072

12173
type file struct {
@@ -131,219 +83,37 @@ type link struct {
13183
// Tar extracts a .tar archived stream of data in the specified location.
13284
// It accepts a rename function to handle the names of the files (see the example)
13385
func Tar(ctx context.Context, body io.Reader, location string, rename Renamer) error {
134-
files := []file{}
135-
links := []link{}
136-
symlinks := []link{}
137-
138-
// We make the first pass creating the directory structure, or we could end up
139-
// attempting to create a file where there's no folder
140-
tr := tar.NewReader(body)
141-
for {
142-
select {
143-
case <-ctx.Done():
144-
return errors.New("interrupted")
145-
default:
146-
}
147-
148-
header, err := tr.Next()
149-
if err == io.EOF {
150-
break
151-
}
152-
153-
if err != nil {
154-
return errors.Annotatef(err, "Read tar stream")
155-
}
156-
157-
path := header.Name
158-
if rename != nil {
159-
path = rename(path)
160-
}
161-
162-
if path == "" {
163-
continue
164-
}
165-
166-
path = filepath.Join(location, path)
167-
info := header.FileInfo()
168-
169-
switch header.Typeflag {
170-
case tar.TypeDir:
171-
if err := os.MkdirAll(path, info.Mode()); err != nil {
172-
return errors.Annotatef(err, "Create directory %s", path)
173-
}
174-
case tar.TypeReg, tar.TypeRegA:
175-
var data bytes.Buffer
176-
if _, err := copyCancel(ctx, &data, tr); err != nil {
177-
return errors.Annotatef(err, "Read contents of file %s", path)
178-
}
179-
files = append(files, file{Path: path, Mode: info.Mode(), Data: data})
180-
case tar.TypeLink:
181-
name := header.Linkname
182-
if rename != nil {
183-
name = rename(name)
184-
}
185-
186-
name = filepath.Join(location, name)
187-
links = append(links, link{Path: path, Name: name})
188-
case tar.TypeSymlink:
189-
symlinks = append(symlinks, link{Path: path, Name: header.Linkname})
190-
}
191-
}
192-
193-
// Now we make another pass creating the files and links
194-
for i := range files {
195-
if err := copy(ctx, files[i].Path, files[i].Mode, &files[i].Data); err != nil {
196-
return errors.Annotatef(err, "Create file %s", files[i].Path)
197-
}
198-
}
199-
200-
for i := range links {
201-
select {
202-
case <-ctx.Done():
203-
return errors.New("interrupted")
204-
default:
205-
}
206-
if err := os.Link(links[i].Name, links[i].Path); err != nil {
207-
return errors.Annotatef(err, "Create link %s", links[i].Path)
208-
}
86+
extractor := Extractor{
87+
FS: fs{},
20988
}
21089

211-
for i := range symlinks {
212-
select {
213-
case <-ctx.Done():
214-
return errors.New("interrupted")
215-
default:
216-
}
217-
if err := os.Symlink(symlinks[i].Name, symlinks[i].Path); err != nil {
218-
return errors.Annotatef(err, "Create link %s", symlinks[i].Path)
219-
}
220-
}
221-
return nil
90+
return extractor.Tar(ctx, body, location, rename)
22291
}
22392

22493
// Zip extracts a .zip archived stream of data in the specified location.
22594
// It accepts a rename function to handle the names of the files (see the example).
22695
func Zip(ctx context.Context, body io.Reader, location string, rename Renamer) error {
227-
// read the whole body into a buffer. Not sure this is the best way to do it
228-
buffer := bytes.NewBuffer([]byte{})
229-
copyCancel(ctx, buffer, body)
230-
231-
archive, err := zip.NewReader(bytes.NewReader(buffer.Bytes()), int64(buffer.Len()))
232-
if err != nil {
233-
return errors.Annotatef(err, "Read the zip file")
234-
}
235-
236-
files := []file{}
237-
links := []link{}
238-
239-
// We make the first pass creating the directory structure, or we could end up
240-
// attempting to create a file where there's no folder
241-
for _, header := range archive.File {
242-
select {
243-
case <-ctx.Done():
244-
return errors.New("interrupted")
245-
default:
246-
}
247-
248-
path := header.Name
249-
250-
// Replace backslash with forward slash. There are archives in the wild made with
251-
// buggy compressors that use backslash as path separator. The ZIP format explicitly
252-
// denies the use of "\" so we just replace it with slash "/".
253-
// Moreover it seems that folders are stored as "files" but with a final "\" in the
254-
// filename... oh, well...
255-
forceDir := strings.HasSuffix(path, "\\")
256-
path = strings.Replace(path, "\\", "/", -1)
257-
258-
if rename != nil {
259-
path = rename(path)
260-
}
261-
262-
if path == "" {
263-
continue
264-
}
265-
266-
path = filepath.Join(location, path)
267-
info := header.FileInfo()
268-
269-
switch {
270-
case info.IsDir() || forceDir:
271-
if err := os.MkdirAll(path, info.Mode()|os.ModeDir|100); err != nil {
272-
return errors.Annotatef(err, "Create directory %s", path)
273-
}
274-
// We only check for symlinks because hard links aren't possible
275-
case info.Mode()&os.ModeSymlink != 0:
276-
f, err := header.Open()
277-
if err != nil {
278-
return errors.Annotatef(err, "Open link %s", path)
279-
}
280-
name, err := ioutil.ReadAll(f)
281-
if err != nil {
282-
return errors.Annotatef(err, "Read address of link %s", path)
283-
}
284-
links = append(links, link{Path: path, Name: string(name)})
285-
default:
286-
f, err := header.Open()
287-
if err != nil {
288-
return errors.Annotatef(err, "Open file %s", path)
289-
}
290-
var data bytes.Buffer
291-
if _, err := copyCancel(ctx, &data, f); err != nil {
292-
return errors.Annotatef(err, "Read contents of file %s", path)
293-
}
294-
files = append(files, file{Path: path, Mode: info.Mode(), Data: data})
295-
}
96+
extractor := Extractor{
97+
FS: fs{},
29698
}
29799

298-
// Now we make another pass creating the files and links
299-
for i := range files {
300-
if err := copy(ctx, files[i].Path, files[i].Mode, &files[i].Data); err != nil {
301-
return errors.Annotatef(err, "Create file %s", files[i].Path)
302-
}
303-
}
100+
return extractor.Zip(ctx, body, location, rename)
101+
}
304102

305-
for i := range links {
306-
select {
307-
case <-ctx.Done():
308-
return errors.New("interrupted")
309-
default:
310-
}
311-
if err := os.Symlink(links[i].Name, links[i].Path); err != nil {
312-
return errors.Annotatef(err, "Create link %s", links[i].Path)
313-
}
314-
}
103+
type fs struct{}
315104

316-
return nil
105+
func (f fs) Link(oldname, newname string) error {
106+
return os.Link(oldname, newname)
317107
}
318108

319-
func copy(ctx context.Context, path string, mode os.FileMode, src io.Reader) error {
320-
// We add the execution permission to be able to create files inside it
321-
err := os.MkdirAll(filepath.Dir(path), mode|os.ModeDir|100)
322-
if err != nil {
323-
return err
324-
}
325-
file, err := os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, mode)
326-
if err != nil {
327-
return err
328-
}
329-
defer file.Close()
330-
_, err = copyCancel(ctx, file, src)
331-
return err
109+
func (f fs) MkdirAll(path string, perm os.FileMode) error {
110+
return os.MkdirAll(path, perm)
332111
}
333112

334-
// match reads the first 512 bytes, calls types.Match and returns a reader
335-
// for the whole stream
336-
func match(r io.Reader) (io.Reader, types.Type, error) {
337-
buffer := make([]byte, 512)
338-
339-
n, err := r.Read(buffer)
340-
if err != nil && err != io.EOF {
341-
return nil, types.Unknown, err
342-
}
343-
344-
r = io.MultiReader(bytes.NewBuffer(buffer[:n]), r)
345-
346-
typ, err := filetype.Match(buffer)
113+
func (f fs) Symlink(oldname, newname string) error {
114+
return os.Symlink(oldname, newname)
115+
}
347116

348-
return r, typ, err
117+
func (f fs) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
118+
return os.OpenFile(name, flag, perm)
349119
}

0 commit comments

Comments
 (0)