Skip to content

Commit 3cb68d1

Browse files
fix: git access tar methods (#1454)
The method currently used to tar a filesystem also flattens the directory structure into a simple list of files. As this is not what we need, we will use another method. Additionally, the tar function includes the "last-modification" date of the files. This leads to false results (e.g. in digest calculation), as the modification date of files is reset when they are downloaded. --------- Co-authored-by: Jakob Möller <jakob.moeller@sap.com>
1 parent ef70872 commit 3cb68d1

6 files changed

Lines changed: 104 additions & 28 deletions

File tree

api/utils/blobaccess/git/access.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package git
22

33
import (
44
"context"
5+
"fmt"
56

67
gogit "github.com/go-git/go-git/v5"
78
"github.com/mandelsoft/goutils/errors"
@@ -87,8 +88,8 @@ func BlobAccess(opt ...Option) (_ bpi.BlobAccess, rerr error) {
8788
dw := iotools.NewDigestWriterWith(digest.SHA256, tgz)
8889
finalize.Close(dw)
8990

90-
if err := tarutils.TgzFs(filteredRepositoryFS, dw); err != nil {
91-
return nil, err
91+
if err := tarutils.TgzFs(filteredRepositoryFS, dw, tarutils.TarFileSystemOptions{ZeroModTime: true}); err != nil {
92+
return nil, fmt.Errorf("failed to create tgz: %w", err)
9293
}
9394

9495
log.Debug("created", "file", tgz.Name())

api/utils/blobaccess/git/access_test.go

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,13 @@ package git_test
22

33
import (
44
"embed"
5+
_ "embed"
56
"fmt"
67
"io"
8+
"io/fs"
79
"os"
810
"time"
911

10-
_ "embed"
11-
1212
. "github.com/mandelsoft/goutils/testutils"
1313
. "github.com/onsi/ginkgo/v2"
1414
. "github.com/onsi/gomega"
@@ -56,26 +56,39 @@ var _ = Describe("git Blob Access", func() {
5656
repoTestData, err := testData.ReadDir(repoBase)
5757
Expect(err).ToNot(HaveOccurred())
5858

59-
for _, entry := range repoTestData {
60-
path := filepath.Join(repoBase, entry.Name())
61-
repoPath := filepath.Join(repoDir, entry.Name())
62-
63-
file, err := testData.Open(path)
64-
Expect(err).ToNot(HaveOccurred())
65-
66-
fileInRepo, err := os.OpenFile(
67-
repoPath,
68-
os.O_CREATE|os.O_RDWR|os.O_TRUNC,
69-
0o600,
70-
)
71-
Expect(err).ToNot(HaveOccurred())
72-
73-
_, err = io.Copy(fileInRepo, file)
74-
Expect(err).ToNot(HaveOccurred())
75-
76-
Expect(fileInRepo.Close()).To(Succeed())
77-
Expect(file.Close()).To(Succeed())
59+
var process func(base string, entries []fs.DirEntry)
60+
process = func(base string, infos []fs.DirEntry) {
61+
for _, entry := range infos {
62+
path := filepath.Join(base, entry.Name())
63+
repoPath := filepath.Join(repoDir, entry.Name())
64+
65+
file, err := testData.Open(path)
66+
Expect(err).ToNot(HaveOccurred())
67+
fi, err := file.Stat()
68+
Expect(err).ToNot(HaveOccurred())
69+
if fi.IsDir() {
70+
Expect(os.MkdirAll(repoPath, 0o700)).ToNot(HaveOccurred())
71+
entries, err := testData.ReadDir(path)
72+
Expect(err).ToNot(HaveOccurred())
73+
process(filepath.Join(base, fi.Name()), entries)
74+
continue
75+
}
76+
77+
fileInRepo, err := os.OpenFile(
78+
repoPath,
79+
os.O_CREATE|os.O_RDWR|os.O_TRUNC,
80+
0o600,
81+
)
82+
Expect(err).ToNot(HaveOccurred())
83+
84+
_, err = io.Copy(fileInRepo, file)
85+
Expect(err).ToNot(HaveOccurred())
86+
87+
Expect(fileInRepo.Close()).To(Succeed())
88+
Expect(file.Close()).To(Succeed())
89+
}
7890
}
91+
process(repoBase, repoTestData)
7992

8093
wt, err := repo.Worktree()
8194
Expect(err).ToNot(HaveOccurred())
@@ -100,7 +113,22 @@ var _ = Describe("git Blob Access", func() {
100113
))
101114
defer Close(b)
102115
files := Must(tarutils.ListArchiveContentFromReader(Must(b.Reader())))
103-
Expect(files).To(ConsistOf("file_in_repo"))
116+
Expect(files).To(ConsistOf("file_in_repo", "file_in_dir_in_repo"))
117+
118+
data1, err := b.Reader()
119+
Expect(err).ToNot(HaveOccurred())
120+
121+
By("testing equivalence of byte streams from equivalent accesses")
122+
b2 := Must(gitblob.BlobAccess(
123+
gitblob.WithURL(url),
124+
gitblob.WithLoggingContext(ctx),
125+
gitblob.WithCachingContext(ctx),
126+
))
127+
defer Close(b2)
128+
data2, err := b2.Reader()
129+
Expect(err).ToNot(HaveOccurred())
130+
131+
Expect(data1).ToNot(BeIdenticalTo(data2))
104132
})
105133
})
106134

@@ -151,4 +179,5 @@ var _ = Describe("git Blob Access", func() {
151179
Expect(files).To(ConsistOf("README", "CONTRIBUTING.md"))
152180
})
153181
})
182+
154183
})
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Foobar

api/utils/blobaccess/maven/utils.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ func (s *spec) getBlobAccess() (_ bpi.BlobAccess, rerr error) {
118118
dw := iotools.NewDigestWriterWith(digest.SHA256, tgz)
119119
finalize.Close(dw)
120120

121-
err = tarutils.TgzFs(tmpfs, dw)
121+
err = tarutils.TgzFlatFs(tmpfs, dw)
122122
if err != nil {
123123
return nil, err
124124
}

api/utils/tarutils/pack.go

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/mandelsoft/filepath/pkg/filepath"
15+
"github.com/mandelsoft/goutils/errors"
1516
"github.com/mandelsoft/goutils/finalizer"
1617
"github.com/mandelsoft/goutils/general"
1718
"github.com/mandelsoft/vfs/pkg/osfs"
@@ -46,7 +47,11 @@ type TarFileSystemOptions struct {
4647
ExcludeFiles []string
4748
// PreserveDir defines that the directory specified in the Path field should be included in the blob.
4849
// Only supported for Type dir.
49-
PreserveDir bool
50+
PreserveDir bool
51+
// ZeroModTime defines that the modtime of the files added to the tar should be zeroed. This is important
52+
// if the TAR archives need to be comparable on byte level (e.g. for hashing). To get fully byte-equivalent
53+
// TAR archives at different timestamps, the mod time needs to be set to 0.
54+
ZeroModTime bool
5055
FollowSymlinks bool
5156

5257
root string
@@ -119,6 +124,10 @@ func addFileToTar(fs vfs.FileSystem, tw *tar.Writer, path string, realPath strin
119124
}
120125
header.Name = path
121126

127+
if opts.ZeroModTime {
128+
header.ModTime = time.Time{}
129+
}
130+
122131
switch {
123132
case info.IsDir():
124133
// do not write root header
@@ -241,9 +250,18 @@ func ListSortedFilesInDir(fs vfs.FileSystem, root string, flat bool) ([]string,
241250
return files, err
242251
}
243252

244-
// TgzFs creates a tar.gz archive from a filesystem with all files being in the root of the zipped archive.
253+
// TgzFs works like PackFsIntoTar, but compresses the tar archive with [gzip.NewWriter].
254+
func TgzFs(fs vfs.FileSystem, writer io.Writer, options TarFileSystemOptions) (err error) {
255+
zip := gzip.NewWriter(writer)
256+
defer func() {
257+
err = errors.Join(err, zip.Close())
258+
}()
259+
return PackFsIntoTar(fs, "", zip, options)
260+
}
261+
262+
// TgzFlatFs creates a tar.gz archive from a filesystem with all files being in the root of the zipped archive.
245263
// The writer is closed after the archive is written. The TAR-headers are normalized, see RegularFileInfoHeader.
246-
func TgzFs(fs vfs.FileSystem, writer io.Writer) error {
264+
func TgzFlatFs(fs vfs.FileSystem, writer io.Writer) error {
247265
zip := gzip.NewWriter(writer)
248266
err := TarFlatFs(fs, zip)
249267
if err != nil {

api/utils/tarutils/pack_test.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package tarutils_test
22

33
import (
4+
"bytes"
45
"io/fs"
56
"os"
67
"runtime"
78

89
. "github.com/mandelsoft/goutils/testutils"
10+
"github.com/mandelsoft/vfs/pkg/memoryfs"
911
. "github.com/onsi/ginkgo/v2"
1012
. "github.com/onsi/gomega"
1113

@@ -55,4 +57,29 @@ var _ = Describe("tar utils mapping", func() {
5557
Expect(err.Error()).To(ContainSubstring("no such file or directory"))
5658
}
5759
})
60+
61+
It("test byte-equivalent compressed archives", func() {
62+
fs := memoryfs.New()
63+
f1, err := fs.Create("some file")
64+
Expect(err).ToNot(HaveOccurred())
65+
_, err = f1.Write([]byte("some content"))
66+
Expect(err).ToNot(HaveOccurred())
67+
Expect(f1.Close()).ToNot(HaveOccurred())
68+
69+
var buf1, buf2 bytes.Buffer
70+
71+
Expect(tarutils.TgzFs(fs, &buf1, tarutils.TarFileSystemOptions{
72+
ZeroModTime: true,
73+
})).To(Succeed())
74+
75+
Expect(tarutils.TgzFs(fs, &buf2, tarutils.TarFileSystemOptions{
76+
ZeroModTime: true,
77+
})).To(Succeed())
78+
79+
Expect(buf1.Bytes()).To(Equal(buf2.Bytes()))
80+
81+
var buf3 bytes.Buffer
82+
Expect(tarutils.TgzFs(fs, &buf3, tarutils.TarFileSystemOptions{})).To(Succeed())
83+
Expect(buf1.Bytes()).ToNot(Equal(buf3.Bytes()))
84+
})
5885
})

0 commit comments

Comments
 (0)