Skip to content

Commit 975d4a2

Browse files
authored
Merge pull request #1170 from fluxcd/feat-targz
tar: Add support for creating gzip-compressed tarballs
2 parents 188ca35 + 665606c commit 975d4a2

12 files changed

Lines changed: 1878 additions & 694 deletions

File tree

http/fetch/archive_fetcher.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ type ArchiveFetcher struct {
4646
retries int
4747
maxDownloadSize int
4848
fileMode fs.FileMode
49-
untarOpts []tar.TarOption
49+
untarOpts []tar.Option
5050
hostnameOverwrite string
5151
filename string
5252
logger any
@@ -75,9 +75,9 @@ func WithMaxDownloadSize(maxDownloadSize int) Option {
7575
}
7676

7777
// WithUntar tells the ArchiveFetcher to untar the archive expecting it to be a tarball.
78-
func WithUntar(opts ...tar.TarOption) Option {
78+
func WithUntar(opts ...tar.Option) Option {
7979
return func(a *ArchiveFetcher) {
80-
a.untarOpts = append([]tar.TarOption{}, opts...) // to make sure a.untarOpts won't be nil
80+
a.untarOpts = append([]tar.Option{}, opts...) // to make sure a.untarOpts won't be nil
8181
}
8282
}
8383

oci/build.go

Lines changed: 46 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,15 @@ limitations under the License.
1717
package oci
1818

1919
import (
20-
"archive/tar"
21-
"compress/gzip"
2220
"fmt"
2321
"io"
2422
"os"
2523
"path/filepath"
2624
"strings"
27-
"time"
2825

2926
"github.com/fluxcd/pkg/oci/internal/fs"
3027
"github.com/fluxcd/pkg/sourceignore"
28+
"github.com/fluxcd/pkg/tar"
3129
)
3230

3331
// Build archives the given directory as a tarball to the given local path.
@@ -37,17 +35,20 @@ func (c *Client) Build(artifactPath, sourceDir string, ignorePaths []string) (er
3735
}
3836

3937
func build(artifactPath, sourceDir string, ignorePaths []string) (err error) {
40-
absDir, err := filepath.Abs(sourceDir)
38+
absSrc, err := filepath.Abs(sourceDir)
4139
if err != nil {
4240
return err
4341
}
4442

45-
dirStat, err := os.Stat(absDir)
46-
if os.IsNotExist(err) {
47-
return fmt.Errorf("invalid source dir path: %s", absDir)
43+
srcInfo, err := os.Stat(absSrc)
44+
if err != nil {
45+
if os.IsNotExist(err) {
46+
return fmt.Errorf("source path does not exist: %s", absSrc)
47+
}
48+
return fmt.Errorf("invalid source path %s: %w", absSrc, err)
4849
}
4950

50-
tf, err := os.CreateTemp(filepath.Split(absDir))
51+
tf, err := os.CreateTemp(filepath.Split(absSrc))
5152
if err != nil {
5253
return err
5354
}
@@ -58,110 +59,60 @@ func build(artifactPath, sourceDir string, ignorePaths []string) (err error) {
5859
}
5960
}()
6061

61-
ignore := strings.Join(ignorePaths, "\n")
62-
domain := strings.Split(filepath.Clean(absDir), string(filepath.Separator))
63-
ps := sourceignore.ReadPatterns(strings.NewReader(ignore), domain)
64-
matcher := sourceignore.NewMatcher(ps)
65-
filter := func(p string, fi os.FileInfo) bool {
66-
return matcher.Match(strings.Split(p, string(filepath.Separator)), fi.IsDir())
67-
}
68-
69-
sz := &writeCounter{}
70-
mw := io.MultiWriter(tf, sz)
71-
72-
gw := gzip.NewWriter(mw)
73-
tw := tar.NewWriter(gw)
74-
if err := filepath.Walk(absDir, func(p string, fi os.FileInfo, err error) error {
75-
if err != nil {
76-
return err
77-
}
78-
79-
// Ignore anything that is not a file or directories e.g. symlinks
80-
if m := fi.Mode(); !(m.IsRegular() || m.IsDir()) {
81-
return nil
82-
}
83-
84-
if len(ignorePaths) > 0 && filter(p, fi) {
85-
return nil
62+
// If the source is a single file, stage it in a temp dir so Tar can
63+
// archive it as a directory tree containing that one entry.
64+
tarDir := absSrc
65+
if !srcInfo.IsDir() {
66+
stage, stageErr := os.MkdirTemp("", "oci-build-")
67+
if stageErr != nil {
68+
tf.Close()
69+
return stageErr
8670
}
71+
defer os.RemoveAll(stage)
8772

88-
header, err := tar.FileInfoHeader(fi, p)
89-
if err != nil {
90-
return err
91-
}
92-
if dirStat.IsDir() {
93-
// The name needs to be modified to maintain directory structure
94-
// as tar.FileInfoHeader only has access to the base name of the file.
95-
// Ref: https://golang.org/src/archive/tar/common.go?#L6264
96-
//
97-
// we only want to do this if a directory was passed in
98-
relFilePath, err := filepath.Rel(absDir, p)
99-
if err != nil {
100-
return err
101-
}
102-
// Normalize file path so it works on windows
103-
header.Name = filepath.ToSlash(relFilePath)
104-
}
105-
106-
// Remove any environment specific data.
107-
header.Gid = 0
108-
header.Uid = 0
109-
header.Uname = ""
110-
header.Gname = ""
111-
header.ModTime = time.Time{}
112-
header.AccessTime = time.Time{}
113-
header.ChangeTime = time.Time{}
114-
115-
if err := tw.WriteHeader(header); err != nil {
116-
return err
117-
}
118-
119-
if !fi.Mode().IsRegular() {
120-
return nil
121-
}
122-
f, err := os.Open(p)
123-
if err != nil {
124-
f.Close()
125-
return err
126-
}
127-
if _, err := io.Copy(tw, f); err != nil {
128-
f.Close()
73+
if err := copyFileContents(filepath.Join(stage, srcInfo.Name()), absSrc, srcInfo.Mode()); err != nil {
74+
tf.Close()
12975
return err
13076
}
131-
return f.Close()
132-
}); err != nil {
133-
tw.Close()
134-
gw.Close()
135-
tf.Close()
136-
return err
77+
tarDir = stage
13778
}
13879

139-
if err := tw.Close(); err != nil {
140-
gw.Close()
141-
tf.Close()
142-
return err
80+
ignore := strings.Join(ignorePaths, "\n")
81+
domain := strings.Split(filepath.Clean(tarDir), string(filepath.Separator))
82+
ps := sourceignore.ReadPatterns(strings.NewReader(ignore), domain)
83+
matcher := sourceignore.NewMatcher(ps)
84+
filter := func(p string, fi os.FileInfo) bool {
85+
return matcher.Match(strings.Split(p, string(filepath.Separator)), fi.IsDir())
14386
}
144-
if err := gw.Close(); err != nil {
87+
88+
if _, err := tar.Tar(tarDir, tf, tar.WithFilter(filter)); err != nil {
14589
tf.Close()
14690
return err
14791
}
14892
if err := tf.Close(); err != nil {
14993
return err
15094
}
151-
15295
if err := os.Chmod(tmpName, 0o640); err != nil {
15396
return err
15497
}
155-
15698
return fs.RenameWithFallback(tmpName, artifactPath)
15799
}
158100

159-
type writeCounter struct {
160-
written int64
161-
}
162-
163-
func (wc *writeCounter) Write(p []byte) (int, error) {
164-
n := len(p)
165-
wc.written += int64(n)
166-
return n, nil
101+
func copyFileContents(dst, src string, mode os.FileMode) (err error) {
102+
sf, err := os.Open(src)
103+
if err != nil {
104+
return err
105+
}
106+
defer sf.Close()
107+
df, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode.Perm())
108+
if err != nil {
109+
return err
110+
}
111+
defer func() {
112+
if closeErr := df.Close(); closeErr != nil && err == nil {
113+
err = closeErr
114+
}
115+
}()
116+
_, err = io.Copy(df, sf)
117+
return err
167118
}

tar/doc.go

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/*
2+
Copyright 2026 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
// Package tar provides utilities for creating and extracting tar
18+
// archives, with optional gzip compression. Tar writes a sanitized
19+
// archive of a directory tree, skipping symlinks and other non-regular,
20+
// non-directory entries; use ResolveSymlinks (or the confined
21+
// ResolveSymlinksRoot) to materialize symlink targets before archiving.
22+
// Untar safely extracts a tar archive into a target directory,
23+
// rejecting path traversal and capping the total decompressed size.
24+
//
25+
// # Creating an archive
26+
//
27+
// Archive a directory tree to a file as a gzip-compressed tarball:
28+
//
29+
// f, err := os.Create("archive.tar.gz")
30+
// if err != nil {
31+
// return err
32+
// }
33+
// defer f.Close()
34+
//
35+
// if _, err := tar.Tar("/path/to/dir", f); err != nil {
36+
// return err
37+
// }
38+
//
39+
// Exclude entries with a filter and write a plain (non-gzipped) tar:
40+
//
41+
// skipHidden := func(p string, fi os.FileInfo) bool {
42+
// return strings.HasPrefix(fi.Name(), ".")
43+
// }
44+
// _, err := tar.Tar("/path/to/dir", f,
45+
// tar.WithFilter(skipHidden),
46+
// tar.WithSkipGzip(),
47+
// )
48+
//
49+
// # Extracting an archive
50+
//
51+
// Extract a gzip-compressed tarball into a directory:
52+
//
53+
// f, err := os.Open("archive.tar.gz")
54+
// if err != nil {
55+
// return err
56+
// }
57+
// defer f.Close()
58+
//
59+
// if err := tar.Untar(f, "/path/to/target"); err != nil {
60+
// return err
61+
// }
62+
//
63+
// Raise the size limit and tolerate symlinks in the archive:
64+
//
65+
// err := tar.Untar(f, "/path/to/target",
66+
// tar.WithMaxUntarSize(500<<20), // 500 MiB
67+
// tar.WithSkipSymlinks(),
68+
// )
69+
//
70+
// # Archiving with symlinks resolved
71+
//
72+
// By default Tar skips symlinks. For inputs where files live behind
73+
// symlinks (for example, manifest trees generated by Nix), stage the
74+
// source into a caller-owned directory with ResolveSymlinks first,
75+
// then archive the resolved tree:
76+
//
77+
// tmpDir, err := os.MkdirTemp("", "resolve-")
78+
// if err != nil {
79+
// return err
80+
// }
81+
// defer os.RemoveAll(tmpDir)
82+
//
83+
// if err := tar.ResolveSymlinks("/path/to/dir", tmpDir); err != nil {
84+
// return err
85+
// }
86+
//
87+
// if _, err := tar.Tar(tmpDir, w); err != nil {
88+
// return err
89+
// }
90+
//
91+
// For untrusted source trees, use ResolveSymlinksRoot to confine every
92+
// symlink target inside a caller-supplied rootDir. Targets that resolve
93+
// outside rootDir cause the call to fail without materializing them:
94+
//
95+
// err := tar.ResolveSymlinksRoot("/path/to/root", "/path/to/root/src", tmpDir)
96+
package tar

tar/options.go

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/*
2+
Copyright 2026 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package tar
18+
19+
import "os"
20+
21+
// Option configures the behavior of Tar and Untar. Options are
22+
// silently ignored by operations they do not apply to.
23+
type Option func(*tarOpts)
24+
25+
type tarOpts struct {
26+
// maxUntarSize represents the limit size (bytes) for archives being decompressed by Untar.
27+
// When max is a negative value the size checks are disabled.
28+
maxUntarSize int
29+
30+
// skipSymlinks ignores symlinks instead of failing the decompression.
31+
skipSymlinks bool
32+
33+
// skipGzip disables gzip compression: Tar writes a plain tar stream,
34+
// and Untar reads one.
35+
skipGzip bool
36+
37+
// filter is called for each entry during archiving or extraction.
38+
// If it returns true, the entry is excluded.
39+
filter func(path string, fi os.FileInfo) bool
40+
}
41+
42+
// WithMaxUntarSize sets the limit size for archives being decompressed by Untar.
43+
// When max is equal to or less than 0, size checks are disabled.
44+
func WithMaxUntarSize(max int) Option {
45+
return func(t *tarOpts) {
46+
t.maxUntarSize = max
47+
}
48+
}
49+
50+
// WithSkipSymlinks allows for symlinks to be present
51+
// in the tarball and skips them when decompressing.
52+
func WithSkipSymlinks() Option {
53+
return func(t *tarOpts) {
54+
t.skipSymlinks = true
55+
}
56+
}
57+
58+
// WithSkipGzip disables gzip compression: Tar writes a plain tar stream,
59+
// and Untar reads one.
60+
func WithSkipGzip() Option {
61+
return func(t *tarOpts) {
62+
t.skipGzip = true
63+
}
64+
}
65+
66+
// WithFilter sets a predicate called for each entry during archiving
67+
// or extraction. Entries for which fn returns true are excluded. During
68+
// Tar the path is the absolute filesystem path; during Untar it is the
69+
// slash-separated name from the tar header.
70+
func WithFilter(fn func(path string, fi os.FileInfo) bool) Option {
71+
return func(t *tarOpts) {
72+
t.filter = fn
73+
}
74+
}
75+
76+
// applyOpts applies the given Option to t.
77+
func (t *tarOpts) applyOpts(opts ...Option) {
78+
for _, opt := range opts {
79+
opt(t)
80+
}
81+
}

0 commit comments

Comments
 (0)