Skip to content

Commit 3c98b14

Browse files
author
Chris Morandi
committed
If applied, this commit will add an Extraction strategy to the ArtifactGenerator copy operations which will extract the content of matched tarball files to the output artifact. This is in response to the issue detailed here #301
Signed-off-by: Chris Morandi <chris.morandi@velocitaconsultancy.uk>
1 parent 37a803a commit 3c98b14

6 files changed

Lines changed: 951 additions & 14 deletions

File tree

api/v1beta1/artifactgenerator_types.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ const (
3737
SourceFetchFailedReason = "SourceFetchFailed"
3838
OverwriteStrategy = "Overwrite"
3939
MergeStrategy = "Merge"
40+
ExtractStrategy = "Extract"
4041
EnabledValue = "enabled"
4142
DisabledValue = "disabled"
4243
)
@@ -149,9 +150,13 @@ type CopyOperation struct {
149150
// Strategy specifies the copy strategy to use.
150151
// 'Overwrite' will overwrite existing files in the destination.
151152
// 'Merge' is for merging YAML files using Helm values merge strategy.
153+
// 'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
154+
// built with flux build artifact or helm package. When using glob patterns,
155+
// non-tarball files are silently skipped. For single file sources, the file
156+
// must be a tarball or an error is returned. Directories are not supported.
152157
// If not specified, defaults to 'Overwrite'.
153158
// +optional
154-
// +kubebuilder:validation:Enum=Overwrite;Merge
159+
// +kubebuilder:validation:Enum=Overwrite;Merge;Extract
155160
Strategy string `json:"strategy,omitempty"`
156161
}
157162

config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,15 @@ spec:
8585
Strategy specifies the copy strategy to use.
8686
'Overwrite' will overwrite existing files in the destination.
8787
'Merge' is for merging YAML files using Helm values merge strategy.
88+
'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
89+
built with flux build artifact or helm package. When using glob patterns,
90+
non-tarball files are silently skipped. For single file sources, the file
91+
must be a tarball or an error is returned. Directories are not supported.
8892
If not specified, defaults to 'Overwrite'.
8993
enum:
9094
- Overwrite
9195
- Merge
96+
- Extract
9297
type: string
9398
to:
9499
description: |-

docs/spec/v1beta1/artifactgenerators.md

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ Each copy operation specifies how to copy files from sources into the generated
268268
the root of the generated artifact and `path` is the relative path to a file or directory.
269269
- `exclude` (optional): A list of glob patterns to filter out from the source selection.
270270
Any file matched by `from` that also matches an exclude pattern will be ignored.
271-
- `strategy` (optional): Defines how to handle existing files at the destination,
272-
either `Overwrite` (default) or `Merge` (for YAML files only).
271+
- `strategy` (optional): Defines how to handle files during copy operations:
272+
`Overwrite` (default), `Merge` (for YAML files), or `Extract` (for tarball archives).
273273

274274
Copy operations use `cp`-like semantics:
275275

@@ -327,6 +327,35 @@ Example of copy with `Merge` strategy:
327327
**Note** that the merge strategy will replace _arrays_ entirely, the behavior is
328328
identical to how Helm merges `values.yaml` files when using multiple `--values` flags.
329329

330+
##### Extract Strategy
331+
332+
The `Extract` strategy is used for extracting the contents of tarball archives (`.tar.gz`, `.tgz`)
333+
built with `flux build artifact` or `helm package`. The tarball contents are extracted
334+
to the destination while preserving their internal directory structure.
335+
336+
Example of copy with `Extract` strategy:
337+
338+
```yaml
339+
# Extract a Helm chart tarball built with `helm package`
340+
- from: "@oci/podinfo-6.7.0.tgz"
341+
to: "@artifact/"
342+
strategy: Extract
343+
344+
# Extract multiple tarballs using glob patterns
345+
- from: "@source/charts/*.tgz"
346+
to: "@artifact/charts/"
347+
strategy: Extract
348+
349+
# Extract tarballs recursively from nested directories
350+
- from: "@source/releases/**/*.tgz"
351+
to: "@artifact/"
352+
strategy: Extract
353+
```
354+
355+
**Note** that when using glob patterns (including recursive `**` patterns) with the `Extract`
356+
strategy, non-tarball files are silently skipped. For single file sources, the file must have
357+
a `.tar.gz` or `.tgz` extension. Directories are not supported with this strategy.
358+
330359
## Working with ArtifactGenerators
331360

332361
### Suspend and Resume Reconciliation

internal/builder/builder.go

Lines changed: 59 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,12 @@ func applyCopyOperation(ctx context.Context,
175175

176176
if !isGlobPattern {
177177
// Direct path reference - check what it actually is first (cp-like behavior)
178-
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, destRelPath, destEndsWithSlash)
178+
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, stagingDir, destRelPath, destEndsWithSlash)
179179
}
180180

181181
// Glob pattern - find all matches and copy each
182-
matches, err := fs.Glob(srcRoot.FS(), srcPattern)
182+
matches, err := doublestar.Glob(srcRoot.FS(), srcPattern)
183+
183184
if err != nil {
184185
return fmt.Errorf("invalid glob pattern '%s': %w", srcPattern, err)
185186
}
@@ -188,12 +189,19 @@ func applyCopyOperation(ctx context.Context,
188189
return fmt.Errorf("no files match pattern '%s' in source '%s'", srcPattern, srcAlias)
189190
}
190191

191-
// Filter out excluded files
192+
// Filter out excluded files and special directory entries
192193
filteredMatches := make([]string, 0, len(matches))
193194
for _, match := range matches {
194-
if !shouldExclude(match, op.Exclude) {
195-
filteredMatches = append(filteredMatches, match)
195+
// Skip current directory and parent directory references
196+
// doublestar.Glob returns "." for patterns like "**" which would
197+
// cause the entire source to be copied, bypassing per-file strategies
198+
if match == "." || match == ".." {
199+
continue
200+
}
201+
if shouldExclude(match, op.Exclude) {
202+
continue
196203
}
204+
filteredMatches = append(filteredMatches, match)
197205
}
198206

199207
if len(filteredMatches) == 0 {
@@ -206,11 +214,25 @@ func applyCopyOperation(ctx context.Context,
206214
return err
207215
}
208216

209-
// Calculate destination path based on glob pattern type
210-
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
211-
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
212-
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
217+
// Handle Extract strategy for tarballs
218+
if op.Strategy == swapi.ExtractStrategy {
219+
if !isTarball(match) {
220+
// Ignore files that are not tarball archives
221+
continue
222+
}
223+
if err := extractTarball(ctx, srcRoot, match, stagingDir, destRelPath); err != nil {
224+
return fmt.Errorf("failed to extract tarball '%s' to '%s': %w", match, destRelPath, err)
225+
}
226+
227+
} else {
228+
// Calculate destination path based on glob pattern type
229+
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
230+
231+
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
232+
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
233+
}
213234
}
235+
214236
}
215237

216238
return nil
@@ -223,6 +245,7 @@ func applySingleSourceCopy(ctx context.Context,
223245
srcRoot *os.Root,
224246
srcPath string,
225247
stagingRoot *os.Root,
248+
stagingDir string,
226249
destPath string,
227250
destEndsWithSlash bool) error {
228251
// Clean the source path to handle trailing slashes
@@ -238,10 +261,14 @@ func applySingleSourceCopy(ctx context.Context,
238261
}
239262

240263
if srcInfo.IsDir() {
264+
// Extract strategy is not supported for directories
265+
if op.Strategy == swapi.ExtractStrategy {
266+
return fmt.Errorf("extract strategy is not supported for directories, got '%s'", srcPath)
267+
}
241268
return applySingleDirectoryCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath)
242-
} else {
243-
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath, destEndsWithSlash)
244269
}
270+
271+
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, stagingDir, destPath, destEndsWithSlash)
245272
}
246273

247274
// applySingleFileCopy handles copying a single file using cp-like semantics:
@@ -252,12 +279,22 @@ func applySingleFileCopy(ctx context.Context,
252279
srcRoot *os.Root,
253280
srcPath string,
254281
stagingRoot *os.Root,
282+
stagingDir string,
255283
destPath string,
256284
destEndsWithSlash bool) error {
257285
// Check if the file should be excluded
258286
if shouldExclude(srcPath, op.Exclude) {
259287
return nil // Skip excluded file
260288
}
289+
290+
// Handle Extract strategy for tarballs
291+
if op.Strategy == swapi.ExtractStrategy {
292+
if !isTarball(srcPath) {
293+
return fmt.Errorf("extract strategy requires tarball file (.tar.gz or .tgz), got '%s'", srcPath)
294+
}
295+
return extractTarball(ctx, srcRoot, srcPath, stagingDir, destPath)
296+
}
297+
261298
var finalDestPath string
262299

263300
if destEndsWithSlash {
@@ -302,7 +339,9 @@ func containsGlobChars(path string) bool {
302339
// to match cp-like behavior for different glob patterns:
303340
// - dir/** patterns strip the directory prefix (like cp -r dir/** dest/)
304341
// - other patterns preserve the full match path
342+
// The Extract strategy does not use this function: matched tarballs are extracted directly under the destination path.
305343
func calculateGlobDestination(pattern, match, destPath string) string {
344+
306345
// Check if pattern ends with /** (recursive contents pattern)
307346
if strings.HasSuffix(pattern, "/**") {
308347
// Extract the directory prefix from pattern (everything before /**)
@@ -545,12 +584,21 @@ func shouldExclude(filePath string, excludePatterns []string) bool {
545584
return false
546585
}
547586

587+
fileName := filepath.Base(filePath)
588+
548589
for _, pattern := range excludePatterns {
549590
// We validate the patterns when parsing the copy operation,
550591
// so it's safe to use MatchUnvalidated here.
551592
if doublestar.MatchUnvalidated(pattern, filePath) {
552593
return true
553594
}
595+
// For simple patterns without path separators (e.g., "*.md"),
596+
// also match against just the filename. This provides a more
597+
// intuitive user experience where "*.md" excludes all markdown
598+
// files regardless of their directory depth.
599+
if !strings.Contains(pattern, "/") && doublestar.MatchUnvalidated(pattern, fileName) {
600+
return true
601+
}
554602
}
555603

556604
return false

internal/builder/extract.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
Copyright 2025 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package builder
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
"path/filepath"
24+
"strings"
25+
26+
"github.com/fluxcd/pkg/tar"
27+
)
28+
29+
// tarballExtensions defines the recognized tarball file extensions.
30+
// These are the formats produced by:
31+
// - flux build artifact
32+
// - helm package
33+
//
34+
// Currently supported: .tar.gz and .tgz (gzip-compressed tar archives)
35+
var tarballExtensions = []string{".tar.gz", ".tgz"}
36+
37+
// isTarball checks if a file path has a recognized tarball extension.
38+
// The check is case-insensitive to handle variations like .TGZ or .Tar.Gz.
39+
func isTarball(path string) bool {
40+
lowerPath := strings.ToLower(path)
41+
for _, ext := range tarballExtensions {
42+
if strings.HasSuffix(lowerPath, ext) {
43+
return true
44+
}
45+
}
46+
return false
47+
}
48+
49+
// extractTarball extracts a tarball archive to the destination directory.
50+
// It uses fluxcd/pkg/tar.Untar for secure extraction which provides:
51+
// - Automatic gzip decompression
52+
// - Path traversal attack prevention
53+
// - Symlink security validation
54+
// - File permission preservation
55+
//
56+
// The tarball contents are extracted maintaining their internal directory structure.
57+
// If the destination directory doesn't exist, it will be created with 0755 permissions.
58+
func extractTarball(ctx context.Context,
59+
srcRoot *os.Root,
60+
srcPath string,
61+
stagingDir string,
62+
destPath string) error {
63+
if err := ctx.Err(); err != nil {
64+
return err
65+
}
66+
67+
// Open the tarball through the source root for secure file access
68+
srcFile, err := srcRoot.Open(srcPath)
69+
if err != nil {
70+
return fmt.Errorf("failed to open tarball %q: %w", srcPath, err)
71+
}
72+
defer srcFile.Close()
73+
74+
// Create the full destination path
75+
fullDestPath := filepath.Join(stagingDir, destPath)
76+
if err := os.MkdirAll(fullDestPath, 0o755); err != nil {
77+
return fmt.Errorf("failed to create destination directory %q: %w", fullDestPath, err)
78+
}
79+
80+
// Use fluxcd/pkg/tar.Untar for secure extraction
81+
if err := tar.Untar(srcFile, fullDestPath); err != nil {
82+
return fmt.Errorf("failed to extract tarball %q to %q: %w", srcPath, fullDestPath, err)
83+
}
84+
85+
return nil
86+
}

0 commit comments

Comments
 (0)