Skip to content

Commit 18982d4

Browse files
author
Chris Morandi
committed
If applied, this commit will add an Extraction strategy to the ArtifactGenerator copy operations which will extract the content of matched tarball files to the output artifact. This is in response to the issue detailed here #301
Signed-off-by: Chris Morandi <chris.morandi@velocitaconsultancy.uk>
1 parent 37a803a commit 18982d4

6 files changed

Lines changed: 961 additions & 15 deletions

File tree

api/v1beta1/artifactgenerator_types.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ const (
3737
SourceFetchFailedReason = "SourceFetchFailed"
3838
OverwriteStrategy = "Overwrite"
3939
MergeStrategy = "Merge"
40+
ExtractStrategy = "Extract"
4041
EnabledValue = "enabled"
4142
DisabledValue = "disabled"
4243
)
@@ -149,9 +150,13 @@ type CopyOperation struct {
149150
// Strategy specifies the copy strategy to use.
150151
// 'Overwrite' will overwrite existing files in the destination.
151152
// 'Merge' is for merging YAML files using Helm values merge strategy.
153+
// 'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
154+
// built with flux build artifact or helm package. When using glob patterns,
155+
// non-tarball files are silently skipped. For single file sources, the file
156+
// must be a tarball or an error is returned. Directories are not supported.
152157
// If not specified, defaults to 'Overwrite'.
153158
// +optional
154-
// +kubebuilder:validation:Enum=Overwrite;Merge
159+
// +kubebuilder:validation:Enum=Overwrite;Merge;Extract
155160
Strategy string `json:"strategy,omitempty"`
156161
}
157162

config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,15 @@ spec:
8585
Strategy specifies the copy strategy to use.
8686
'Overwrite' will overwrite existing files in the destination.
8787
'Merge' is for merging YAML files using Helm values merge strategy.
88+
'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
89+
built with flux build artifact or helm package. When using glob patterns,
90+
non-tarball files are silently skipped. For single file sources, the file
91+
must be a tarball or an error is returned. Directories are not supported.
8892
If not specified, defaults to 'Overwrite'.
8993
enum:
9094
- Overwrite
9195
- Merge
96+
- Extract
9297
type: string
9398
to:
9499
description: |-

docs/spec/v1beta1/artifactgenerators.md

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ Each copy operation specifies how to copy files from sources into the generated
268268
the root of the generated artifact and `path` is the relative path to a file or directory.
269269
- `exclude` (optional): A list of glob patterns to filter out from the source selection.
270270
Any file matched by `from` that also matches an exclude pattern will be ignored.
271-
- `strategy` (optional): Defines how to handle existing files at the destination,
272-
either `Overwrite` (default) or `Merge` (for YAML files only).
271+
- `strategy` (optional): Defines how to handle files during copy operations:
272+
`Overwrite` (default), `Merge` (for YAML files), or `Extract` (for tarball archives).
273273

274274
Copy operations use `cp`-like semantics:
275275

@@ -327,6 +327,35 @@ Example of copy with `Merge` strategy:
327327
**Note** that the merge strategy will replace _arrays_ entirely, the behavior is
328328
identical to how Helm merges `values.yaml` files when using multiple `--values` flags.
329329

330+
##### Extract Strategy
331+
332+
The `Extract` strategy is used for extracting the contents of tarball archives (`.tar.gz`, `.tgz`)
333+
built with `flux build artifact` or `helm package`. The tarball contents are extracted
334+
to the destination while preserving their internal directory structure.
335+
336+
Example of copy with `Extract` strategy:
337+
338+
```yaml
339+
# Extract a Helm chart tarball built with `helm package`
340+
- from: "@oci/podinfo-6.7.0.tgz"
341+
to: "@artifact/"
342+
strategy: Extract
343+
344+
# Extract multiple tarballs using glob patterns
345+
- from: "@source/charts/*.tgz"
346+
to: "@artifact/charts/"
347+
strategy: Extract
348+
349+
# Extract tarballs recursively from nested directories
350+
- from: "@source/releases/**/*.tgz"
351+
to: "@artifact/"
352+
strategy: Extract
353+
```
354+
355+
**Note** that when using glob patterns (including recursive `**` patterns) with the `Extract`
356+
strategy, non-tarball files are silently skipped. For single file sources, the file must have
357+
a `.tar.gz` or `.tgz` extension. Directories are not supported with this strategy.
358+
330359
## Working with ArtifactGenerators
331360

332361
### Suspend and Resume Reconciliation

internal/builder/builder.go

Lines changed: 69 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,18 @@ func applyCopyOperations(ctx context.Context,
127127
return nil
128128
}
129129

130+
// If the copy operation uses the Extract strategy, it uses doublestar.Glob as we do not need to walk the whole tree
131+
// otherwise we us std fs.Glob
132+
func getGlobMatchingEntries(op swapi.CopyOperation, srcRoot *os.Root, srcPattern string) ([]string, error) {
133+
if op.Strategy == swapi.ExtractStrategy {
134+
// Use doublestar.Glob for recursive and advanced glob patterns (e.g., **/*.tar.gz)
135+
return doublestar.Glob(srcRoot.FS(), srcPattern)
136+
} else {
137+
// Use fs.Glob for simple, non-recursive glob patterns
138+
return fs.Glob(srcRoot.FS(), srcPattern)
139+
}
140+
}
141+
130142
// applyCopyOperation applies a single copy operation from the sources to the staging directory.
131143
// This function implements cp-like semantics by first analyzing the source pattern to determine
132144
// if it's a glob, direct file/directory reference, or wildcard pattern, then making copy decisions
@@ -175,11 +187,11 @@ func applyCopyOperation(ctx context.Context,
175187

176188
if !isGlobPattern {
177189
// Direct path reference - check what it actually is first (cp-like behavior)
178-
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, destRelPath, destEndsWithSlash)
190+
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, stagingDir, destRelPath, destEndsWithSlash)
179191
}
180192

181-
// Glob pattern - find all matches and copy each
182-
matches, err := fs.Glob(srcRoot.FS(), srcPattern)
193+
matches, err := getGlobMatchingEntries(op, srcRoot, srcPattern)
194+
183195
if err != nil {
184196
return fmt.Errorf("invalid glob pattern '%s': %w", srcPattern, err)
185197
}
@@ -188,12 +200,19 @@ func applyCopyOperation(ctx context.Context,
188200
return fmt.Errorf("no files match pattern '%s' in source '%s'", srcPattern, srcAlias)
189201
}
190202

191-
// Filter out excluded files
203+
// Filter out excluded files and special directory entries
192204
filteredMatches := make([]string, 0, len(matches))
193205
for _, match := range matches {
194-
if !shouldExclude(match, op.Exclude) {
195-
filteredMatches = append(filteredMatches, match)
206+
// Skip current directory and parent directory references
207+
// doublestar.Glob returns "." for patterns like "**" which would
208+
// cause the entire source to be copied, bypassing per-file strategies
209+
if match == "." || match == ".." {
210+
continue
211+
}
212+
if shouldExclude(match, op.Exclude) {
213+
continue
196214
}
215+
filteredMatches = append(filteredMatches, match)
197216
}
198217

199218
if len(filteredMatches) == 0 {
@@ -206,11 +225,24 @@ func applyCopyOperation(ctx context.Context,
206225
return err
207226
}
208227

209-
// Calculate destination path based on glob pattern type
210-
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
211-
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
212-
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
228+
// Handle Extract strategy for tarballs
229+
if op.Strategy == swapi.ExtractStrategy {
230+
if !isTarball(match) {
231+
// Ignore files that are not tarball archives and directories
232+
continue
233+
}
234+
if err := extractTarball(ctx, srcRoot, match, stagingDir, destRelPath); err != nil {
235+
return fmt.Errorf("failed to extract tarball '%s' to '%s': %w", match, destRelPath, err)
236+
}
237+
} else {
238+
// Calculate destination path based on glob pattern type
239+
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
240+
241+
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
242+
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
243+
}
213244
}
245+
214246
}
215247

216248
return nil
@@ -223,6 +255,7 @@ func applySingleSourceCopy(ctx context.Context,
223255
srcRoot *os.Root,
224256
srcPath string,
225257
stagingRoot *os.Root,
258+
stagingDir string,
226259
destPath string,
227260
destEndsWithSlash bool) error {
228261
// Clean the source path to handle trailing slashes
@@ -238,10 +271,14 @@ func applySingleSourceCopy(ctx context.Context,
238271
}
239272

240273
if srcInfo.IsDir() {
274+
// Extract strategy is not supported for directories
275+
if op.Strategy == swapi.ExtractStrategy {
276+
return fmt.Errorf("extract strategy is not supported for directories, got '%s'", srcPath)
277+
}
241278
return applySingleDirectoryCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath)
242-
} else {
243-
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath, destEndsWithSlash)
244279
}
280+
281+
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, stagingDir, destPath, destEndsWithSlash)
245282
}
246283

247284
// applySingleFileCopy handles copying a single file using cp-like semantics:
@@ -252,12 +289,22 @@ func applySingleFileCopy(ctx context.Context,
252289
srcRoot *os.Root,
253290
srcPath string,
254291
stagingRoot *os.Root,
292+
stagingDir string,
255293
destPath string,
256294
destEndsWithSlash bool) error {
257295
// Check if the file should be excluded
258296
if shouldExclude(srcPath, op.Exclude) {
259297
return nil // Skip excluded file
260298
}
299+
300+
// Handle Extract strategy for tarballs
301+
if op.Strategy == swapi.ExtractStrategy {
302+
if !isTarball(srcPath) {
303+
return fmt.Errorf("extract strategy requires tarball file (.tar.gz or .tgz), got '%s'", srcPath)
304+
}
305+
return extractTarball(ctx, srcRoot, srcPath, stagingDir, destPath)
306+
}
307+
261308
var finalDestPath string
262309

263310
if destEndsWithSlash {
@@ -303,6 +350,7 @@ func containsGlobChars(path string) bool {
303350
// - dir/** patterns strip the directory prefix (like cp -r dir/** dest/)
304351
// - other patterns preserve the full match path
305352
func calculateGlobDestination(pattern, match, destPath string) string {
353+
306354
// Check if pattern ends with /** (recursive contents pattern)
307355
if strings.HasSuffix(pattern, "/**") {
308356
// Extract the directory prefix from pattern (everything before /**)
@@ -545,12 +593,21 @@ func shouldExclude(filePath string, excludePatterns []string) bool {
545593
return false
546594
}
547595

596+
fileName := filepath.Base(filePath)
597+
548598
for _, pattern := range excludePatterns {
549599
// We validate the patterns when parsing the copy operation,
550600
// so it's safe to use MatchUnvalidated here.
551601
if doublestar.MatchUnvalidated(pattern, filePath) {
552602
return true
553603
}
604+
// For simple patterns without path separators (e.g., "*.md"),
605+
// also match against just the filename. This provides a more
606+
// intuitive user experience where "*.md" excludes all markdown
607+
// files regardless of their directory depth.
608+
if !strings.Contains(pattern, "/") && doublestar.MatchUnvalidated(pattern, fileName) {
609+
return true
610+
}
554611
}
555612

556613
return false

internal/builder/extract.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
Copyright 2025 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package builder
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
"path/filepath"
24+
"strings"
25+
26+
"github.com/fluxcd/pkg/tar"
27+
)
28+
29+
// tarballExtensions lists the file name suffixes that are treated as tarball
// archives. Both entries denote gzip-compressed tar archives, the format
// produced by:
//   - flux build artifact
//   - helm package
//
// Entries are lowercase because isTarball lowercases the path before comparing.
var tarballExtensions = []string{".tar.gz", ".tgz"}

// isTarball reports whether path ends with one of the suffixes listed in
// tarballExtensions. The comparison ignores case, so names such as
// "chart.TGZ" or "bundle.Tar.Gz" are accepted. Only the name is inspected;
// the file itself is never opened.
func isTarball(path string) bool {
	name := strings.ToLower(path)
	matched := false
	// Stop scanning as soon as one suffix matches.
	for idx := 0; idx < len(tarballExtensions) && !matched; idx++ {
		matched = strings.HasSuffix(name, tarballExtensions[idx])
	}
	return matched
}
48+
49+
// extractTarball extracts a tarball archive to the destination directory.
50+
// It uses fluxcd/pkg/tar.Untar for secure extraction which provides:
51+
// - Automatic gzip decompression
52+
// - Path traversal attack prevention
53+
// - Symlink security validation
54+
// - File permission preservation
55+
//
56+
// The tarball contents are extracted maintaining their internal directory structure.
57+
// If the destination directory doesn't exist, it will be created with 0755 permissions.
58+
func extractTarball(ctx context.Context,
59+
srcRoot *os.Root,
60+
srcPath string,
61+
stagingDir string,
62+
destPath string) error {
63+
if err := ctx.Err(); err != nil {
64+
return err
65+
}
66+
67+
// Open the tarball through the source root for secure file access
68+
srcFile, err := srcRoot.Open(srcPath)
69+
if err != nil {
70+
return fmt.Errorf("failed to open tarball %q: %w", srcPath, err)
71+
}
72+
defer srcFile.Close()
73+
74+
// Create the full destination path
75+
fullDestPath := filepath.Join(stagingDir, destPath)
76+
if err := os.MkdirAll(fullDestPath, 0o755); err != nil {
77+
return fmt.Errorf("failed to create destination directory %q: %w", fullDestPath, err)
78+
}
79+
80+
// Use fluxcd/pkg/tar.Untar for secure extraction
81+
if err := tar.Untar(srcFile, fullDestPath); err != nil {
82+
return fmt.Errorf("failed to extract tarball %q to %q: %w", srcPath, fullDestPath, err)
83+
}
84+
85+
return nil
86+
}

0 commit comments

Comments
 (0)