Skip to content

Commit f565d8f

Browse files
authored
Merge pull request #302 from chris-morandi/copy-operations-tarball-301
Extend copy operations with tarball extraction capabilities
2 parents 37a803a + b3f059c commit f565d8f

6 files changed

Lines changed: 959 additions & 15 deletions

File tree

api/v1beta1/artifactgenerator_types.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ const (
3737
SourceFetchFailedReason = "SourceFetchFailed"
3838
OverwriteStrategy = "Overwrite"
3939
MergeStrategy = "Merge"
40+
ExtractStrategy = "Extract"
4041
EnabledValue = "enabled"
4142
DisabledValue = "disabled"
4243
)
@@ -149,9 +150,12 @@ type CopyOperation struct {
149150
// Strategy specifies the copy strategy to use.
150151
// 'Overwrite' will overwrite existing files in the destination.
151152
// 'Merge' is for merging YAML files using Helm values merge strategy.
153+
// 'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
154+
// When using glob patterns, non-tarball files are silently skipped. For single file sources,
155+
// the file must be a tarball or an error is returned. Directories are not supported.
152156
// If not specified, defaults to 'Overwrite'.
153157
// +optional
154-
// +kubebuilder:validation:Enum=Overwrite;Merge
158+
// +kubebuilder:validation:Enum=Overwrite;Merge;Extract
155159
Strategy string `json:"strategy,omitempty"`
156160
}
157161

config/crd/bases/source.extensions.fluxcd.io_artifactgenerators.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,14 @@ spec:
8585
Strategy specifies the copy strategy to use.
8686
'Overwrite' will overwrite existing files in the destination.
8787
'Merge' is for merging YAML files using Helm values merge strategy.
88+
'Extract' is for extracting the contents of tarball archives (.tar.gz, .tgz)
89+
When using glob patterns, non-tarball files are silently skipped. For single file sources,
90+
the file must be a tarball or an error is returned. Directories are not supported.
8891
If not specified, defaults to 'Overwrite'.
8992
enum:
9093
- Overwrite
9194
- Merge
95+
- Extract
9296
type: string
9397
to:
9498
description: |-

docs/spec/v1beta1/artifactgenerators.md

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -268,8 +268,8 @@ Each copy operation specifies how to copy files from sources into the generated
268268
the root of the generated artifact and `path` is the relative path to a file or directory.
269269
- `exclude` (optional): A list of glob patterns to filter out from the source selection.
270270
Any file matched by `from` that also matches an exclude pattern will be ignored.
271-
- `strategy` (optional): Defines how to handle existing files at the destination,
272-
either `Overwrite` (default) or `Merge` (for YAML files only).
271+
- `strategy` (optional): Defines how to handle files during copy operations:
272+
`Overwrite` (default), `Merge` (for YAML files), or `Extract` (for tarball archives).
273273

274274
Copy operations use `cp`-like semantics:
275275

@@ -327,6 +327,35 @@ Example of copy with `Merge` strategy:
327327
**Note** that the merge strategy will replace _arrays_ entirely, the behavior is
328328
identical to how Helm merges `values.yaml` files when using multiple `--values` flags.
329329

330+
##### Extract Strategy
331+
332+
The `Extract` strategy is used for extracting the contents of tarball archives (`.tar.gz`, `.tgz`)
333+
built with `flux build artifact` or `helm package`. The tarball contents are extracted
334+
to the destination while preserving their internal directory structure.
335+
336+
Example of copy with `Extract` strategy:
337+
338+
```yaml
339+
# Extract a Helm chart tarball built with `helm package`
340+
- from: "@oci/podinfo-6.7.0.tgz"
341+
to: "@artifact/"
342+
strategy: Extract
343+
344+
# Extract multiple tarballs using glob patterns
345+
- from: "@source/charts/*.tgz"
346+
to: "@artifact/charts/"
347+
strategy: Extract
348+
349+
# Extract tarballs recursively from nested directories
350+
- from: "@source/releases/**/*.tgz"
351+
to: "@artifact/"
352+
strategy: Extract
353+
```
354+
355+
**Note** that when using glob patterns (including recursive `**` patterns) with the `Extract`
356+
strategy, non-tarball files are silently skipped. For single file sources, the file must have
357+
a `.tar.gz` or `.tgz` extension. Directories are not supported with this strategy.
358+
330359
## Working with ArtifactGenerators
331360

332361
### Suspend and Resume Reconciliation

internal/builder/builder.go

Lines changed: 68 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,18 @@ func applyCopyOperations(ctx context.Context,
127127
return nil
128128
}
129129

130+
// If the copy operation uses the Extract strategy, it uses doublestar.Glob as we do not need to walk the whole tree
131+
// otherwise we us std fs.Glob
132+
func getGlobMatchingEntries(op swapi.CopyOperation, srcRoot *os.Root, srcPattern string) ([]string, error) {
133+
if op.Strategy == swapi.ExtractStrategy {
134+
// Use doublestar.Glob for recursive and advanced glob patterns (e.g., **/*.tar.gz)
135+
return doublestar.Glob(srcRoot.FS(), srcPattern)
136+
} else {
137+
// Use fs.Glob for simple, non-recursive glob patterns
138+
return fs.Glob(srcRoot.FS(), srcPattern)
139+
}
140+
}
141+
130142
// applyCopyOperation applies a single copy operation from the sources to the staging directory.
131143
// This function implements cp-like semantics by first analyzing the source pattern to determine
132144
// if it's a glob, direct file/directory reference, or wildcard pattern, then making copy decisions
@@ -175,11 +187,11 @@ func applyCopyOperation(ctx context.Context,
175187

176188
if !isGlobPattern {
177189
// Direct path reference - check what it actually is first (cp-like behavior)
178-
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, destRelPath, destEndsWithSlash)
190+
return applySingleSourceCopy(ctx, op, srcRoot, srcPattern, stagingRoot, stagingDir, destRelPath, destEndsWithSlash)
179191
}
180192

181-
// Glob pattern - find all matches and copy each
182-
matches, err := fs.Glob(srcRoot.FS(), srcPattern)
193+
matches, err := getGlobMatchingEntries(op, srcRoot, srcPattern)
194+
183195
if err != nil {
184196
return fmt.Errorf("invalid glob pattern '%s': %w", srcPattern, err)
185197
}
@@ -188,12 +200,19 @@ func applyCopyOperation(ctx context.Context,
188200
return fmt.Errorf("no files match pattern '%s' in source '%s'", srcPattern, srcAlias)
189201
}
190202

191-
// Filter out excluded files
203+
// Filter out excluded files and special directory entries
192204
filteredMatches := make([]string, 0, len(matches))
193205
for _, match := range matches {
194-
if !shouldExclude(match, op.Exclude) {
195-
filteredMatches = append(filteredMatches, match)
206+
// Skip current directory and parent directory references
207+
// doublestar.Glob returns "." for patterns like "**" which would
208+
// cause the entire source to be copied, bypassing per-file strategies
209+
if match == "." || match == ".." {
210+
continue
211+
}
212+
if shouldExclude(match, op.Exclude) {
213+
continue
196214
}
215+
filteredMatches = append(filteredMatches, match)
197216
}
198217

199218
if len(filteredMatches) == 0 {
@@ -206,10 +225,22 @@ func applyCopyOperation(ctx context.Context,
206225
return err
207226
}
208227

209-
// Calculate destination path based on glob pattern type
210-
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
211-
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
212-
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
228+
// Handle Extract strategy for tarballs
229+
if op.Strategy == swapi.ExtractStrategy {
230+
if !isTarball(match) {
231+
// Skip any match without a tarball extension (non-tarball files and directories alike)
232+
continue
233+
}
234+
if err := extractTarball(ctx, srcRoot, match, stagingDir, destRelPath); err != nil {
235+
return fmt.Errorf("failed to extract tarball '%s' to '%s': %w", match, destRelPath, err)
236+
}
237+
} else {
238+
// Calculate destination path based on glob pattern type
239+
destFile := calculateGlobDestination(srcPattern, match, destRelPath)
240+
241+
if err := copyFileWithRoots(ctx, op, srcRoot, match, stagingRoot, destFile); err != nil {
242+
return fmt.Errorf("failed to copy file '%s' to '%s': %w", match, destFile, err)
243+
}
213244
}
214245
}
215246

@@ -223,6 +254,7 @@ func applySingleSourceCopy(ctx context.Context,
223254
srcRoot *os.Root,
224255
srcPath string,
225256
stagingRoot *os.Root,
257+
stagingDir string,
226258
destPath string,
227259
destEndsWithSlash bool) error {
228260
// Clean the source path to handle trailing slashes
@@ -238,10 +270,14 @@ func applySingleSourceCopy(ctx context.Context,
238270
}
239271

240272
if srcInfo.IsDir() {
273+
// Extract strategy is not supported for directories
274+
if op.Strategy == swapi.ExtractStrategy {
275+
return fmt.Errorf("extract strategy is not supported for directories, got '%s'", srcPath)
276+
}
241277
return applySingleDirectoryCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath)
242-
} else {
243-
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, destPath, destEndsWithSlash)
244278
}
279+
280+
return applySingleFileCopy(ctx, op, srcRoot, srcPath, stagingRoot, stagingDir, destPath, destEndsWithSlash)
245281
}
246282

247283
// applySingleFileCopy handles copying a single file using cp-like semantics:
@@ -252,12 +288,22 @@ func applySingleFileCopy(ctx context.Context,
252288
srcRoot *os.Root,
253289
srcPath string,
254290
stagingRoot *os.Root,
291+
stagingDir string,
255292
destPath string,
256293
destEndsWithSlash bool) error {
257294
// Check if the file should be excluded
258295
if shouldExclude(srcPath, op.Exclude) {
259296
return nil // Skip excluded file
260297
}
298+
299+
// Handle Extract strategy for tarballs
300+
if op.Strategy == swapi.ExtractStrategy {
301+
if !isTarball(srcPath) {
302+
return fmt.Errorf("extract strategy requires tarball file (.tar.gz or .tgz), got '%s'", srcPath)
303+
}
304+
return extractTarball(ctx, srcRoot, srcPath, stagingDir, destPath)
305+
}
306+
261307
var finalDestPath string
262308

263309
if destEndsWithSlash {
@@ -303,6 +349,7 @@ func containsGlobChars(path string) bool {
303349
// - dir/** patterns strip the directory prefix (like cp -r dir/** dest/)
304350
// - other patterns preserve the full match path
305351
func calculateGlobDestination(pattern, match, destPath string) string {
352+
306353
// Check if pattern ends with /** (recursive contents pattern)
307354
if strings.HasSuffix(pattern, "/**") {
308355
// Extract the directory prefix from pattern (everything before /**)
@@ -545,12 +592,21 @@ func shouldExclude(filePath string, excludePatterns []string) bool {
545592
return false
546593
}
547594

595+
fileName := filepath.Base(filePath)
596+
548597
for _, pattern := range excludePatterns {
549598
// We validate the patterns when parsing the copy operation,
550599
// so it's safe to use MatchUnvalidated here.
551600
if doublestar.MatchUnvalidated(pattern, filePath) {
552601
return true
553602
}
603+
// For simple patterns without path separators (e.g., "*.md"),
604+
// also match against just the filename. This provides a more
605+
// intuitive user experience where "*.md" excludes all markdown
606+
// files regardless of their directory depth.
607+
if !strings.Contains(pattern, "/") && doublestar.MatchUnvalidated(pattern, fileName) {
608+
return true
609+
}
554610
}
555611

556612
return false

internal/builder/extract.go

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
Copyright 2025 The Flux authors
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package builder
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"os"
23+
"path/filepath"
24+
"strings"
25+
26+
"github.com/fluxcd/pkg/tar"
27+
)
28+
29+
// tarballExtensions defines the recognized tarball file extensions.
30+
// These are the formats produced by:
31+
// - flux build artifact
32+
// - helm package
33+
//
34+
// Currently supported: .tar.gz and .tgz (gzip-compressed tar archives)
35+
var tarballExtensions = []string{".tar.gz", ".tgz"}
36+
37+
// isTarball checks if a file path has a recognized tarball extension.
38+
// The check is case-insensitive to handle variations like .TGZ or .Tar.Gz.
39+
func isTarball(path string) bool {
40+
lowerPath := strings.ToLower(path)
41+
for _, ext := range tarballExtensions {
42+
if strings.HasSuffix(lowerPath, ext) {
43+
return true
44+
}
45+
}
46+
return false
47+
}
48+
49+
// extractTarball extracts a tarball archive to the destination directory.
50+
// It uses fluxcd/pkg/tar.Untar for secure extraction which provides:
51+
// - Automatic gzip decompression
52+
// - Path traversal attack prevention
53+
// - Symlink security validation
54+
// - File permission preservation
55+
//
56+
// The tarball contents are extracted maintaining their internal directory structure.
57+
// If the destination directory doesn't exist, it will be created with 0755 permissions.
58+
func extractTarball(ctx context.Context,
59+
srcRoot *os.Root,
60+
srcPath string,
61+
stagingDir string,
62+
destPath string) error {
63+
if err := ctx.Err(); err != nil {
64+
return err
65+
}
66+
67+
// Open the tarball through the source root for secure file access
68+
srcFile, err := srcRoot.Open(srcPath)
69+
if err != nil {
70+
return fmt.Errorf("failed to open tarball %q: %w", srcPath, err)
71+
}
72+
defer srcFile.Close()
73+
74+
// Create the full destination path
75+
fullDestPath := filepath.Join(stagingDir, destPath)
76+
if err := os.MkdirAll(fullDestPath, 0o755); err != nil {
77+
return fmt.Errorf("failed to create destination directory %q: %w", fullDestPath, err)
78+
}
79+
80+
// Use fluxcd/pkg/tar.Untar for secure extraction
81+
if err := tar.Untar(srcFile, fullDestPath); err != nil {
82+
return fmt.Errorf("failed to extract tarball %q to %q: %w", srcPath, fullDestPath, err)
83+
}
84+
85+
return nil
86+
}

0 commit comments

Comments
 (0)