Skip to content

Commit 41bfb71

Browse files
JAORMX and claude committed
Add reproducible OCI skill packager
Implement the SkillPackager interface with deterministic OCI artifact creation from skill directories. Parses SKILL.md YAML frontmatter for metadata, creates reproducible tar.gz content layers, per-platform OCI configs with skill metadata in labels, manifests with annotations, and multi-platform image indexes.

Key components:
- tar.go: Deterministic tar (sorted entries, normalized headers, PAX format)
- gzip.go: Deterministic gzip (OS=255, no name/comment, BestCompression)
- packager.go: Full packaging pipeline with directory reading and frontmatter parsing

Security: rejects symlinks (files and directories), hardlinks, device entries, path traversal in both filesystem reads and tar extraction. Size limits on decompression and per-file extraction.

Resolves: #16

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1b6a967 commit 41bfb71

7 files changed

Lines changed: 1969 additions & 1 deletion

File tree

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ require (
99
github.com/stretchr/testify v1.11.1
1010
go.uber.org/mock v0.6.0
1111
golang.org/x/net v0.49.0
12+
gopkg.in/yaml.v3 v3.0.1
1213
)
1314

1415
require (
@@ -22,5 +23,4 @@ require (
2223
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
2324
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
2425
google.golang.org/protobuf v1.36.10 // indirect
25-
gopkg.in/yaml.v3 v3.0.1 // indirect
2626
)

oci/skills/gzip.go

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package skills
5+
6+
import (
7+
"bytes"
8+
"compress/gzip"
9+
"fmt"
10+
"io"
11+
"time"
12+
)
13+
14+
// gzipOSUnknown is the OS value for "unknown" in gzip headers (RFC 1952).
15+
// Using this value ensures cross-platform reproducibility.
16+
const gzipOSUnknown = 255
17+
18+
// GzipOptions configures reproducible gzip compression.
19+
type GzipOptions struct {
20+
// Level is the compression level (defaults to gzip.BestCompression).
21+
Level int
22+
23+
// Epoch is the modification time to use in the gzip header.
24+
// If zero, uses Unix epoch (1970-01-01) for reproducibility.
25+
Epoch time.Time
26+
}
27+
28+
// DefaultGzipOptions returns default options for reproducible gzip compression.
29+
func DefaultGzipOptions() GzipOptions {
30+
return GzipOptions{
31+
Level: gzip.BestCompression,
32+
Epoch: time.Unix(0, 0).UTC(),
33+
}
34+
}
35+
36+
// Compress creates a reproducible gzip compressed byte slice.
37+
// Headers are explicitly controlled for reproducibility:
38+
// - ModTime: uses opts.Epoch (defaults to Unix epoch)
39+
// - Name: empty (no filename)
40+
// - Comment: empty
41+
// - OS: 255 (unknown) for cross-platform consistency
42+
func Compress(data []byte, opts GzipOptions) ([]byte, error) {
43+
if opts.Level == 0 {
44+
opts.Level = gzip.BestCompression
45+
}
46+
47+
// Use Unix epoch if no epoch specified
48+
epoch := opts.Epoch
49+
if epoch.IsZero() {
50+
epoch = time.Unix(0, 0).UTC()
51+
}
52+
53+
var buf bytes.Buffer
54+
gw, err := gzip.NewWriterLevel(&buf, opts.Level)
55+
if err != nil {
56+
return nil, fmt.Errorf("creating gzip writer: %w", err)
57+
}
58+
59+
// Explicitly set header fields for reproducibility
60+
gw.ModTime = epoch
61+
gw.Name = ""
62+
gw.Comment = ""
63+
gw.OS = gzipOSUnknown
64+
65+
if _, err := gw.Write(data); err != nil {
66+
return nil, fmt.Errorf("writing gzip data: %w", err)
67+
}
68+
69+
if err := gw.Close(); err != nil {
70+
return nil, fmt.Errorf("closing gzip writer: %w", err)
71+
}
72+
73+
return buf.Bytes(), nil
74+
}
75+
76+
// MaxDecompressedSize is the maximum size of decompressed data (100MB).
77+
// This prevents decompression bombs.
78+
const MaxDecompressedSize = 100 * 1024 * 1024
79+
80+
// Decompress decompresses gzip data.
81+
func Decompress(data []byte) ([]byte, error) {
82+
return DecompressWithLimit(data, MaxDecompressedSize)
83+
}
84+
85+
// DecompressWithLimit decompresses gzip data, refusing to return more than
// maxSize bytes of output.
//
// It returns an error if data does not start a valid gzip stream, if reading
// the stream fails, or if the decompressed payload is larger than maxSize.
// A negative maxSize can never be satisfied and therefore always yields an
// error.
func DecompressWithLimit(data []byte, maxSize int64) ([]byte, error) {
	gr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("creating gzip reader: %w", err)
	}
	// Stream errors are reported by the read below; the Close error is
	// deliberately ignored.
	defer func() { _ = gr.Close() }()

	// Read one byte past the limit so an oversized payload can be detected
	// without buffering all of it. Guard the increment: when maxSize is the
	// largest int64 the sum wraps to a negative value, which would make
	// io.LimitReader report EOF immediately and silently yield empty output.
	readLimit := maxSize + 1
	if readLimit < maxSize {
		readLimit = maxSize
	}

	result, err := io.ReadAll(io.LimitReader(gr, readLimit))
	if err != nil {
		return nil, fmt.Errorf("reading gzip data: %w", err)
	}

	if int64(len(result)) > maxSize {
		return nil, fmt.Errorf("decompressed data exceeds maximum size of %d bytes", maxSize)
	}

	return result, nil
}
106+
107+
// CompressTar creates a reproducible .tar.gz from the given files.
108+
func CompressTar(files []FileEntry, tarOpts TarOptions, gzipOpts GzipOptions) ([]byte, error) {
109+
tarData, err := CreateTar(files, tarOpts)
110+
if err != nil {
111+
return nil, fmt.Errorf("creating tar: %w", err)
112+
}
113+
114+
gzipData, err := Compress(tarData, gzipOpts)
115+
if err != nil {
116+
return nil, fmt.Errorf("compressing tar: %w", err)
117+
}
118+
119+
return gzipData, nil
120+
}
121+
122+
// DecompressTar extracts files from a .tar.gz archive.
123+
func DecompressTar(data []byte) ([]FileEntry, error) {
124+
tarData, err := Decompress(data)
125+
if err != nil {
126+
return nil, fmt.Errorf("decompressing gzip: %w", err)
127+
}
128+
129+
files, err := ExtractTar(tarData)
130+
if err != nil {
131+
return nil, fmt.Errorf("extracting tar: %w", err)
132+
}
133+
134+
return files, nil
135+
}

oci/skills/gzip_test.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package skills
5+
6+
import (
7+
"bytes"
8+
"compress/gzip"
9+
"testing"
10+
"time"
11+
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
func TestCompress_Reproducible(t *testing.T) {
17+
t.Parallel()
18+
19+
data := []byte("test data for compression")
20+
opts := DefaultGzipOptions()
21+
22+
gz1, err := Compress(data, opts)
23+
require.NoError(t, err)
24+
25+
gz2, err := Compress(data, opts)
26+
require.NoError(t, err)
27+
28+
assert.Equal(t, gz1, gz2, "Compress should produce identical output for same input")
29+
}
30+
31+
func TestCompress_HeaderFieldsForReproducibility(t *testing.T) {
32+
t.Parallel()
33+
34+
data := []byte("test data")
35+
epoch := time.Unix(1234567890, 0).UTC()
36+
opts := GzipOptions{
37+
Level: gzip.BestCompression,
38+
Epoch: epoch,
39+
}
40+
41+
compressed, err := Compress(data, opts)
42+
require.NoError(t, err)
43+
44+
gr, err := gzip.NewReader(bytes.NewReader(compressed))
45+
require.NoError(t, err)
46+
defer gr.Close()
47+
48+
assert.True(t, gr.ModTime.Equal(epoch), "ModTime should match epoch")
49+
assert.Empty(t, gr.Name, "Name should be empty")
50+
assert.Empty(t, gr.Comment, "Comment should be empty")
51+
assert.Equal(t, byte(gzipOSUnknown), gr.OS, "OS should be 255 (unknown)")
52+
}
53+
54+
func TestCompress_DifferentEpochs(t *testing.T) {
55+
t.Parallel()
56+
57+
data := []byte("test data")
58+
59+
tests := []struct {
60+
name string
61+
epoch1 time.Time
62+
epoch2 time.Time
63+
wantEqual bool
64+
}{
65+
{
66+
name: "same epoch produces same output",
67+
epoch1: time.Unix(1609459200, 0).UTC(),
68+
epoch2: time.Unix(1609459200, 0).UTC(),
69+
wantEqual: true,
70+
},
71+
{
72+
name: "different epochs produce different output",
73+
epoch1: time.Unix(0, 0).UTC(),
74+
epoch2: time.Unix(1000000, 0).UTC(),
75+
wantEqual: false,
76+
},
77+
}
78+
79+
for _, tt := range tests {
80+
t.Run(tt.name, func(t *testing.T) {
81+
t.Parallel()
82+
83+
opts1 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch1}
84+
opts2 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch2}
85+
86+
gz1, err := Compress(data, opts1)
87+
require.NoError(t, err)
88+
89+
gz2, err := Compress(data, opts2)
90+
require.NoError(t, err)
91+
92+
if tt.wantEqual {
93+
assert.Equal(t, gz1, gz2)
94+
} else {
95+
assert.NotEqual(t, gz1, gz2)
96+
}
97+
})
98+
}
99+
}
100+
101+
func TestCompress_SameEpochAlwaysReproducible(t *testing.T) {
102+
t.Parallel()
103+
104+
data := []byte("test data for reproducibility check")
105+
epoch := time.Unix(1609459200, 0).UTC()
106+
opts := GzipOptions{Level: gzip.BestCompression, Epoch: epoch}
107+
108+
results := make([][]byte, 5)
109+
for i := range results {
110+
var err error
111+
results[i], err = Compress(data, opts)
112+
require.NoError(t, err)
113+
}
114+
115+
for i := 1; i < len(results); i++ {
116+
assert.Equal(t, results[0], results[i], "iteration %d should match", i)
117+
}
118+
}
119+
120+
func TestCompressDecompress_RoundTrip(t *testing.T) {
121+
t.Parallel()
122+
123+
original := []byte("test data for round trip")
124+
opts := DefaultGzipOptions()
125+
126+
compressed, err := Compress(original, opts)
127+
require.NoError(t, err)
128+
129+
decompressed, err := Decompress(compressed)
130+
require.NoError(t, err)
131+
132+
assert.Equal(t, original, decompressed)
133+
}
134+
135+
func TestDecompressWithLimit_RejectsOversized(t *testing.T) {
136+
t.Parallel()
137+
138+
// Create compressed data that exceeds the limit when decompressed
139+
data := bytes.Repeat([]byte("x"), 1024)
140+
compressed, err := Compress(data, DefaultGzipOptions())
141+
require.NoError(t, err)
142+
143+
_, err = DecompressWithLimit(compressed, 100)
144+
assert.Error(t, err)
145+
assert.Contains(t, err.Error(), "exceeds maximum size")
146+
}
147+
148+
func TestCompressTar_Reproducible(t *testing.T) {
149+
t.Parallel()
150+
151+
files := []FileEntry{
152+
{Path: "b.txt", Content: []byte("content b")},
153+
{Path: "a.txt", Content: []byte("content a")},
154+
}
155+
156+
tarOpts := DefaultTarOptions()
157+
gzipOpts := DefaultGzipOptions()
158+
159+
gz1, err := CompressTar(files, tarOpts, gzipOpts)
160+
require.NoError(t, err)
161+
162+
gz2, err := CompressTar(files, tarOpts, gzipOpts)
163+
require.NoError(t, err)
164+
165+
assert.Equal(t, gz1, gz2, "CompressTar should produce identical output")
166+
}
167+
168+
func TestCompressTar_RoundTrip(t *testing.T) {
169+
t.Parallel()
170+
171+
originalFiles := []FileEntry{
172+
{Path: "a.txt", Content: []byte("content a")},
173+
{Path: "dir/b.txt", Content: []byte("content b")},
174+
}
175+
176+
tarOpts := DefaultTarOptions()
177+
gzipOpts := DefaultGzipOptions()
178+
179+
compressed, err := CompressTar(originalFiles, tarOpts, gzipOpts)
180+
require.NoError(t, err)
181+
182+
extractedFiles, err := DecompressTar(compressed)
183+
require.NoError(t, err)
184+
185+
require.Len(t, extractedFiles, len(originalFiles))
186+
for i, f := range extractedFiles {
187+
assert.Equal(t, originalFiles[i].Path, f.Path)
188+
assert.Equal(t, originalFiles[i].Content, f.Content)
189+
}
190+
}

0 commit comments

Comments
 (0)