Skip to content

Commit aa1ebf4

Browse files
JAORMX and claude committed
Add reproducible OCI skill packager
Implement the SkillPackager interface with deterministic OCI artifact creation from skill directories. Uses ocispec types from github.com/opencontainers/image-spec for OCI 1.1 compliance. Parses SKILL.md YAML frontmatter for metadata, and creates reproducible tar.gz content layers, per-platform OCI configs (ocispec.Image) with skill metadata in labels, manifests (ocispec.Manifest) with annotations, and multi-platform image indexes (ocispec.Index).

Key components:
- tar.go: Deterministic tar (sorted entries, normalized headers, PAX format)
- gzip.go: Deterministic gzip (OS=255, no name/comment, BestCompression)
- packager.go: Full packaging pipeline using ocispec types, with digest.FromBytes for diff IDs and specs.Versioned for the schema version

Security: rejects symlinks (files and directories), hardlinks, device entries, and path traversal in both filesystem reads and tar extraction. Enforces size limits on decompression and per-file extraction.

Resolves: #16

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d2f369f commit aa1ebf4

7 files changed

Lines changed: 1939 additions & 1 deletion

File tree

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/stretchr/testify v1.11.1
1111
go.uber.org/mock v0.6.0
1212
golang.org/x/net v0.49.0
13+
gopkg.in/yaml.v3 v3.0.1
1314
oras.land/oras-go/v2 v2.6.0
1415
)
1516

@@ -25,5 +26,4 @@ require (
2526
google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
2627
google.golang.org/genproto/googleapis/rpc v0.0.0-20240826202546-f6391c0de4c7 // indirect
2728
google.golang.org/protobuf v1.36.10 // indirect
28-
gopkg.in/yaml.v3 v3.0.1 // indirect
2929
)

oci/skills/gzip.go

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package skills
5+
6+
import (
7+
"bytes"
8+
"compress/gzip"
9+
"fmt"
10+
"io"
11+
"time"
12+
)
13+
14+
// gzipOSUnknown is the gzip header OS code that RFC 1952 assigns to "unknown".
// Compress writes it explicitly so the OS byte of the header is always the same.
const gzipOSUnknown = 0xff
17+
18+
// GzipOptions holds the settings used to produce reproducible gzip output.
type GzipOptions struct {
	// Level is the compression level handed to the gzip writer.
	// Compress treats 0 as "unset" and substitutes gzip.BestCompression.
	Level int

	// Epoch is the modification time written to the gzip header.
	// Compress substitutes the Unix epoch (1970-01-01 UTC) when Epoch is
	// the zero time, which keeps the output reproducible.
	Epoch time.Time
}
27+
28+
// DefaultGzipOptions returns default options for reproducible gzip compression.
29+
func DefaultGzipOptions() GzipOptions {
30+
return GzipOptions{
31+
Level: gzip.BestCompression,
32+
Epoch: time.Unix(0, 0).UTC(),
33+
}
34+
}
35+
36+
// Compress creates a reproducible gzip compressed byte slice.
37+
// Headers are explicitly controlled for reproducibility:
38+
// - ModTime: uses opts.Epoch (defaults to Unix epoch)
39+
// - Name: empty (no filename)
40+
// - Comment: empty
41+
// - OS: 255 (unknown) for cross-platform consistency
42+
func Compress(data []byte, opts GzipOptions) ([]byte, error) {
43+
if opts.Level == 0 {
44+
opts.Level = gzip.BestCompression
45+
}
46+
47+
// Use Unix epoch if no epoch specified
48+
epoch := opts.Epoch
49+
if epoch.IsZero() {
50+
epoch = time.Unix(0, 0).UTC()
51+
}
52+
53+
var buf bytes.Buffer
54+
gw, err := gzip.NewWriterLevel(&buf, opts.Level)
55+
if err != nil {
56+
return nil, fmt.Errorf("creating gzip writer: %w", err)
57+
}
58+
59+
// Explicitly set header fields for reproducibility
60+
gw.ModTime = epoch
61+
gw.Name = ""
62+
gw.Comment = ""
63+
gw.OS = gzipOSUnknown
64+
65+
if _, err := gw.Write(data); err != nil {
66+
return nil, fmt.Errorf("writing gzip data: %w", err)
67+
}
68+
69+
if err := gw.Close(); err != nil {
70+
return nil, fmt.Errorf("closing gzip writer: %w", err)
71+
}
72+
73+
return buf.Bytes(), nil
74+
}
75+
76+
// MaxDecompressedSize caps the amount of data Decompress will return at
// 100 MiB, as a guard against decompression bombs.
const MaxDecompressedSize = 100 << 20
79+
80+
// Decompress decompresses gzip data.
81+
func Decompress(data []byte) ([]byte, error) {
82+
return DecompressWithLimit(data, MaxDecompressedSize)
83+
}
84+
85+
// DecompressWithLimit decompresses gzip data, refusing to return more than
// maxSize bytes of output.
//
// It returns an error if data cannot be opened as a gzip stream, if reading
// the stream fails, or if the decompressed size is greater than maxSize.
// Because the size check is "len(output) > maxSize", a negative maxSize
// always fails. On error the returned slice is nil.
func DecompressWithLimit(data []byte, maxSize int64) ([]byte, error) {
	gr, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, fmt.Errorf("creating gzip reader: %w", err)
	}
	// The close error is deliberately discarded; read failures are returned
	// from io.ReadAll below.
	defer func() { _ = gr.Close() }()

	// Read one byte past the limit so "exactly maxSize" can be told apart
	// from "more than maxSize". Computing maxSize+1 unconditionally would
	// wrap to a negative value when maxSize is the largest int64; a
	// non-positive limit makes io.LimitReader report EOF immediately, which
	// would silently produce empty output. In that case no input can exceed
	// the limit anyway, so read with maxSize itself.
	const maxInt64 = 1<<63 - 1
	readLimit := maxSize
	if maxSize < maxInt64 {
		readLimit = maxSize + 1
	}

	// Limit read size to prevent decompression bombs.
	result, err := io.ReadAll(io.LimitReader(gr, readLimit))
	if err != nil {
		return nil, fmt.Errorf("reading gzip data: %w", err)
	}

	if int64(len(result)) > maxSize {
		return nil, fmt.Errorf("decompressed data exceeds maximum size of %d bytes", maxSize)
	}

	return result, nil
}
106+
107+
// CompressTar creates a reproducible .tar.gz from the given files.
108+
func CompressTar(files []FileEntry, tarOpts TarOptions, gzipOpts GzipOptions) ([]byte, error) {
109+
tarData, err := CreateTar(files, tarOpts)
110+
if err != nil {
111+
return nil, fmt.Errorf("creating tar: %w", err)
112+
}
113+
114+
gzipData, err := Compress(tarData, gzipOpts)
115+
if err != nil {
116+
return nil, fmt.Errorf("compressing tar: %w", err)
117+
}
118+
119+
return gzipData, nil
120+
}
121+
122+
// DecompressTar extracts files from a .tar.gz archive.
123+
func DecompressTar(data []byte) ([]FileEntry, error) {
124+
tarData, err := Decompress(data)
125+
if err != nil {
126+
return nil, fmt.Errorf("decompressing gzip: %w", err)
127+
}
128+
129+
files, err := ExtractTar(tarData)
130+
if err != nil {
131+
return nil, fmt.Errorf("extracting tar: %w", err)
132+
}
133+
134+
return files, nil
135+
}

oci/skills/gzip_test.go

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package skills
5+
6+
import (
7+
"bytes"
8+
"compress/gzip"
9+
"testing"
10+
"time"
11+
12+
"github.com/stretchr/testify/assert"
13+
"github.com/stretchr/testify/require"
14+
)
15+
16+
func TestCompress_Reproducible(t *testing.T) {
17+
t.Parallel()
18+
19+
data := []byte("test data for compression")
20+
opts := DefaultGzipOptions()
21+
22+
gz1, err := Compress(data, opts)
23+
require.NoError(t, err)
24+
25+
gz2, err := Compress(data, opts)
26+
require.NoError(t, err)
27+
28+
assert.Equal(t, gz1, gz2, "Compress should produce identical output for same input")
29+
}
30+
31+
func TestCompress_HeaderFieldsForReproducibility(t *testing.T) {
32+
t.Parallel()
33+
34+
data := []byte("test data")
35+
epoch := time.Unix(1234567890, 0).UTC()
36+
opts := GzipOptions{
37+
Level: gzip.BestCompression,
38+
Epoch: epoch,
39+
}
40+
41+
compressed, err := Compress(data, opts)
42+
require.NoError(t, err)
43+
44+
gr, err := gzip.NewReader(bytes.NewReader(compressed))
45+
require.NoError(t, err)
46+
defer gr.Close()
47+
48+
assert.True(t, gr.ModTime.Equal(epoch), "ModTime should match epoch")
49+
assert.Empty(t, gr.Name, "Name should be empty")
50+
assert.Empty(t, gr.Comment, "Comment should be empty")
51+
assert.Equal(t, byte(gzipOSUnknown), gr.OS, "OS should be 255 (unknown)")
52+
}
53+
54+
func TestCompress_DifferentEpochs(t *testing.T) {
55+
t.Parallel()
56+
57+
data := []byte("test data")
58+
59+
tests := []struct {
60+
name string
61+
epoch1 time.Time
62+
epoch2 time.Time
63+
wantEqual bool
64+
}{
65+
{
66+
name: "same epoch produces same output",
67+
epoch1: time.Unix(1609459200, 0).UTC(),
68+
epoch2: time.Unix(1609459200, 0).UTC(),
69+
wantEqual: true,
70+
},
71+
{
72+
name: "different epochs produce different output",
73+
epoch1: time.Unix(0, 0).UTC(),
74+
epoch2: time.Unix(1000000, 0).UTC(),
75+
wantEqual: false,
76+
},
77+
}
78+
79+
for _, tt := range tests {
80+
t.Run(tt.name, func(t *testing.T) {
81+
t.Parallel()
82+
83+
opts1 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch1}
84+
opts2 := GzipOptions{Level: gzip.BestCompression, Epoch: tt.epoch2}
85+
86+
gz1, err := Compress(data, opts1)
87+
require.NoError(t, err)
88+
89+
gz2, err := Compress(data, opts2)
90+
require.NoError(t, err)
91+
92+
if tt.wantEqual {
93+
assert.Equal(t, gz1, gz2)
94+
} else {
95+
assert.NotEqual(t, gz1, gz2)
96+
}
97+
})
98+
}
99+
}
100+
101+
func TestCompress_SameEpochAlwaysReproducible(t *testing.T) {
102+
t.Parallel()
103+
104+
data := []byte("test data for reproducibility check")
105+
epoch := time.Unix(1609459200, 0).UTC()
106+
opts := GzipOptions{Level: gzip.BestCompression, Epoch: epoch}
107+
108+
results := make([][]byte, 5)
109+
for i := range results {
110+
var err error
111+
results[i], err = Compress(data, opts)
112+
require.NoError(t, err)
113+
}
114+
115+
for i := 1; i < len(results); i++ {
116+
assert.Equal(t, results[0], results[i], "iteration %d should match", i)
117+
}
118+
}
119+
120+
func TestCompressDecompress_RoundTrip(t *testing.T) {
121+
t.Parallel()
122+
123+
original := []byte("test data for round trip")
124+
opts := DefaultGzipOptions()
125+
126+
compressed, err := Compress(original, opts)
127+
require.NoError(t, err)
128+
129+
decompressed, err := Decompress(compressed)
130+
require.NoError(t, err)
131+
132+
assert.Equal(t, original, decompressed)
133+
}
134+
135+
func TestDecompressWithLimit_RejectsOversized(t *testing.T) {
136+
t.Parallel()
137+
138+
// Create compressed data that exceeds the limit when decompressed
139+
data := bytes.Repeat([]byte("x"), 1024)
140+
compressed, err := Compress(data, DefaultGzipOptions())
141+
require.NoError(t, err)
142+
143+
_, err = DecompressWithLimit(compressed, 100)
144+
assert.Error(t, err)
145+
assert.Contains(t, err.Error(), "exceeds maximum size")
146+
}
147+
148+
func TestCompressTar_Reproducible(t *testing.T) {
149+
t.Parallel()
150+
151+
files := []FileEntry{
152+
{Path: "b.txt", Content: []byte("content b")},
153+
{Path: "a.txt", Content: []byte("content a")},
154+
}
155+
156+
tarOpts := DefaultTarOptions()
157+
gzipOpts := DefaultGzipOptions()
158+
159+
gz1, err := CompressTar(files, tarOpts, gzipOpts)
160+
require.NoError(t, err)
161+
162+
gz2, err := CompressTar(files, tarOpts, gzipOpts)
163+
require.NoError(t, err)
164+
165+
assert.Equal(t, gz1, gz2, "CompressTar should produce identical output")
166+
}
167+
168+
func TestCompressTar_RoundTrip(t *testing.T) {
169+
t.Parallel()
170+
171+
originalFiles := []FileEntry{
172+
{Path: "a.txt", Content: []byte("content a")},
173+
{Path: "dir/b.txt", Content: []byte("content b")},
174+
}
175+
176+
tarOpts := DefaultTarOptions()
177+
gzipOpts := DefaultGzipOptions()
178+
179+
compressed, err := CompressTar(originalFiles, tarOpts, gzipOpts)
180+
require.NoError(t, err)
181+
182+
extractedFiles, err := DecompressTar(compressed)
183+
require.NoError(t, err)
184+
185+
require.Len(t, extractedFiles, len(originalFiles))
186+
for i, f := range extractedFiles {
187+
assert.Equal(t, originalFiles[i].Path, f.Path)
188+
assert.Equal(t, originalFiles[i].Content, f.Content)
189+
}
190+
}

0 commit comments

Comments
 (0)