Skip to content

Commit ddbfe47

Browse files
authored
Merge pull request #133 from stacklok/feat/oci-artifact-shared-primitives
oci: extract artifact-agnostic primitives into oci/artifact (Phase 0, THV-0077)
2 parents b2eae46 + c58e7ca commit ddbfe47

19 files changed

Lines changed: 1108 additions & 363 deletions

CLAUDE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ task license-fix # Add missing license headers
7272
| `env` | Environment variable abstraction with `Reader` interface for testable code |
7373
| `httperr` | Wrap errors with HTTP status codes; use `WithCode()`, `Code()`, `New()` |
7474
| `logging` | Pre-configured `*slog.Logger` factory with consistent ToolHive defaults (Alpha) |
75+
| `oci/artifact` | Artifact-agnostic OCI tar/gzip/extraction/platform primitives shared by oci/skills and oci/plugins (Alpha) |
7576
| `oci/skills` | OCI artifact types, media types, and registry operations for ToolHive skills (Alpha) |
7677
| `postgres` | PostgreSQL connection pool with optional AWS RDS IAM dynamic auth (Alpha) |
7778
| `recovery` | HTTP panic recovery middleware (Beta) |

oci/artifact/doc.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
/*
5+
Package artifact provides artifact-agnostic OCI primitives shared by the
6+
ToolHive ecosystem: reproducible tar archive creation and extraction,
7+
reproducible gzip compression, OCI platform helpers, and pull-hardening
8+
(size/count/digest validation) for registry operations.
9+
10+
These primitives are independent of any particular artifact type (skills,
11+
plugins, etc.). Artifact-specific media types, labels, and annotations live in
12+
the packages that define those artifacts (for example oci/skills).
13+
14+
# Reproducible Archives
15+
16+
CreateTar and Compress produce byte-stable output for identical input, which is
17+
what makes artifact digests deterministic:
18+
19+
data, err := artifact.CompressTar(files, artifact.DefaultTarOptions(), artifact.DefaultGzipOptions())
20+
21+
# Platform Helpers
22+
23+
PlatformString and ParsePlatform convert between OCI platform values and their
24+
"os/arch" or "os/arch/variant" string form.
25+
26+
# Pull Hardening
27+
28+
ValidatingTarget wraps an oras.Target and enforces size and structure limits on
29+
pushed content, defending against OOM and resource exhaustion from malicious
30+
registries during pull operations.
31+
32+
# Stability
33+
34+
This package is Alpha. Breaking changes are possible between minor versions.
35+
*/
36+
package artifact
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
package skills
4+
package artifact
55

66
import (
77
"bytes"
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
package skills
4+
package artifact
55

66
import (
77
"bytes"

oci/artifact/platform.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package artifact
5+
6+
import (
7+
"fmt"
8+
"strings"
9+
10+
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
11+
)
12+
13+
// PlatformString returns the platform in "os/arch" or "os/arch/variant" format.
14+
func PlatformString(p ocispec.Platform) string {
15+
s := p.OS + "/" + p.Architecture
16+
if p.Variant != "" {
17+
s += "/" + p.Variant
18+
}
19+
return s
20+
}
21+
22+
// ParsePlatform parses a platform string in "os/arch" or "os/arch/variant" format.
23+
func ParsePlatform(s string) (ocispec.Platform, error) {
24+
parts := strings.Split(s, "/")
25+
if len(parts) < 2 || len(parts) > 3 {
26+
return ocispec.Platform{}, fmt.Errorf("invalid platform format: %q (expected os/arch or os/arch/variant)", s)
27+
}
28+
osName := strings.TrimSpace(parts[0])
29+
arch := strings.TrimSpace(parts[1])
30+
if osName == "" || arch == "" {
31+
return ocispec.Platform{}, fmt.Errorf("invalid platform format: %q (os and arch cannot be empty)", s)
32+
}
33+
p := ocispec.Platform{OS: osName, Architecture: arch}
34+
if len(parts) == 3 {
35+
variant := strings.TrimSpace(parts[2])
36+
if variant == "" {
37+
return ocispec.Platform{}, fmt.Errorf("invalid platform format: %q (variant cannot be empty)", s)
38+
}
39+
p.Variant = variant
40+
}
41+
return p, nil
42+
}
43+
44+
// OS and architecture constants for OCI platform specifications.
45+
const (
46+
// OSLinux is the Linux OS identifier used in OCI platform specs.
47+
OSLinux = "linux"
48+
// ArchAMD64 is the x86-64 architecture identifier used in OCI platform specs.
49+
ArchAMD64 = "amd64"
50+
// ArchARM64 is the 64-bit ARM architecture identifier used in OCI platform specs.
51+
ArchARM64 = "arm64"
52+
)
53+
54+
// DefaultPlatforms are the default platforms for artifacts.
55+
// These cover most Kubernetes clusters.
56+
var DefaultPlatforms = []ocispec.Platform{
57+
{OS: OSLinux, Architecture: ArchAMD64},
58+
{OS: OSLinux, Architecture: ArchARM64},
59+
}

oci/artifact/platform_test.go

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package artifact
5+
6+
import (
7+
"testing"
8+
9+
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
// testArchARM is the 32-bit ARM architecture identifier used in test platform specs.
15+
const testArchARM = "arm"
16+
17+
func TestParsePlatform(t *testing.T) {
18+
t.Parallel()
19+
20+
tests := []struct {
21+
name string
22+
input string
23+
want ocispec.Platform
24+
wantErr bool
25+
}{
26+
{
27+
name: "os/arch",
28+
input: "linux/amd64",
29+
want: ocispec.Platform{OS: OSLinux, Architecture: ArchAMD64},
30+
},
31+
{
32+
name: "os/arch/variant",
33+
input: "linux/arm/v7",
34+
want: ocispec.Platform{OS: OSLinux, Architecture: testArchARM, Variant: "v7"},
35+
},
36+
{
37+
name: "fewer than 2 parts (no slash)",
38+
input: "linuxamd64",
39+
wantErr: true,
40+
},
41+
{
42+
name: "more than 3 parts",
43+
input: "linux/amd64/v8/extra",
44+
wantErr: true,
45+
},
46+
{
47+
name: "empty os",
48+
input: "/amd64",
49+
wantErr: true,
50+
},
51+
{
52+
name: "empty arch",
53+
input: "linux/",
54+
wantErr: true,
55+
},
56+
{
57+
name: "empty variant",
58+
input: "linux/arm/",
59+
wantErr: true,
60+
},
61+
}
62+
63+
for _, tt := range tests {
64+
t.Run(tt.name, func(t *testing.T) {
65+
t.Parallel()
66+
67+
got, err := ParsePlatform(tt.input)
68+
if tt.wantErr {
69+
require.Error(t, err)
70+
return
71+
}
72+
require.NoError(t, err)
73+
assert.Equal(t, tt.want, got)
74+
})
75+
}
76+
}
77+
78+
func TestPlatformString(t *testing.T) {
79+
t.Parallel()
80+
81+
tests := []struct {
82+
name string
83+
platform ocispec.Platform
84+
want string
85+
}{
86+
{
87+
name: "os/arch",
88+
platform: ocispec.Platform{OS: OSLinux, Architecture: ArchAMD64},
89+
want: "linux/amd64",
90+
},
91+
{
92+
name: "os/arch/variant",
93+
platform: ocispec.Platform{OS: OSLinux, Architecture: testArchARM, Variant: "v7"},
94+
want: "linux/arm/v7",
95+
},
96+
}
97+
98+
for _, tt := range tests {
99+
t.Run(tt.name, func(t *testing.T) {
100+
t.Parallel()
101+
assert.Equal(t, tt.want, PlatformString(tt.platform))
102+
})
103+
}
104+
}
105+
106+
func TestParsePlatform_PlatformString_Roundtrip(t *testing.T) {
107+
t.Parallel()
108+
109+
platforms := []ocispec.Platform{
110+
{OS: OSLinux, Architecture: ArchAMD64},
111+
{OS: OSLinux, Architecture: ArchARM64},
112+
{OS: OSLinux, Architecture: testArchARM, Variant: "v7"},
113+
}
114+
115+
for _, p := range platforms {
116+
parsed, err := ParsePlatform(PlatformString(p))
117+
require.NoError(t, err)
118+
assert.Equal(t, p, parsed)
119+
}
120+
}
121+
122+
func TestDefaultPlatforms(t *testing.T) {
123+
t.Parallel()
124+
125+
require.Len(t, DefaultPlatforms, 2)
126+
assert.Equal(t, ocispec.Platform{OS: OSLinux, Architecture: ArchAMD64}, DefaultPlatforms[0])
127+
assert.Equal(t, ocispec.Platform{OS: OSLinux, Architecture: ArchARM64}, DefaultPlatforms[1])
128+
}
Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
package skills
4+
package artifact
55

66
import (
77
"archive/tar"
@@ -112,7 +112,8 @@ func ExtractTarWithLimit(data []byte, maxFileSize int64) ([]FileEntry, error) {
112112
}
113113

114114
// Reject path traversal
115-
if err := validateTarPath(hdr.Name); err != nil {
115+
cleanedPath, err := validateTarPath(hdr.Name)
116+
if err != nil {
116117
return nil, err
117118
}
118119

@@ -148,7 +149,7 @@ func ExtractTarWithLimit(data []byte, maxFileSize int64) ([]FileEntry, error) {
148149
}
149150

150151
files = append(files, FileEntry{
151-
Path: hdr.Name,
152+
Path: cleanedPath,
152153
Content: content,
153154
Mode: hdr.Mode,
154155
})
@@ -157,16 +158,20 @@ func ExtractTarWithLimit(data []byte, maxFileSize int64) ([]FileEntry, error) {
157158
return files, nil
158159
}
159160

160-
// validateTarPath checks that a tar entry path is safe and returns its
// cleaned, slash-separated form.
//
// An error is returned when:
//   - the path contains a backslash (archive paths are slash-separated, and
//     path.Clean does not treat a backslash as a separator, so a
//     backslash-based ".." sequence would otherwise go unnoticed);
//   - the cleaned path is ".." or begins with "../", i.e. it escapes the
//     archive root;
//   - the cleaned path is absolute.
//
// On error the returned string is empty.
func validateTarPath(p string) (string, error) {
	// The pattern is a single backslash. It is written as an interpreted
	// string on purpose: inside a raw (backtick) literal the same two
	// characters would denote two backslashes and only match doubled ones.
	if strings.Contains(p, "\\") {
		return "", fmt.Errorf("backslash path separators not allowed in archive: %s", p)
	}

	// path.Clean resolves all ".." segments; any remaining leading ".."
	// segment means the path escapes the archive root. Names that merely
	// start with two dots (for example "..foo") are legitimate.
	cleaned := path.Clean(p)
	if cleaned == ".." || strings.HasPrefix(cleaned, "../") {
		return "", fmt.Errorf("path traversal detected in archive: %s", p)
	}
	if path.IsAbs(cleaned) {
		return "", fmt.Errorf("absolute path not allowed in archive: %s", p)
	}
	return cleaned, nil
}
Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
package skills
4+
package artifact
55

66
import (
77
"archive/tar"
@@ -205,6 +205,7 @@ func TestExtractTar_RejectsPathTraversal(t *testing.T) {
205205
{name: "dotdot prefix", path: "../etc/passwd"},
206206
{name: "dotdot in middle", path: "foo/../../etc/passwd"},
207207
{name: "absolute path", path: "/etc/passwd"},
208+
{name: "windows traversal", path: `foo\\..\\..\\etc\\passwd`},
208209
}
209210

210211
for _, tt := range tests {
@@ -231,6 +232,30 @@ func TestExtractTar_RejectsPathTraversal(t *testing.T) {
231232
}
232233
}
233234

235+
func TestExtractTar_CleansPath(t *testing.T) {
236+
t.Parallel()
237+
238+
var buf bytes.Buffer
239+
tw := tar.NewWriter(&buf)
240+
241+
content := []byte("test")
242+
require.NoError(t, tw.WriteHeader(&tar.Header{
243+
Name: "foo/./bar.txt",
244+
Size: int64(len(content)),
245+
Typeflag: tar.TypeReg,
246+
Mode: 0644,
247+
}))
248+
_, err := tw.Write(content)
249+
require.NoError(t, err)
250+
require.NoError(t, tw.Close())
251+
252+
files, err := ExtractTar(buf.Bytes())
253+
require.NoError(t, err)
254+
require.Len(t, files, 1)
255+
assert.Equal(t, "foo/bar.txt", files[0].Path)
256+
assert.Equal(t, content, files[0].Content)
257+
}
258+
234259
func TestExtractTarWithLimit_RejectsOversized(t *testing.T) {
235260
t.Parallel()
236261

oci/artifact/testconsts_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// SPDX-FileCopyrightText: Copyright 2026 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package artifact
5+
6+
const (
7+
testFileA = "a.txt"
8+
testFileB = "b.txt"
9+
)

0 commit comments

Comments
 (0)