Skip to content

Commit 0474d61

Browse files
JAORMXclaude
andcommitted
Add per-file manifest to extract cache for tamper detection
The previous .version marker only stored the expected aggregate bundle hash; isValid compared the stored string to the recomputed one. It did not re-read the extracted files, so a cached binary or library that had been replaced or silently corrupted after extraction was treated as valid on every subsequent Ensure. Replace the marker with a JSON manifest listing the bundle version, the aggregate hash, and a per-file SHA-256 of every extracted artifact. On Ensure, isValid now walks the manifest and re-hashes each file on disk; any mismatch triggers a re-extract, which removes the stale target dir first since os.Rename cannot replace an existing directory. Tests cover the happy path (Ensure -> isValid), wrong bundle hash, missing manifest, and — the new guarantee — a cached file modified after extraction causes isValid to return false and a follow-up Ensure to re-extract the original content. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 38f8fdb commit 0474d61

2 files changed

Lines changed: 121 additions & 23 deletions

File tree

extract/bundle.go

Lines changed: 81 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,34 @@ package extract
66
import (
77
"crypto/sha256"
88
"encoding/hex"
9+
"encoding/json"
910
"fmt"
11+
"io"
1012
"os"
1113
"path/filepath"
1214
"sync"
1315

1416
"github.com/gofrs/flock"
1517
)
1618

19+
// manifestName is the per-extract manifest file. It records the bundle
20+
// hash plus a per-file SHA-256 of every extracted artifact so Ensure can
21+
// re-verify the cache contents on subsequent calls and re-extract if
22+
// any file has been tampered with or replaced.
23+
const manifestName = ".manifest.json"
24+
25+
// manifest is the on-disk format written alongside extracted files.
26+
type manifest struct {
27+
// Version is the bundle version string.
28+
Version string `json:"version"`
29+
// Hash is the aggregate bundle hash (same value used in the cache
30+
// directory name). Kept for quick mismatch detection before walking
31+
// individual files.
32+
Hash string `json:"hash"`
33+
// Files maps the bundle-relative file name to its SHA-256 hex digest.
34+
Files map[string]string `json:"files"`
35+
}
36+
1737
// File describes a single file to extract.
1838
type File struct {
1939
Name string
@@ -90,12 +110,19 @@ func (b *Bundle) Ensure(cacheDir string) (string, error) {
90110
}
91111
}()
92112

93-
// Extract all files.
113+
// Extract all files and collect per-file hashes for the manifest.
114+
m := manifest{
115+
Version: b.version,
116+
Hash: hash,
117+
Files: make(map[string]string, len(b.files)),
118+
}
94119
for _, f := range b.files {
95120
if extractErr := b.extractFile(tmpDir, f); extractErr != nil {
96121
err = extractErr
97122
return "", fmt.Errorf("extract %s: %w", f.Name, extractErr)
98123
}
124+
fileHash := sha256.Sum256(f.Content)
125+
m.Files[f.Name] = hex.EncodeToString(fileHash[:])
99126
}
100127

101128
// Create symlinks.
@@ -106,11 +133,27 @@ func (b *Bundle) Ensure(cacheDir string) (string, error) {
106133
}
107134
}
108135

109-
// Write version file.
110-
versionPath := filepath.Join(tmpDir, ".version")
111-
if writeErr := os.WriteFile(versionPath, []byte(hash), 0o644); writeErr != nil {
136+
// Write manifest atomically last so a partial extraction never looks
137+
// valid to isValid.
138+
manifestData, mErr := json.Marshal(m)
139+
if mErr != nil {
140+
err = mErr
141+
return "", fmt.Errorf("marshal manifest: %w", mErr)
142+
}
143+
manifestPath := filepath.Join(tmpDir, manifestName)
144+
if writeErr := os.WriteFile(manifestPath, manifestData, 0o600); writeErr != nil {
112145
err = writeErr
113-
return "", fmt.Errorf("write version file: %w", writeErr)
146+
return "", fmt.Errorf("write manifest: %w", writeErr)
147+
}
148+
149+
// If a previous invalid extraction exists at targetDir (e.g. tampered
150+
// content detected by isValid), remove it before rename. The lock
151+
// held above serializes this against other callers.
152+
if _, statErr := os.Lstat(targetDir); statErr == nil {
153+
if rmErr := os.RemoveAll(targetDir); rmErr != nil {
154+
err = rmErr
155+
return "", fmt.Errorf("remove stale target: %w", rmErr)
156+
}
114157
}
115158

116159
// Atomic rename to target.
@@ -134,14 +177,43 @@ func (b *Bundle) computeHash() string {
134177
return hex.EncodeToString(h.Sum(nil))
135178
}
136179

137-
// isValid checks whether targetDir exists and contains a .version file
138-
// matching the expected hash.
180+
// isValid checks whether targetDir is a complete, unaltered extraction
181+
// for the expected bundle hash. It requires a manifest with a matching
182+
// bundle hash and every listed file's SHA-256 matching its current on-
183+
// disk content. Any mismatch causes a re-extract.
139184
func (b *Bundle) isValid(targetDir, hash string) bool {
140-
data, err := os.ReadFile(filepath.Join(targetDir, ".version"))
185+
data, err := os.ReadFile(filepath.Join(targetDir, manifestName))
141186
if err != nil {
142187
return false
143188
}
144-
return string(data) == hash
189+
var m manifest
190+
if err := json.Unmarshal(data, &m); err != nil {
191+
return false
192+
}
193+
if m.Hash != hash {
194+
return false
195+
}
196+
for name, want := range m.Files {
197+
got, err := hashFileOnDisk(filepath.Join(targetDir, name))
198+
if err != nil || got != want {
199+
return false
200+
}
201+
}
202+
return true
203+
}
204+
205+
// hashFileOnDisk returns the SHA-256 hex digest of the file at path.
206+
func hashFileOnDisk(path string) (string, error) {
207+
f, err := os.Open(path)
208+
if err != nil {
209+
return "", err
210+
}
211+
defer func() { _ = f.Close() }()
212+
h := sha256.New()
213+
if _, err := io.Copy(h, f); err != nil {
214+
return "", err
215+
}
216+
return hex.EncodeToString(h.Sum(nil)), nil
145217
}
146218

147219
// extractFile writes a single file atomically via a temp file and rename.

extract/bundle_test.go

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,9 @@ func TestBundle_Ensure_EmptyBundle(t *testing.T) {
152152
require.NoError(t, err)
153153
assert.True(t, info.IsDir())
154154

155-
versionData, err := os.ReadFile(filepath.Join(dir, ".version"))
155+
manifestData, err := os.ReadFile(filepath.Join(dir, manifestName))
156156
require.NoError(t, err)
157-
assert.NotEmpty(t, versionData)
157+
assert.NotEmpty(t, manifestData)
158158
}
159159

160160
func TestBundle_Ensure_ConcurrentAccess(t *testing.T) {
@@ -281,15 +281,15 @@ func TestBundle_ComputeHash_EmptyBundle(t *testing.T) {
281281
func TestBundle_IsValid_MatchingHash(t *testing.T) {
282282
t.Parallel()
283283

284+
// End-to-end through Ensure: the manifest it wrote must satisfy
285+
// isValid on the same inputs.
284286
b := NewBundle("v1", []File{
285287
{Name: "x.txt", Content: []byte("data"), Mode: 0o644},
286288
})
287-
hash := b.computeHash()
288-
289-
dir := t.TempDir()
290-
require.NoError(t, os.WriteFile(filepath.Join(dir, ".version"), []byte(hash), 0o644))
289+
dir, err := b.Ensure(t.TempDir())
290+
require.NoError(t, err)
291291

292-
assert.True(t, b.isValid(dir, hash))
292+
assert.True(t, b.isValid(dir, b.computeHash()))
293293
}
294294

295295
func TestBundle_IsValid_WrongHash(t *testing.T) {
@@ -298,25 +298,51 @@ func TestBundle_IsValid_WrongHash(t *testing.T) {
298298
b := NewBundle("v1", []File{
299299
{Name: "x.txt", Content: []byte("data"), Mode: 0o644},
300300
})
301-
hash := b.computeHash()
302-
303-
dir := t.TempDir()
304-
require.NoError(t, os.WriteFile(filepath.Join(dir, ".version"), []byte("wronghash"), 0o644))
301+
dir, err := b.Ensure(t.TempDir())
302+
require.NoError(t, err)
305303

306-
assert.False(t, b.isValid(dir, hash))
304+
assert.False(t, b.isValid(dir, "wronghash"))
307305
}
308306

309-
func TestBundle_IsValid_MissingVersionFile(t *testing.T) {
307+
func TestBundle_IsValid_MissingManifest(t *testing.T) {
310308
t.Parallel()
311309

312310
b := NewBundle("v1", nil)
313311
hash := b.computeHash()
314312

315313
dir := t.TempDir()
316-
// dir exists but has no .version file.
314+
// dir exists but has no manifest file.
317315
assert.False(t, b.isValid(dir, hash))
318316
}
319317

318+
func TestBundle_IsValid_TamperedFileTriggersReextract(t *testing.T) {
319+
t.Parallel()
320+
321+
// If a cached file has been modified after extraction, isValid must
322+
// return false so Ensure re-extracts rather than spawning a tampered
323+
// binary.
324+
cacheDir := t.TempDir()
325+
b := NewBundle("v-tamper", []File{
326+
{Name: "binary", Content: []byte("original-content"), Mode: 0o755},
327+
})
328+
dir, err := b.Ensure(cacheDir)
329+
require.NoError(t, err)
330+
331+
// Overwrite the cached file with different content.
332+
require.NoError(t, os.WriteFile(filepath.Join(dir, "binary"), []byte("tampered"), 0o755))
333+
334+
assert.False(t, b.isValid(dir, b.computeHash()),
335+
"tampered cached file must not be treated as valid")
336+
337+
// A subsequent Ensure should re-extract the original content.
338+
dir2, err := b.Ensure(cacheDir)
339+
require.NoError(t, err)
340+
got, err := os.ReadFile(filepath.Join(dir2, "binary"))
341+
require.NoError(t, err)
342+
assert.Equal(t, "original-content", string(got),
343+
"Ensure must re-extract the original content after tamper detection")
344+
}
345+
320346
func TestBundle_IsValid_NonexistentDir(t *testing.T) {
321347
t.Parallel()
322348

0 commit comments

Comments
 (0)