From 183dae580c3c29c96b343a340469addfa8e1c02b Mon Sep 17 00:00:00 2001 From: Eray Date: Mon, 30 Mar 2026 18:48:37 +0300 Subject: [PATCH] Update weights builder and harness behavior --- integration-tests/harness/harness.go | 34 ++++--- pkg/model/weight_builder.go | 40 ++++---- pkg/model/weight_builder_test.go | 141 ++++++++++++++++++++++++++- pkg/model/weights.go | 2 + 4 files changed, 184 insertions(+), 33 deletions(-) diff --git a/integration-tests/harness/harness.go b/integration-tests/harness/harness.go index 57e0e3d3af..23582f907d 100644 --- a/integration-tests/harness/harness.go +++ b/integration-tests/harness/harness.go @@ -1079,14 +1079,15 @@ type mockWeightsLock struct { // mockWeightFile mirrors WeightFile from pkg/model/weights.go // SYNC: If pkg/model/WeightFile changes, update this copy. type mockWeightFile struct { - Name string `json:"name"` - Dest string `json:"dest"` - DigestOriginal string `json:"digestOriginal"` - Digest string `json:"digest"` - Size int64 `json:"size"` - SizeUncompressed int64 `json:"sizeUncompressed"` - MediaType string `json:"mediaType"` - ContentType string `json:"contentType,omitempty"` + Name string `json:"name"` + Dest string `json:"dest"` + DigestOriginal string `json:"digestOriginal"` + Digest string `json:"digest"` + Size int64 `json:"size"` + SourceMtimeUnixNano int64 `json:"sourceMtimeUnixNano,omitempty"` + SizeUncompressed int64 `json:"sizeUncompressed"` + MediaType string `json:"mediaType"` + ContentType string `json:"contentType,omitempty"` } // cmdMockWeights generates mock weight files and a weights.lock file. @@ -1166,18 +1167,23 @@ func (h *Harness) cmdMockWeights(ts *testscript.TestScript, neg bool, args []str if err := os.WriteFile(filePath, data, 0o644); err != nil { ts.Fatalf("mock-weights: failed to write %s: %v", filename, err) } + fi, err := os.Stat(filePath) + if err != nil { + ts.Fatalf("mock-weights: failed to stat %s: %v", filename, err) + } // Compute digest (uncompressed, since we're not actually compressing for tests) hash := sha256.Sum256(data) digest := "sha256:" + hex.EncodeToString(hash[:]) files = append(files, mockWeightFile{ - Name: weightName, - Dest: "/cache/" + filename, - DigestOriginal: digest, - Digest: digest, // Same as original since we're not compressing - Size: size, - SizeUncompressed: size, + Name: weightName, + Dest: "/cache/" + filename, + DigestOriginal: digest, + Digest: digest, // Same as original since we're not compressing + Size: size, + SourceMtimeUnixNano: fi.ModTime().UnixNano(), + SizeUncompressed: size, // MediaType matches production WeightBuilder output (uncompressed). MediaType: "application/vnd.cog.weight.layer.v1", ContentType: "application/octet-stream", diff --git a/pkg/model/weight_builder.go b/pkg/model/weight_builder.go index d99b27156b..5febca6903 100644 --- a/pkg/model/weight_builder.go +++ b/pkg/model/weight_builder.go @@ -54,14 +54,12 @@ func (b *WeightBuilder) Build(ctx context.Context, spec ArtifactSpec) (Artifact, } return nil, fmt.Errorf("stat weight file %s: %w", ws.Source, err) } + sourceMtimeUnixNano := fi.ModTime().UnixNano() - // Check lockfile cache: if we have a matching entry (name + size), skip hashing. - // NOTE: This cache only checks name + file size. Same-size modifications (rare for - // weight files) won't be detected. Delete the lockfile to force re-hashing. - // TODO: Consider adding mtime to the cache key for stronger invalidation. + // Check lockfile cache: if we have a matching entry (name + size + source mtime), skip hashing. var digestStr string var size int64 - if cached := b.findCachedEntry(ws.Name(), fi.Size()); cached != nil { + if cached := b.findCachedEntry(ws.Name(), fi.Size(), sourceMtimeUnixNano); cached != nil { digestStr = cached.Digest size = cached.Size } else { @@ -95,16 +93,16 @@ func (b *WeightBuilder) Build(ctx context.Context, spec ArtifactSpec) (Artifact, } // Update lockfile - if err := b.updateLockfile(ws, digestStr, size); err != nil { + if err := b.updateLockfile(ws, digestStr, size, sourceMtimeUnixNano); err != nil { return nil, fmt.Errorf("update lockfile: %w", err) } return NewWeightArtifact(ws.Name(), desc, absPath, ws.Target, cfg), nil } -// findCachedEntry checks the lockfile for an entry matching name and fileSize. -// Returns the cached WeightFile if found and size matches, nil otherwise. -func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile { +// findCachedEntry checks the lockfile for an entry matching name, file size, and source mtime. +// Returns the cached WeightFile if found, nil otherwise. +func (b *WeightBuilder) findCachedEntry(name string, fileSize, sourceMtimeUnixNano int64) *WeightFile { if _, err := os.Stat(b.lockPath); err != nil { return nil } @@ -113,7 +111,12 @@ func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile return nil } for i, f := range lock.Files { - if f.Name == name && f.Size == fileSize { + // Zero mtime means the lockfile entry came from legacy code and is not trusted + // for cache hits. + if f.SourceMtimeUnixNano == 0 { + continue + } + if f.Name == name && f.Size == fileSize && f.SourceMtimeUnixNano == sourceMtimeUnixNano { return &lock.Files[i] } } @@ -122,7 +125,7 @@ func (b *WeightBuilder) findCachedEntry(name string, fileSize int64) *WeightFile // updateLockfile loads the existing lockfile (if any), adds or updates // the entry for the given weight, and saves it back. -func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size int64) error { +func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size, sourceMtimeUnixNano int64) error { // Load existing lockfile, or start fresh. // LoadWeightsLock wraps the underlying error, so we check the raw file first. lock := &WeightsLock{ @@ -138,13 +141,14 @@ func (b *WeightBuilder) updateLockfile(ws *WeightSpec, digest string, size int64 } entry := WeightFile{ - Name: ws.Name(), - Dest: ws.Target, - Digest: digest, - DigestOriginal: digest, - Size: size, - SizeUncompressed: size, - MediaType: MediaTypeWeightLayer, + Name: ws.Name(), + Dest: ws.Target, + Digest: digest, + DigestOriginal: digest, + Size: size, + SourceMtimeUnixNano: sourceMtimeUnixNano, + SizeUncompressed: size, + MediaType: MediaTypeWeightLayer, } // Update existing entry or append diff --git a/pkg/model/weight_builder_test.go b/pkg/model/weight_builder_test.go index 17c188d2a5..7ba85f26f2 100644 --- a/pkg/model/weight_builder_test.go +++ b/pkg/model/weight_builder_test.go @@ -7,6 +7,7 @@ import ( "os" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" @@ -72,7 +73,10 @@ func TestWeightBuilder_WritesLockfile(t *testing.T) { // After Build(), a weights.lock should be written/updated at lockPath. tmpDir := t.TempDir() weightContent := []byte("lockfile test weight") - err := os.WriteFile(filepath.Join(tmpDir, "model.bin"), weightContent, 0o644) + weightPath := filepath.Join(tmpDir, "model.bin") + err := os.WriteFile(weightPath, weightContent, 0o644) + require.NoError(t, err) + fi, err := os.Stat(weightPath) require.NoError(t, err) hash := sha256.Sum256(weightContent) @@ -106,6 +110,7 @@ func TestWeightBuilder_WritesLockfile(t *testing.T) { require.Equal(t, "/weights/model.bin", wf.Dest) require.Equal(t, expectedDigest, wf.Digest) require.Equal(t, int64(len(weightContent)), wf.Size) + require.Equal(t, fi.ModTime().UnixNano(), wf.SourceMtimeUnixNano) } func TestWeightBuilder_UpdatesExistingLockfile(t *testing.T) { @@ -194,6 +199,7 @@ func TestWeightBuilder_CacheHit(t *testing.T) { require.NoError(t, err) require.Len(t, lock.Files, 1) require.Equal(t, "my-model", lock.Files[0].Name) + require.NotZero(t, lock.Files[0].SourceMtimeUnixNano) } func TestWeightBuilder_CacheMiss_SizeChanged(t *testing.T) { @@ -234,6 +240,139 @@ func TestWeightBuilder_CacheMiss_SizeChanged(t *testing.T) { require.Equal(t, int64(len(content2)), wa2.Descriptor().Size) } +func TestWeightBuilder_CacheMiss_SameSizeMtimeChanged(t *testing.T) { + // When file content changes without a size change, the builder should re-hash + // if the source mtime changed. + tmpDir := t.TempDir() + content1 := []byte("same-size-content-v1") + content2 := []byte("same-size-content-v2") + require.Equal(t, len(content1), len(content2)) + + weightPath := filepath.Join(tmpDir, "model.bin") + err := os.WriteFile(weightPath, content1, 0o644) + require.NoError(t, err) + + src := NewSourceFromConfig(&config.Config{ + Weights: []config.WeightSource{ + {Name: "my-model", Source: "model.bin", Target: "/weights/model.bin"}, + }, + }, tmpDir) + + lockPath := filepath.Join(tmpDir, "weights.lock") + wb := NewWeightBuilder(src, "0.15.0", lockPath) + spec := NewWeightSpec("my-model", "model.bin", "/weights/model.bin") + + // First build + _, err = wb.Build(context.Background(), spec) + require.NoError(t, err) + + lock1, err := LoadWeightsLock(lockPath) + require.NoError(t, err) + require.Len(t, lock1.Files, 1) + firstMtime := lock1.Files[0].SourceMtimeUnixNano + require.NotZero(t, firstMtime) + + // Rewrite with same-size content and force a different mtime. + err = os.WriteFile(weightPath, content2, 0o644) + require.NoError(t, err) + + fi, err := os.Stat(weightPath) + require.NoError(t, err) + forcedMtime := fi.ModTime().Add(2 * time.Second) + err = os.Chtimes(weightPath, forcedMtime, forcedMtime) + require.NoError(t, err) + + artifact2, err := wb.Build(context.Background(), spec) + require.NoError(t, err) + + wa2 := artifact2.(*WeightArtifact) + hash2 := sha256.Sum256(content2) + expectedDigest2 := "sha256:" + hex.EncodeToString(hash2[:]) + require.Equal(t, expectedDigest2, wa2.Descriptor().Digest.String()) + + lock2, err := LoadWeightsLock(lockPath) + require.NoError(t, err) + require.Len(t, lock2.Files, 1) + require.NotEqual(t, firstMtime, lock2.Files[0].SourceMtimeUnixNano) +} + +func TestWeightBuilder_CacheMiss_WhenLockfileEntryHasNoSourceMtime(t *testing.T) { + // Old lockfiles without sourceMtimeUnixNano should miss cache and be rewritten. + tmpDir := t.TempDir() + content := []byte("legacy lockfile content") + weightPath := filepath.Join(tmpDir, "model.bin") + err := os.WriteFile(weightPath, content, 0o644) + require.NoError(t, err) + + src := NewSourceFromConfig(&config.Config{ + Weights: []config.WeightSource{ + {Name: "my-model", Source: "model.bin", Target: "/weights/model.bin"}, + }, + }, tmpDir) + + lockPath := filepath.Join(tmpDir, "weights.lock") + legacyLock := &WeightsLock{ + Version: "1.0", + Created: time.Now().UTC(), + Files: []WeightFile{ + { + Name: "my-model", + Dest: "/weights/model.bin", + DigestOriginal: "sha256:stale", + Digest: "sha256:stale", + Size: int64(len(content)), + SizeUncompressed: int64(len(content)), + MediaType: MediaTypeWeightLayer, + // SourceMtimeUnixNano intentionally omitted (zero). + }, + }, + } + require.NoError(t, legacyLock.Save(lockPath)) + + wb := NewWeightBuilder(src, "0.15.0", lockPath) + spec := NewWeightSpec("my-model", "model.bin", "/weights/model.bin") + artifact, err := wb.Build(context.Background(), spec) + require.NoError(t, err) + + expectedHash := sha256.Sum256(content) + expectedDigest := "sha256:" + hex.EncodeToString(expectedHash[:]) + wa := artifact.(*WeightArtifact) + require.Equal(t, expectedDigest, wa.Descriptor().Digest.String()) + + lock, err := LoadWeightsLock(lockPath) + require.NoError(t, err) + require.Len(t, lock.Files, 1) + require.Equal(t, expectedDigest, lock.Files[0].Digest) + require.NotZero(t, lock.Files[0].SourceMtimeUnixNano) +} + +func TestWeightBuilder_FindCachedEntry_MissesLegacyZeroMtime(t *testing.T) { + tmpDir := t.TempDir() + lockPath := filepath.Join(tmpDir, "weights.lock") + + lock := &WeightsLock{ + Version: "1.0", + Created: time.Now().UTC(), + Files: []WeightFile{ + { + Name: "my-model", + Dest: "/weights/model.bin", + DigestOriginal: "sha256:legacy", + Digest: "sha256:legacy", + Size: 123, + SourceMtimeUnixNano: 0, // Legacy entry: must never be treated as a cache hit. + SizeUncompressed: 123, + MediaType: MediaTypeWeightLayer, + }, + }, + } + require.NoError(t, lock.Save(lockPath)) + + wb := NewWeightBuilder(NewSourceFromConfig(&config.Config{}, tmpDir), "0.15.0", lockPath) + cached := wb.findCachedEntry("my-model", 123, 0) + require.Nil(t, cached) +} + func TestWeightBuilder_ErrorWrongSpecType(t *testing.T) { tmpDir := t.TempDir() src := NewSourceFromConfig(&config.Config{}, tmpDir) diff --git a/pkg/model/weights.go b/pkg/model/weights.go index e2e1283883..101638e390 100644 --- a/pkg/model/weights.go +++ b/pkg/model/weights.go @@ -14,6 +14,8 @@ type WeightFile struct { Digest string `json:"digest"` // Size is the compressed size in bytes. Size int64 `json:"size"` + // SourceMtimeUnixNano is the source file modification time used for cache invalidation. + SourceMtimeUnixNano int64 `json:"sourceMtimeUnixNano,omitempty"` // SizeUncompressed is the original size in bytes. SizeUncompressed int64 `json:"sizeUncompressed"` // MediaType is the OCI layer media type (e.g., application/vnd.cog.weight.layer.v1+gzip).