Skip to content

Commit bc9edcb

Browse files
bussyjd authored and OisinKyne committed
chore(dev): tag locally-built dev images :dev-<sha> instead of :latest
Under OBOL_DEVELOPMENT, both the manifest rewrite (internal/defaults) and the image build (internal/stack) used a shared, mutable `<image>:latest` tag. On a host running more than one obol-stack worktree against the same Docker daemon, one worktree's build silently overwrites :latest and a different worktree's `obol stack up` then deploys the wrong binary — observed during rc9 QA, where a sibling branch's serviceoffer-controller (reading a per-purchase Secret this branch's RBAC doesn't grant) poisoned an unrelated stack and presented as a hang, not an obvious image mismatch.

Tag dev images with `dev-<short-git-sha>` instead:

- defaults.DevImageTag() = dev-<sha> of the working tree (`latest` fallback when not a git checkout, preserving prior behaviour for tarball builds).
- CopyInfrastructure stamps the rendered manifests with the dev tag AND persists it to $CONFIG_DIR/.dev-image-tag, so internal/stack builds/imports the exact tag the cluster pins — even if HEAD moves between `stack init` and `stack up`.
- Different branches/worktrees get distinct tags (no cross-worktree poisoning); committing changes the SHA and triggers a fresh build; uncommitted changes reuse the committed tag unless OBOL_FORCE_REBUILD_LOCAL_DEV_IMAGES is set.

Validated: go test ./... (33 pkgs); `obol stack init` (dev) rewrites both the controller and buyer pins to :dev-<sha> with no leftover @sha256 and persists the tag; `docker build -t …:dev-<sha>` + `k3d image import` succeed (the exact build/import path buildAndImportLocalImages runs).
1 parent a63d043 commit bc9edcb

3 files changed

Lines changed: 116 additions & 26 deletions

File tree

internal/defaults/defaults.go

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,50 @@ const (
2525
stackIDFile = ".stack-id"
2626
stackBackendFile = ".stack-backend"
2727
stampFile = ".obol-defaults-stamp"
28+
// devImageTagFile records the tag the dev-mode manifest rewrite stamped
29+
// the locally-built images with. internal/stack reads it at build time so
30+
// the image it builds/imports matches what the rendered manifests pin —
31+
// even if HEAD moved between `stack init` and `stack up`.
32+
devImageTagFile = ".dev-image-tag"
2833
)
2934

35+
// devImageTagSHAPattern matches the abbreviated or full lowercase-hex commit
// SHA that DevImageTag accepts from `git rev-parse`. It is compiled once at
// package scope instead of on every DevImageTag call.
var devImageTagSHAPattern = regexp.MustCompile(`^[0-9a-f]{7,40}$`)

// DevImageTag returns the tag used for locally-built dev images under
// OBOL_DEVELOPMENT. It is `dev-<short-git-sha>` of the working tree, so each
// branch/worktree builds a distinct tag and parallel dev stacks sharing one
// Docker daemon never clobber each other's images (the `:latest` collision that
// let a sibling worktree's build poison an unrelated stack). Committing changes
// the SHA and triggers a fresh build; uncommitted changes reuse the committed
// tag unless OBOL_FORCE_REBUILD_LOCAL_DEV_IMAGES is set.
//
// It falls back to `latest` whenever the git command fails (for example when
// the source is not a git checkout, such as a tarball build) or prints
// anything other than a 7-40 character lowercase-hex SHA, preserving the
// previous behaviour there.
func DevImageTag() string {
	// No working directory is set on the command, so git resolves HEAD for
	// whatever repository contains the process's current directory.
	out, err := exec.Command("git", "rev-parse", "--short=12", "HEAD").Output()
	if err != nil {
		return "latest"
	}

	// Only accept output that is safe to embed verbatim in an image tag.
	sha := strings.TrimSpace(string(out))
	if !devImageTagSHAPattern.MatchString(sha) {
		return "latest"
	}
	return "dev-" + sha
}
56+
57+
// ReadDevImageTag returns the dev image tag persisted at CopyInfrastructure
58+
// time, or "latest" if none was recorded (non-dev install, or pre-dates this
59+
// mechanism). internal/stack uses it to tag the images it builds.
60+
func ReadDevImageTag(cfg *config.Config) string {
61+
data, err := os.ReadFile(filepath.Join(cfg.ConfigDir, devImageTagFile))
62+
if err != nil {
63+
return "latest"
64+
}
65+
tag := strings.TrimSpace(string(data))
66+
if tag == "" {
67+
return "latest"
68+
}
69+
return tag
70+
}
71+
3072
// RefreshInfrastructureIfChanged refreshes the generated defaults tree when
3173
// the embedded infrastructure assets, backend, or stack ID changed.
3274
func RefreshInfrastructureIfChanged(cfg *config.Config, backendName, stackID string) (bool, error) {
@@ -63,16 +105,21 @@ func CopyInfrastructure(cfg *config.Config, backendName, stackID string) error {
63105
}
64106

65107
// Under OBOL_DEVELOPMENT we build images from the working tree and
66-
// import them into k3d as `<image>:latest`. The embedded templates
67-
// pin published digests for production safety, which means the
68-
// cluster ignores our locally-built images and silently uses stale
69-
// ghcr.io binaries. Rewrite digest pins to :latest after copy so the
70-
// dev cycle Just Works without operators having to kubectl-set-image
71-
// every loop.
108+
// import them into k3d. The embedded templates pin published digests for
109+
// production safety, which means the cluster ignores our locally-built
110+
// images and silently uses stale ghcr.io binaries. Rewrite the digest
111+
// pins to a per-commit `dev-<sha>` tag after copy so the dev cycle Just
112+
// Works without operators having to kubectl-set-image every loop, and so
113+
// parallel worktree stacks don't collide on a shared `:latest`. Persist
114+
// the tag so internal/stack builds/imports the exact tag we pinned here.
72115
if os.Getenv("OBOL_DEVELOPMENT") == "true" {
73-
if err := rewriteDevDigestPins(defaultsDir); err != nil {
116+
devTag := DevImageTag()
117+
if err := rewriteDevDigestPins(defaultsDir, devTag); err != nil {
74118
return fmt.Errorf("rewrite dev digest pins: %w", err)
75119
}
120+
if err := os.WriteFile(filepath.Join(cfg.ConfigDir, devImageTagFile), []byte(devTag), 0o600); err != nil {
121+
return fmt.Errorf("persist dev image tag: %w", err)
122+
}
76123
}
77124

78125
stamp, err := infrastructureStamp(backendName, stackID)
@@ -106,7 +153,7 @@ var devLocallyBuiltImageBases = []string{
106153
// — in either case the local dev build is tagged `:<devTag>`, so the
107154
// rewrite needs to catch both forms or `obol stack up` would pull from
108155
// the registry instead of using the freshly-built local image.
109-
func rewriteDevDigestPins(defaultsDir string) error {
156+
func rewriteDevDigestPins(defaultsDir, devTag string) error {
110157
patterns := make([]*regexp.Regexp, 0, len(devLocallyBuiltImageBases))
111158
replaceWith := make([]string, 0, len(devLocallyBuiltImageBases))
112159
for _, base := range devLocallyBuiltImageBases {
@@ -122,7 +169,7 @@ func rewriteDevDigestPins(defaultsDir string) error {
122169
// silently bypasses the local build (root cause of the no-debug-logs
123170
// regression in flow-11 step 43 chase, May 2026).
124171
patterns = append(patterns, regexp.MustCompile(regexp.QuoteMeta(base)+"(:[a-f0-9]{7,40}@sha256:[a-f0-9]{64}|@sha256:[a-f0-9]{64}|:[a-f0-9]{7,40})"))
125-
replaceWith = append(replaceWith, base+":latest")
172+
replaceWith = append(replaceWith, base+":"+devTag)
126173
}
127174

128175
return filepath.WalkDir(defaultsDir, func(path string, d fs.DirEntry, walkErr error) error {

internal/defaults/defaults_test.go

Lines changed: 44 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,37 @@ func TestCopyInfrastructure_DevModeRewritesDigestPins(t *testing.T) {
2525
}
2626
out := string(data)
2727

28-
// Every locally-built image must have lost its @sha256: pin and gained
29-
// :latest, otherwise the cluster pulls a stale ghcr.io binary even
30-
// when OBOL_DEVELOPMENT=true rebuilt the image locally.
28+
// The dev rewrite swaps the published digest/SHA pins for the per-commit
29+
// dev tag (dev-<sha>, or :latest when not a git checkout). In CI this is a
30+
// git checkout, so expect dev-<sha>.
31+
devTag := DevImageTag()
32+
33+
// Every locally-built image must have lost its @sha256: pin and gained the
34+
// dev tag, otherwise the cluster pulls a stale ghcr.io binary even when
35+
// OBOL_DEVELOPMENT=true rebuilt the image locally.
3136
for _, base := range devLocallyBuiltImageBases {
3237
if strings.Contains(out, base+"@sha256:") {
3338
t.Errorf("dev mode left digest pin on %s in %s", base, x402Path)
3439
}
3540
}
36-
for _, want := range []string{
37-
"ghcr.io/obolnetwork/x402-verifier:latest",
38-
"ghcr.io/obolnetwork/serviceoffer-controller:latest",
41+
for _, base := range []string{
42+
"ghcr.io/obolnetwork/x402-verifier",
43+
"ghcr.io/obolnetwork/serviceoffer-controller",
3944
} {
45+
want := base + ":" + devTag
4046
if !strings.Contains(out, want) {
4147
t.Errorf("dev mode did not rewrite to %q in %s", want, x402Path)
4248
}
4349
}
4450

51+
// The persisted dev tag MUST equal what was stamped into the manifests, or
52+
// internal/stack would build/import a tag the cluster doesn't pin.
53+
if got := ReadDevImageTag(cfg); got != devTag {
54+
t.Errorf("persisted dev image tag = %q, want %q", got, devTag)
55+
}
56+
4557
// Combo tag+digest form (used by x402-buyer in llm.yaml) must be
46-
// rewritten to a clean `:latest` with no stale `@sha256:` suffix.
58+
// rewritten to a clean `:<devTag>` with no stale `@sha256:` suffix.
4759
// Regression guard for the bug where the old regex matched only
4860
// the `:b13254e` part and left `@sha256:...` behind, causing Docker
4961
// to silently pull the registry-pinned image instead of the local
@@ -55,17 +67,37 @@ func TestCopyInfrastructure_DevModeRewritesDigestPins(t *testing.T) {
5567
t.Fatalf("read llm.yaml: %v", err)
5668
}
5769
llmOut := string(llmData)
58-
if !strings.Contains(llmOut, "ghcr.io/obolnetwork/x402-buyer:latest") {
59-
t.Errorf("dev mode did not rewrite x402-buyer to :latest in %s", llmPath)
70+
buyer := "ghcr.io/obolnetwork/x402-buyer"
71+
if !strings.Contains(llmOut, buyer+":"+devTag) {
72+
t.Errorf("dev mode did not rewrite x402-buyer to :%s in %s", devTag, llmPath)
6073
}
61-
if strings.Contains(llmOut, "ghcr.io/obolnetwork/x402-buyer:latest@sha256:") {
62-
t.Errorf("dev mode left orphan @sha256: suffix on x402-buyer:latest in %s — regex missed the combo form", llmPath)
74+
if strings.Contains(llmOut, buyer+":"+devTag+"@sha256:") {
75+
t.Errorf("dev mode left orphan @sha256: suffix on x402-buyer:%s in %s — regex missed the combo form", devTag, llmPath)
6376
}
64-
if strings.Contains(llmOut, "ghcr.io/obolnetwork/x402-buyer@sha256:") {
77+
if strings.Contains(llmOut, buyer+"@sha256:") {
6578
t.Errorf("dev mode left @sha256: digest pin on x402-buyer in %s — regex missed it", llmPath)
6679
}
6780
}
6881

82+
func TestDevImageTag_Format(t *testing.T) {
83+
// Tests run inside the git checkout, so expect dev-<sha>; tolerate the
84+
// :latest fallback for non-git build environments.
85+
tag := DevImageTag()
86+
if tag == "latest" {
87+
t.Skip("not a git checkout (DevImageTag fell back to latest) — nothing to assert")
88+
}
89+
if !regexp.MustCompile(`^dev-[0-9a-f]{7,40}$`).MatchString(tag) {
90+
t.Errorf("DevImageTag() = %q, want dev-<short-sha> or latest", tag)
91+
}
92+
}
93+
94+
func TestReadDevImageTag_FallbackWhenAbsent(t *testing.T) {
95+
cfg := &config.Config{ConfigDir: t.TempDir()}
96+
if got := ReadDevImageTag(cfg); got != "latest" {
97+
t.Errorf("ReadDevImageTag with no file = %q, want latest", got)
98+
}
99+
}
100+
69101
func TestCopyInfrastructure_ProductionPreservesImagePins(t *testing.T) {
70102
// Without OBOL_DEVELOPMENT=true, the immutable image pins must
71103
// survive untouched. A regression here would silently downgrade prod

internal/stack/stack.go

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,9 +1141,20 @@ func buildAndImportLocalImages(cfg *config.Config, u *ui.UI) {
11411141
serverCID := k3dServerContainerID(clusterName)
11421142
cache := loadImportedImageCache(cfg)
11431143

1144+
// Tag locally-built images with the per-commit dev tag the manifest
1145+
// rewrite stamped (defaults.CopyInfrastructure persisted it). This keeps a
1146+
// branch/worktree's images on their own tag instead of a shared :latest,
1147+
// so parallel dev stacks on one Docker daemon can't clobber each other's
1148+
// images (the cross-worktree poisoning that masquerades as a stale build).
1149+
devTag := stackdefaults.ReadDevImageTag(cfg)
1150+
11441151
var built, pulled, imported, total int
11451152

11461153
for _, img := range baseLocalImages {
1154+
// img.tag carries the published `:latest` placeholder; swap it for the
1155+
// dev tag the rendered manifests actually pin.
1156+
imgTag := strings.TrimSuffix(img.tag, ":latest") + ":" + devTag
1157+
11471158
contextDir := projectRoot
11481159
if img.contextDir != "" {
11491160
if filepath.IsAbs(img.contextDir) {
@@ -1163,27 +1174,27 @@ func buildAndImportLocalImages(cfg *config.Config, u *ui.UI) {
11631174

11641175
total++
11651176

1166-
if shouldForceRebuild(img.tag) || !dockerImageAvailableLocally(img.tag) {
1177+
if shouldForceRebuild(imgTag) || !dockerImageAvailableLocally(imgTag) {
11671178
if u != nil {
1168-
u.Infof("Building %s from %s", img.tag, img.dockerfile)
1179+
u.Infof("Building %s from %s", imgTag, img.dockerfile)
11691180
}
11701181
buildCmd := exec.Command("docker", "build",
11711182
"-f", dockerfilePath,
1172-
"-t", img.tag,
1183+
"-t", imgTag,
11731184
contextDir,
11741185
)
11751186
buildCmd.Stdout = os.Stdout
11761187
buildCmd.Stderr = os.Stderr
11771188
if err := buildCmd.Run(); err != nil {
11781189
if u != nil {
1179-
u.Warnf("Failed to build %s: %v", img.tag, err)
1190+
u.Warnf("Failed to build %s: %v", imgTag, err)
11801191
}
11811192
continue
11821193
}
11831194
built++
11841195
}
11851196

1186-
if importImageWithCache(k3dBinary, clusterName, img.tag, serverCID, &cache, u) {
1197+
if importImageWithCache(k3dBinary, clusterName, imgTag, serverCID, &cache, u) {
11871198
imported++
11881199
}
11891200
}

0 commit comments

Comments
 (0)