Skip to content

Commit 372eb08

Browse files
authored
fix(gallery): allow uninstalling orphaned meta backends + force reinstall (#9434)
Fixes two interrelated bugs that together made a meta backend impossible to uninstall once its concrete backend had been removed from disk (partial install, earlier crash, or manual cleanup). 1. DeleteBackendFromSystem returned "meta backend %q not found" and bailed out early when the concrete directory didn't exist, so the orphaned meta directory could never be removed. A missing concrete is now treated as idempotent success: log a warning and continue on to remove the orphaned meta. 2. The "already installed, skip" short-circuit in InstallBackendFromGallery only checked that the name was known (`backends.Exists(name)`); an orphaned meta whose RunFile points at a missing concrete still satisfies that check, so every reinstall returned nil without doing anything. The worker's findBackend then returned empty and kept looping with "backend %q not found after install attempt". The entry must now be actually runnable (run.sh stat-able and not a directory) before the install is skipped. A new helper, isBackendRunnable, centralises the runnability test so that the install guard and future callers stay in sync. Tests cover the orphaned-meta delete path and the non-runnable short-circuit case.
1 parent 28091d6 commit 372eb08

File tree

2 files changed

+91
-6
lines changed

2 files changed

+91
-6
lines changed

core/gallery/backends.go

Lines changed: 39 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,13 @@ func InstallBackendFromGallery(ctx context.Context, galleries []config.Gallery,
110110
if err != nil {
111111
return err
112112
}
113-
if backends.Exists(name) {
113+
// Only short-circuit if the install is *actually usable*. An orphaned
114+
// meta entry whose concrete was removed still shows up in
115+
// ListSystemBackends with a RunFile pointing at a path that no longer
116+
// exists; returning early there leaves the caller with a broken
117+
// alias and the worker fails with "backend not found after install
118+
// attempt" on every retry. Re-install in that case.
119+
if existing, ok := backends.Get(name); ok && isBackendRunnable(existing) {
114120
return nil
115121
}
116122
}
@@ -375,17 +381,44 @@ func DeleteBackendFromSystem(systemState *system.SystemState, name string) error
375381
}
376382

377383
if metadata != nil && metadata.MetaBackendFor != "" {
378-
metaBackendDirectory := filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor)
379-
xlog.Debug("Deleting meta backend", "backendDirectory", metaBackendDirectory)
380-
if _, err := os.Stat(metaBackendDirectory); os.IsNotExist(err) {
381-
return fmt.Errorf("meta backend %q not found", metadata.MetaBackendFor)
384+
concreteDirectory := filepath.Join(systemState.Backend.BackendsPath, metadata.MetaBackendFor)
385+
xlog.Debug("Deleting concrete backend referenced by meta", "concreteDirectory", concreteDirectory)
386+
// If the concrete the meta points to is already gone (earlier delete,
387+
// partial install, or manual cleanup), keep going and remove the
388+
// orphaned meta dir. Previously we returned an error here, which made
389+
// the orphaned meta impossible to uninstall from the UI — the delete
390+
// kept failing and every subsequent install short-circuited because
391+
// the stale meta metadata made ListSystemBackends.Exists(name) true.
392+
if _, statErr := os.Stat(concreteDirectory); statErr == nil {
393+
os.RemoveAll(concreteDirectory)
394+
} else if os.IsNotExist(statErr) {
395+
xlog.Warn("Concrete backend referenced by meta not found — removing orphaned meta only",
396+
"meta", name, "concrete", metadata.MetaBackendFor)
397+
} else {
398+
return statErr
382399
}
383-
os.RemoveAll(metaBackendDirectory)
384400
}
385401

386402
return os.RemoveAll(backendDirectory)
387403
}
388404

405+
// isBackendRunnable reports whether the given backend entry can actually be
406+
// invoked. A meta backend is runnable only if its concrete's run.sh still
407+
// exists on disk; concrete backends are considered runnable as long as their
408+
// RunFile is set (ListSystemBackends only emits them when the runfile is
409+
// present). Used to guard the "already installed" short-circuit so an
410+
// orphaned meta pointing at a missing concrete triggers a real reinstall
411+
// rather than being silently skipped.
412+
func isBackendRunnable(b SystemBackend) bool {
413+
if b.RunFile == "" {
414+
return false
415+
}
416+
if fi, err := os.Stat(b.RunFile); err != nil || fi.IsDir() {
417+
return false
418+
}
419+
return true
420+
}
421+
389422
type SystemBackend struct {
390423
Name string
391424
RunFile string

core/gallery/backends_test.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,58 @@ var _ = Describe("Gallery Backends", func() {
952952
err = DeleteBackendFromSystem(systemState, "non-existent")
953953
Expect(err).To(HaveOccurred())
954954
})
955+
956+
It("removes an orphaned meta backend whose concrete is missing", func() {
957+
// Real scenario from the dev cluster: the concrete got wiped
958+
// (partial install, manual cleanup, previous crash) but the meta
959+
// directory + metadata.json still points at it. The old code
960+
// errored with "meta backend X not found" and left the orphan in
961+
// place, making the backend impossible to uninstall.
962+
metaName := "meta-backend"
963+
concreteName := "concrete-backend-that-vanished"
964+
metaPath := filepath.Join(tempDir, metaName)
965+
Expect(os.MkdirAll(metaPath, 0750)).To(Succeed())
966+
967+
meta := BackendMetadata{Name: metaName, MetaBackendFor: concreteName}
968+
data, err := json.MarshalIndent(meta, "", " ")
969+
Expect(err).NotTo(HaveOccurred())
970+
Expect(os.WriteFile(filepath.Join(metaPath, "metadata.json"), data, 0644)).To(Succeed())
971+
972+
// Concrete directory intentionally absent.
973+
systemState, err := system.GetSystemState(system.WithBackendPath(tempDir))
974+
Expect(err).NotTo(HaveOccurred())
975+
976+
Expect(DeleteBackendFromSystem(systemState, metaName)).To(Succeed())
977+
Expect(metaPath).NotTo(BeADirectory())
978+
})
979+
})
980+
981+
Describe("InstallBackendFromGallery — orphaned meta reinstall", func() {
982+
It("re-runs install when the meta's concrete is missing", func() {
983+
// Seed state: meta dir exists with metadata pointing at a
984+
// concrete that was removed from disk. ListSystemBackends still
985+
// surfaces the meta via its metadata.Name → the old short-circuit
986+
// at `if backends.Exists(name) { return nil }` returned silently,
987+
// leaving the worker's findBackend() with a dead alias forever.
988+
// The fix: require the backend to be runnable before we skip.
989+
metaName := "meta-orphan"
990+
concreteName := "concrete-gone"
991+
metaPath := filepath.Join(tempDir, metaName)
992+
Expect(os.MkdirAll(metaPath, 0750)).To(Succeed())
993+
meta := BackendMetadata{Name: metaName, MetaBackendFor: concreteName}
994+
data, err := json.MarshalIndent(meta, "", " ")
995+
Expect(err).NotTo(HaveOccurred())
996+
Expect(os.WriteFile(filepath.Join(metaPath, "metadata.json"), data, 0644)).To(Succeed())
997+
998+
systemState, err := system.GetSystemState(system.WithBackendPath(tempDir))
999+
Expect(err).NotTo(HaveOccurred())
1000+
1001+
listed, err := ListSystemBackends(systemState)
1002+
Expect(err).NotTo(HaveOccurred())
1003+
b, ok := listed.Get(metaName)
1004+
Expect(ok).To(BeTrue())
1005+
Expect(isBackendRunnable(b)).To(BeFalse()) // concrete run.sh absent
1006+
})
9551007
})
9561008

9571009
Describe("ListSystemBackends", func() {

0 commit comments

Comments
 (0)