Skip to content

Commit 24baa83

Browse files
kvapsclaude
andcommitted
fix(rest): eager-place only on clone; bare restore stays an empty shell
PR #153 changed the bare snapshot-restore handler to synchronously stamp a replica on EVERY snapshot node. That fixed the clone verb (a one-shot CSI op with no follow-up autoplace) but regressed the staged cross-node restore workflow: the e2e restore lanes place the data-bearing replica on a snapshot node first, wait UpToDate, then add a cross-node replica that SyncTargets it. With both snapshot nodes already eagerly placed, place_count was satisfied and the cross-node replica was never created (stuck Connecting / no devicePath). Split the placement policy by caller. The clone path (cloneWithData) eager-places on the snapshot nodes in the source pool. The bare restore handler leaves an empty shell when no --node-name is given, so the operator / linstor-csi drives placement — exactly the pre-#153 / upstream restore-then-scale-out contract. Explicit node lists are still stamped verbatim on both paths. Cross-backend protection is unaffected: the placer's restore-source backend pin keeps the operator's follow-up autoplace on the source backend, and the REST autoplace handler still constrains a no-node-list autoplace of a restore-marked RD to the snapshot nodes. Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Andrei Kvapil <kvapss@gmail.com>
1 parent 11632d1 commit 24baa83

3 files changed

Lines changed: 110 additions & 44 deletions

File tree

pkg/rest/rd_clone.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,11 @@ func (s *Server) cloneWithData(w http.ResponseWriter, r *http.Request, src *apiv
226226

227227
restoreReq := &snapshotRestoreRequest{ToResource: req.Name}
228228

229-
_, err := s.materializeRestoredRD(ctx, src.Name, restoreReq, snap)
229+
// Clone path: eagerPlace=true. `rd clone` is a one-shot CSI
230+
// operation with no follow-up autoplace, so the clone replicas must
231+
// materialise on the snapshot-holding nodes in the source pool here
232+
// (same backend by construction — Bug 038).
233+
_, err := s.materializeRestoredRD(ctx, src.Name, restoreReq, snap, true)
230234
if err != nil {
231235
writeCloneRefused(w, http.StatusInternalServerError, src.Name, req.Name, &apiv1.APICallRc{
232236
RetCode: apiCallRcError,

pkg/rest/snapshot_restore.go

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,11 @@ func (s *Server) handleSnapshotRestore(w http.ResponseWriter, r *http.Request) {
308308
return
309309
}
310310

311-
newRDName, err := s.materializeRestoredRD(r.Context(), srcRD, &req, &snap)
311+
// Bare restore: eagerPlace=false. With no explicit --node-name the
312+
// target is left an empty shell for the operator / linstor-csi to
313+
// place (restore-then-scale-out); an explicit node list is still
314+
// stamped verbatim inside materializeRestoredRD.
315+
newRDName, err := s.materializeRestoredRD(r.Context(), srcRD, &req, &snap, false)
312316
if err != nil {
313317
writeStoreError(w, err)
314318

@@ -411,7 +415,29 @@ func resolveSnapshotName(r *http.Request, req *snapshotRestoreRequest) string {
411415
// RD's LayerStack + Props (snapshot Props win when set) and hydrates
412416
// its VolumeDefinitions from the snapshot's recorded volume layout.
413417
// Returns the new RD's name on success.
414-
func (s *Server) materializeRestoredRD(ctx context.Context, srcRD string, req *snapshotRestoreRequest, snap *apiv1.Snapshot) (string, error) {
418+
//
419+
// eagerPlace selects the placement policy for an EMPTY caller node list:
420+
//
421+
// - true (the clone path, cloneWithData): stamp one diskful replica on
422+
// EVERY snapshot-holding node in the source pool. `rd clone` is a
423+
// one-shot CSI operation with no follow-up autoplace, so the clone
424+
// replicas must materialise here; keeping them on the snapshot nodes
425+
// in the SOURCE backend is what closes Bug 038 (no cross-backend
426+
// stream into `zfs recv`).
427+
// - false (the bare snapshot-restore handler): leave an EMPTY shell —
428+
// no Resources stamped — so the operator / linstor-csi drives
429+
// placement explicitly via `rd ap`. This preserves upstream's
430+
// restore-then-scale-out workflow and the legitimate STAGED
431+
// cross-node bring-up the e2e restore lanes rely on (place the
432+
// data-bearing replica on a snapshot node first, then add a
433+
// cross-node replica that SyncTargets it). The placer's restore-
434+
// source backend pin (constrainFilterToRestoreSource) keeps that
435+
// later autoplace same-backend, so Bug 038 stays fixed without the
436+
// eager all-nodes stamp.
437+
//
438+
// An explicit caller node list is always stamped verbatim, regardless of
439+
// eagerPlace.
440+
func (s *Server) materializeRestoredRD(ctx context.Context, srcRD string, req *snapshotRestoreRequest, snap *apiv1.Snapshot, eagerPlace bool) (string, error) {
415441
srcRDObj, err := s.Store.ResourceDefinitions().Get(ctx, srcRD)
416442
if err != nil {
417443
return "", err //nolint:wrapcheck // surfaced via writeStoreError
@@ -464,7 +490,7 @@ func (s *Server) materializeRestoredRD(ctx context.Context, srcRD string, req *s
464490
// observed a Resource for the new RD, so the BlockstorRestoreFromSnapshot
465491
// prop marker on the RD was dead code and the restored RD stayed an
466492
// empty shell. Mirrors upstream CtrlSnapshotRestoreApiCallHandler.
467-
err = s.placeRestoredResources(ctx, srcRD, &newRD, req, snap)
493+
err = s.placeRestoredResources(ctx, srcRD, &newRD, req, snap, eagerPlace)
468494
if err != nil {
469495
return "", err
470496
}
@@ -502,15 +528,29 @@ func (s *Server) materializeRestoredRD(ctx context.Context, srcRD string, req *s
502528
//
503529
// The Nodes / NodeNames request fields are aliased — callers may use
504530
// either; we normalise to one canonical list before iterating.
505-
func (s *Server) placeRestoredResources(ctx context.Context, srcRDName string, newRD *apiv1.ResourceDefinition, req *snapshotRestoreRequest, snap *apiv1.Snapshot) error {
531+
func (s *Server) placeRestoredResources(ctx context.Context, srcRDName string, newRD *apiv1.ResourceDefinition, req *snapshotRestoreRequest, snap *apiv1.Snapshot, eagerPlace bool) error {
506532
nodes := canonicalRestoreNodeList(req)
507533

508534
if len(nodes) == 0 {
509-
// Upstream default: restore on every node the snapshot exists
510-
// on. An empty snap.Nodes (legacy snapshot CRD without the
535+
if !eagerPlace {
536+
// Bare restore with no explicit nodes → EMPTY shell. The
537+
// operator / linstor-csi drives placement via `rd ap`,
538+
// which the placer keeps on the source backend
539+
// (constrainFilterToRestoreSource). This preserves the
540+
// upstream restore-then-scale-out workflow and the staged
541+
// cross-node bring-up the e2e restore lanes rely on. _ =
542+
// snap keeps the signature uniform with the eager branch.
543+
_ = snap
544+
545+
return nil
546+
}
547+
548+
// Clone path (eager): stamp one replica on every snapshot node
549+
// in the source pool. `rd clone` is a one-shot CSI op with no
550+
// follow-up autoplace, so the clone replicas must materialise
551+
// here. An empty snap.Nodes (legacy snapshot CRD without the
511552
// node list) stamps nothing — the operator can still drive
512-
// placement explicitly via `linstor rd ap <new>`, which the
513-
// autoplace handler constrains to snapshot-compatible pools.
553+
// placement explicitly via `linstor rd ap <new>`.
514554
nodes = snap.Nodes
515555
}
516556

pkg/rest/snapshot_restore_test.go

Lines changed: 57 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -293,26 +293,27 @@ func TestSnapshotRestoreConstrainsProviderToSource(t *testing.T) {
293293
t.Fatalf("restore status: got %d, want 201", resp.StatusCode)
294294
}
295295

296-
// 2) the restore itself stamped replicas on the snapshot's nodes
297-
// in the SOURCE pool (Bug 038 / upstream restore semantics).
296+
// 2) the bare restore (no --node-name) leaves an EMPTY shell — the
297+
// operator / linstor-csi drives placement, preserving the
298+
// restore-then-scale-out workflow and the staged cross-node
299+
// bring-up the e2e restore lanes rely on. The cross-backend
300+
// protection moved to the placer, asserted in step 3.
298301
got, err := st.Resources().ListByDefinition(ctx, "pvc-2")
299302
if err != nil {
300303
t.Fatalf("list pvc-2: %v", err)
301304
}
302305

303-
if len(got) != 2 {
304-
t.Fatalf("restored replicas: got %d, want 2 (one per snapshot node)", len(got))
305-
}
306-
307-
for i := range got {
308-
if stor := got[i].Props["StorPoolName"]; stor != "zpool" {
309-
t.Errorf("replica on %s landed on %q, want the source pool %q",
310-
got[i].NodeName, stor, "zpool")
311-
}
306+
if len(got) != 0 {
307+
t.Fatalf("bare restore replicas: got %d, want 0 (empty shell — "+
308+
"placement is the operator's call)", len(got))
312309
}
313310

314-
// 3) a follow-up autoplace at the same place_count is an
315-
// idempotent no-op — no extra replicas, no pool churn.
311+
// 3) the operator's autoplace honours the restore-source BACKEND
312+
// pin: even though the LVM_THIN candidates are roomier (FreeCapacity
313+
// 9000 vs the ZFS_THIN 1000), every diskful replica must land on a
314+
// ZFS_THIN pool — the placer's constrainFilterToRestoreSource drops
315+
// the mismatched backend (Bug 038: a ZFS snapshot stream piped into
316+
// an LVM receiver fails opaquely).
316317
body, _ = json.Marshal(map[string]any{
317318
"select_filter": map[string]any{"place_count": 2},
318319
})
@@ -330,7 +331,16 @@ func TestSnapshotRestoreConstrainsProviderToSource(t *testing.T) {
330331
}
331332

332333
if len(got) != 2 {
333-
t.Fatalf("replicas after idempotent autoplace: got %d, want 2", len(got))
334+
t.Fatalf("replicas after autoplace: got %d, want 2", len(got))
335+
}
336+
337+
for i := range got {
338+
stor := got[i].Props["StorPoolName"]
339+
if stor != "zfs-target-n1" && stor != "zfs-target-n2" {
340+
t.Errorf("replica on %s landed on %q, want a ZFS_THIN target "+
341+
"(the LVM_THIN decoy must be filtered by the backend pin)",
342+
got[i].NodeName, stor)
343+
}
334344
}
335345
}
336346

@@ -922,39 +932,51 @@ func TestSnapshotRestoreBug354AutoplacesWhenNodesEmpty(t *testing.T) {
922932
t.Fatalf("status: got %d, want 201", resp.StatusCode)
923933
}
924934

935+
// The bare restore (no --node-name) leaves an EMPTY shell — the
936+
// operator / linstor-csi drives placement. (Bug 354 was about
937+
// stamping SOMETHING so satellites reconcile; the explicit
938+
// autoplace below does that, while the placer's backend pin keeps
939+
// it on the source backend — the Bug 038 fix without the eager
940+
// all-nodes stamp that broke staged cross-node bring-up.)
925941
got, err := st.Resources().ListByDefinition(ctx, "pvc-restored")
926942
if err != nil {
927943
t.Fatalf("list restored Resources: %v", err)
928944
}
929945

930-
if len(got) != 2 {
931-
t.Fatalf("Resource CRDs stamped: got %d, want %d (one per snapshot node)",
932-
len(got), 2)
946+
if len(got) != 0 {
947+
t.Fatalf("bare restore Resource CRDs: got %d, want 0 (empty shell)", len(got))
933948
}
934949

935-
// Both replicas must land on the snapshot's nodes, in the
936-
// SOURCE pool — never on the roomier different-backend decoy
937-
// (Bug 038: a FILE_THIN→ZFS placement loops forever on
938-
// `zfs recv … bad magic number` at the satellite).
939-
wantNodes := map[string]bool{"n1": false, "n2": false}
940-
for _, res := range got {
941-
if _, ok := wantNodes[res.NodeName]; !ok {
942-
t.Errorf("placed off snapshot node set: got %q", res.NodeName)
950+
// Operator autoplace: with the parent RG's empty SelectFilter the
951+
// caller supplies place_count explicitly. The placer's restore-
952+
// source backend pin must keep every replica on the ZFS_THIN source
953+
// pool, never the roomier LVM_THIN decoy (Bug 038: a ZFS snapshot
954+
// stream into an LVM receiver fails opaquely at the satellite).
955+
body, _ = json.Marshal(map[string]any{
956+
"select_filter": map[string]any{"place_count": 2},
957+
})
943958

944-
continue
945-
}
959+
resp = httpPost(t, base+"/v1/resource-definitions/pvc-restored/autoplace", body)
960+
_ = resp.Body.Close()
946961

947-
wantNodes[res.NodeName] = true
962+
if resp.StatusCode != http.StatusOK {
963+
t.Fatalf("autoplace status: got %d, want 200", resp.StatusCode)
964+
}
948965

949-
if pool := res.Props["StorPoolName"]; pool != "zpool" {
950-
t.Errorf("replica on %s landed on pool %q, want the source pool %q",
951-
res.NodeName, pool, "zpool")
952-
}
966+
got, err = st.Resources().ListByDefinition(ctx, "pvc-restored")
967+
if err != nil {
968+
t.Fatalf("list restored Resources after autoplace: %v", err)
969+
}
970+
971+
if len(got) != 2 {
972+
t.Fatalf("Resource CRDs after autoplace: got %d, want 2", len(got))
953973
}
954974

955-
for n, placed := range wantNodes {
956-
if !placed {
957-
t.Errorf("expected a replica on snapshot node %q, none placed", n)
975+
for _, res := range got {
976+
if pool := res.Props["StorPoolName"]; pool != "zpool" {
977+
t.Errorf("replica on %s landed on pool %q, want the source pool %q "+
978+
"(the LVM_THIN decoy must be filtered by the backend pin)",
979+
res.NodeName, pool, "zpool")
958980
}
959981
}
960982
}

0 commit comments

Comments
 (0)