Skip to content

Commit 240d089

Browse files
kvapsclaude
andcommitted
fix(placer): constrain restore-marked RDs to snapshot nodes and source backend
Bug 038 defense in depth: the REST autoplace handler already pinned candidate nodes/providers for snapshot-restored RDs, but the controller-side Place callers (ResourceGroup apply, RG rebalance, node replacement) fed the placer the raw RG SelectFilter and could place a clone replica on a different backend. Enforce the restore-source constraints inside Place() so every caller honours them; the REST handler's provider lookup now delegates to the shared placer helper. The constraint operates on a copy of the caller's filter, no-ops for RDs without the BlockstorRestoreFromSnapshot marker, and falls through unchanged when the marker/snapshot/source state is unresolvable. .golangci.yml: pkg/placer/ joins the per-path godox exclusion list (Bug-NNN cross-reference comments, same as the sibling packages). Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Andrei Kvapil <kvapss@gmail.com>
1 parent cba8530 commit 240d089

4 files changed

Lines changed: 348 additions & 34 deletions

File tree

.golangci.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,9 @@ linters:
378378
- linters:
379379
- godox
380380
path: pkg/dispatcher/
381+
- linters:
382+
- godox
383+
path: pkg/placer/
381384
- linters:
382385
- godox
383386
path: pkg/version/

pkg/placer/placer.go

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,13 @@ func New(st store.Store) *Placer {
159159
// the migration semantic: even if 2 replicas exist, if one is on an
160160
// evicted node the placer will create a third on a healthy peer.
161161
func (p *Placer) Place(ctx context.Context, rdName string, filter *apiv1.AutoSelectFilter) (int, int, error) {
162+
// Bug 038: an RD born from a snapshot restore / clone must never
163+
// gain a diskful replica on a foreign backend or a snapshot-less
164+
// node — enforce the restore-source constraints HERE so every
165+
// Place caller (REST autoplace, RG apply, RG rebalance, node
166+
// replacement) honours them, not just the REST edge.
167+
filter = p.constrainFilterToRestoreSource(ctx, rdName, filter)
168+
162169
plan, err := p.buildPlan(ctx, rdName, filter)
163170
if err != nil {
164171
return 0, 0, err
@@ -1734,3 +1741,111 @@ func tupleKey(tuple map[string]string) string {
17341741

17351742
return buf.String()
17361743
}
1744+
1745+
// restoreFromSnapshotProp is the RD prop the snapshot-restore /
1746+
// clone REST handlers stamp on a restored target
1747+
// (`<srcRD>:<snapName>`). The placer keys its Bug 038 restore-source
1748+
// constraints on it; the literal mirrors pkg/rest and pkg/dispatcher.
1749+
const restoreFromSnapshotProp = "BlockstorRestoreFromSnapshot"
1750+
1751+
// constrainFilterToRestoreSource applies the restore-source placement
1752+
// constraints for an RD that was materialised from a snapshot
1753+
// (BlockstorRestoreFromSnapshot prop): candidate nodes default to the
1754+
// snapshot's node set, and candidate pools are pinned to the source
1755+
// replica's ProviderKind. Returns the caller's filter untouched when
1756+
// the RD carries no restore marker (the overwhelmingly common case).
1757+
//
1758+
// Bug 038 (release gate): the REST autoplace handler already applied
1759+
// both constraints at the wire edge, but the controller-side Place
1760+
// callers (ResourceGroup apply, RG rebalance, node replacement) fed
1761+
// the placer the RAW RG SelectFilter. On a clone whose source lived
1762+
// on a FILE_THIN pool, the RG reconciler's pass landed the target
1763+
// replica on a ZFS pool — the satellite then piped the FILE_THIN
1764+
// snapshot stream into `zfs recv`, which looped forever on `cannot
1765+
// receive: invalid stream (bad magic number)` and the clone never
1766+
// reached UpToDate. Enforcing the constraint inside Place closes the
1767+
// gap for every current and future caller.
1768+
//
1769+
// The provider pin overrides any caller-supplied ProviderList (same
1770+
// contract as the REST autoplace handler): snapshot streams of
1771+
// different backends are not interchangeable, so a wider caller list
1772+
// can never be honoured safely. An explicit caller NodeNameList is
1773+
// respected — the REST layer validates it against the snapshot's
1774+
// nodes (Bug 397) before any Store mutation.
1775+
//
1776+
// Every lookup is best-effort: when the marker is malformed, the
1777+
// snapshot is gone, or the source has no diskful replica left, the
1778+
// filter passes through unchanged and the regular placement gates
1779+
// apply (satellite-side Bug 397 blank-fallback seeding still protects
1780+
// data integrity in those edge cases).
1781+
func (p *Placer) constrainFilterToRestoreSource(ctx context.Context, rdName string, filter *apiv1.AutoSelectFilter) *apiv1.AutoSelectFilter {
1782+
rd, err := p.store.ResourceDefinitions().Get(ctx, rdName)
1783+
if err != nil {
1784+
return filter
1785+
}
1786+
1787+
stamp := rd.Props[restoreFromSnapshotProp]
1788+
if stamp == "" {
1789+
return filter
1790+
}
1791+
1792+
srcRD, snapName, ok := strings.Cut(stamp, ":")
1793+
if !ok || srcRD == "" || snapName == "" {
1794+
return filter
1795+
}
1796+
1797+
// Copy so the constraint never leaks into the caller's filter
1798+
// (RG reconcilers reuse their filter across sibling RDs).
1799+
out := *filter
1800+
1801+
if len(out.NodeNameList) == 0 {
1802+
snap, snapErr := p.store.Snapshots().Get(ctx, srcRD, snapName)
1803+
if snapErr == nil && len(snap.Nodes) > 0 {
1804+
out.NodeNameList = append([]string(nil), snap.Nodes...)
1805+
}
1806+
}
1807+
1808+
if kind := SourceProviderKind(ctx, p.store, srcRD); kind != "" {
1809+
out.ProviderList = []string{kind}
1810+
}
1811+
1812+
return &out
1813+
}
1814+
1815+
// SourceProviderKind returns the ProviderKind of the pool backing the
1816+
// named RD's first non-Diskless replica, or "" when none is
1817+
// resolvable. Shared by the placer's restore-source constraint and
1818+
// the REST autoplace handler's clone provider pin — `zfs send` and
1819+
// dd/lvm payloads are not interchangeable, so a clone target must
1820+
// stay on the source's backend.
1821+
func SourceProviderKind(ctx context.Context, st store.Store, srcRDName string) string {
1822+
resList, err := st.Resources().ListByDefinition(ctx, srcRDName)
1823+
if err != nil {
1824+
return ""
1825+
}
1826+
1827+
for i := range resList {
1828+
res := &resList[i]
1829+
if slices.Contains(res.Flags, apiv1.ResourceFlagDiskless) {
1830+
continue
1831+
}
1832+
1833+
pool := res.Props[storPoolNameProp]
1834+
if pool == "" {
1835+
continue
1836+
}
1837+
1838+
sp, err := st.StoragePools().Get(ctx, res.NodeName, pool)
1839+
if err != nil {
1840+
continue
1841+
}
1842+
1843+
if sp.ProviderKind == "" || sp.ProviderKind == apiv1.StoragePoolKindDiskless {
1844+
continue
1845+
}
1846+
1847+
return sp.ProviderKind
1848+
}
1849+
1850+
return ""
1851+
}
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
3+
/*
4+
Copyright 2026 Cozystack contributors.
5+
6+
Licensed under the Apache License, Version 2.0 (the "License");
7+
you may not use this file except in compliance with the License.
8+
You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing, software
13+
distributed under the License is distributed on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
See the License for the specific language governing permissions and
16+
limitations under the License.
17+
*/
18+
19+
package placer_test
20+
21+
import (
22+
"testing"
23+
24+
apiv1 "github.com/cozystack/blockstor/pkg/api/v1"
25+
"github.com/cozystack/blockstor/pkg/placer"
26+
"github.com/cozystack/blockstor/pkg/store"
27+
)
28+
29+
// Bug 038 (release gate): the controller-side Place callers
30+
// (ResourceGroup apply, RG rebalance, node replacement) feed the
31+
// placer the RAW RG SelectFilter — no snapshot-node constraint, no
32+
// provider pin. On stand `big` that landed a FILE_THIN-sourced clone
33+
// replica on a ZFS pool; the satellite then piped the source's
34+
// FILE_THIN snapshot stream into `zfs recv`, which looped forever on
35+
// `cannot receive: invalid stream (bad magic number)` and the clone
36+
// never reached UpToDate.
37+
//
38+
// These tests pin the placer-internal restore-source constraint:
39+
// Place() on an RD carrying the BlockstorRestoreFromSnapshot marker
40+
// must (a) only consider the snapshot's nodes when the caller didn't
41+
// pin any, and (b) only consider pools whose ProviderKind matches the
42+
// source replica's — even when a different-backend pool is roomier
43+
// and would win the capacity-weighted ranking.
44+
45+
// seedRestoreMarkedRD builds the Bug 038 repro shape: a deployed
46+
// FILE_THIN source RD (replicas on n1/n2 in pool "stand"), its
47+
// internal clone snapshot on those nodes, a restore-marked target RD,
48+
// and a roomier ZFS_THIN decoy pool on every node.
49+
func seedRestoreMarkedRD(t *testing.T, st store.Store) {
50+
t.Helper()
51+
52+
ctx := t.Context()
53+
54+
for _, n := range []string{"n1", "n2", "n3"} {
55+
if err := st.Nodes().Create(ctx, &apiv1.Node{Name: n, Type: apiv1.NodeTypeSatellite}); err != nil {
56+
t.Fatalf("seed node %s: %v", n, err)
57+
}
58+
59+
if err := st.StoragePools().Create(ctx, &apiv1.StoragePool{
60+
NodeName: n,
61+
StoragePoolName: "stand",
62+
ProviderKind: apiv1.StoragePoolKindFileThin,
63+
FreeCapacity: 1000,
64+
TotalCapacity: 10000,
65+
}); err != nil {
66+
t.Fatalf("seed FILE_THIN pool on %s: %v", n, err)
67+
}
68+
69+
// The decoy: same nodes, far more free space AND a far
70+
// better Free/Total ratio (0.9 vs 0.1) — the capacity-
71+
// weighted ranking picks it whenever the provider pin is
72+
// absent (see TestPlaceUnmarkedRDKeepsFreePlacement).
73+
if err := st.StoragePools().Create(ctx, &apiv1.StoragePool{
74+
NodeName: n,
75+
StoragePoolName: "zfs-thin",
76+
ProviderKind: apiv1.StoragePoolKindZFSThin,
77+
FreeCapacity: 900000,
78+
TotalCapacity: 1000000,
79+
}); err != nil {
80+
t.Fatalf("seed ZFS decoy pool on %s: %v", n, err)
81+
}
82+
}
83+
84+
if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{Name: "src"}); err != nil {
85+
t.Fatalf("seed source RD: %v", err)
86+
}
87+
88+
for _, n := range []string{"n1", "n2"} {
89+
if err := st.Resources().Create(ctx, &apiv1.Resource{
90+
Name: "src", NodeName: n,
91+
Props: map[string]string{"StorPoolName": "stand"},
92+
}); err != nil {
93+
t.Fatalf("seed source replica on %s: %v", n, err)
94+
}
95+
}
96+
97+
if err := st.Snapshots().Create(ctx, &apiv1.Snapshot{
98+
Name: "clone-target",
99+
ResourceName: "src",
100+
Nodes: []string{"n1", "n2"},
101+
VolumeDefinitions: []apiv1.SnapshotVolumeDef{
102+
{VolumeNumber: 0, SizeKib: 64},
103+
},
104+
}); err != nil {
105+
t.Fatalf("seed snapshot: %v", err)
106+
}
107+
108+
if err := st.ResourceDefinitions().Create(ctx, &apiv1.ResourceDefinition{
109+
Name: "target",
110+
Props: map[string]string{"BlockstorRestoreFromSnapshot": "src:clone-target"},
111+
}); err != nil {
112+
t.Fatalf("seed restore-marked target RD: %v", err)
113+
}
114+
115+
if err := st.VolumeDefinitions().Create(ctx, "target", &apiv1.VolumeDefinition{
116+
VolumeNumber: 0, SizeKib: 64,
117+
}); err != nil {
118+
t.Fatalf("seed target VD: %v", err)
119+
}
120+
}
121+
122+
// TestPlaceConstrainsRestoreMarkedRDToSourceBackend drives Place with
123+
// the exact filter shape the RG reconcilers use (bare PlaceCount, no
124+
// pins) and asserts the replica lands on the source's FILE_THIN pool
125+
// on a snapshot node — not on the roomier ZFS decoy.
126+
func TestPlaceConstrainsRestoreMarkedRDToSourceBackend(t *testing.T) {
127+
t.Parallel()
128+
129+
st := store.NewInMemory()
130+
seedRestoreMarkedRD(t, st)
131+
132+
placed, want, err := placer.New(st).Place(t.Context(), "target", &apiv1.AutoSelectFilter{
133+
PlaceCount: 1,
134+
})
135+
if err != nil {
136+
t.Fatalf("Place: %v", err)
137+
}
138+
139+
if placed != 1 || want != 1 {
140+
t.Fatalf("placed/want: got %d/%d, want 1/1", placed, want)
141+
}
142+
143+
resList, err := st.Resources().ListByDefinition(t.Context(), "target")
144+
if err != nil {
145+
t.Fatalf("list target replicas: %v", err)
146+
}
147+
148+
if len(resList) != 1 {
149+
t.Fatalf("target replicas: got %d, want 1", len(resList))
150+
}
151+
152+
res := resList[0]
153+
if res.NodeName != "n1" && res.NodeName != "n2" {
154+
t.Errorf("replica node: got %q, want a snapshot node (n1/n2)", res.NodeName)
155+
}
156+
157+
if pool := res.Props["StorPoolName"]; pool != "stand" {
158+
t.Errorf("replica pool: got %q, want the source's FILE_THIN pool %q "+
159+
"(a ZFS placement feeds the FILE_THIN snapshot stream into "+
160+
"`zfs recv` → bad magic loop)", pool, "stand")
161+
}
162+
}
163+
164+
// TestPlaceRestoreConstraintDoesNotLeakIntoCallerFilter pins that the
165+
// constraint operates on a copy: RG reconcilers reuse one filter
166+
// value across sibling RDs, so a restore-marked RD must not pollute
167+
// the next sibling's candidate set.
168+
func TestPlaceRestoreConstraintDoesNotLeakIntoCallerFilter(t *testing.T) {
169+
t.Parallel()
170+
171+
st := store.NewInMemory()
172+
seedRestoreMarkedRD(t, st)
173+
174+
filter := apiv1.AutoSelectFilter{PlaceCount: 1}
175+
176+
_, _, err := placer.New(st).Place(t.Context(), "target", &filter)
177+
if err != nil {
178+
t.Fatalf("Place: %v", err)
179+
}
180+
181+
if len(filter.NodeNameList) != 0 || len(filter.ProviderList) != 0 {
182+
t.Errorf("caller filter mutated: NodeNameList=%v ProviderList=%v, want both empty",
183+
filter.NodeNameList, filter.ProviderList)
184+
}
185+
}
186+
187+
// TestPlaceUnmarkedRDKeepsFreePlacement is the negative control: an
188+
// RD without the restore marker keeps the unconstrained behaviour
189+
// (the roomier ZFS decoy wins the ranking).
190+
func TestPlaceUnmarkedRDKeepsFreePlacement(t *testing.T) {
191+
t.Parallel()
192+
193+
st := store.NewInMemory()
194+
seedRestoreMarkedRD(t, st)
195+
196+
if err := st.ResourceDefinitions().Create(t.Context(), &apiv1.ResourceDefinition{Name: "plain"}); err != nil {
197+
t.Fatalf("seed plain RD: %v", err)
198+
}
199+
200+
placed, _, err := placer.New(st).Place(t.Context(), "plain", &apiv1.AutoSelectFilter{
201+
PlaceCount: 1,
202+
})
203+
if err != nil {
204+
t.Fatalf("Place: %v", err)
205+
}
206+
207+
if placed != 1 {
208+
t.Fatalf("placed: got %d, want 1", placed)
209+
}
210+
211+
resList, err := st.Resources().ListByDefinition(t.Context(), "plain")
212+
if err != nil {
213+
t.Fatalf("list plain replicas: %v", err)
214+
}
215+
216+
if len(resList) != 1 {
217+
t.Fatalf("plain replicas: got %d, want 1", len(resList))
218+
}
219+
220+
if pool := resList[0].Props["StorPoolName"]; pool != "zfs-thin" {
221+
t.Errorf("unmarked RD pool: got %q, want the roomiest pool %q "+
222+
"(free placement must stay unconstrained)", pool, "zfs-thin")
223+
}
224+
}

0 commit comments

Comments
 (0)