Skip to content

Commit e055663

Browse files
kvapsclaude
andauthored
fix(storage): drop invalid lvcreate --kernel flag breaking LVM-thin clone/restore (BUG-043) + n-rst harness (BUG-044) (#158)
* fix(storage): drop invalid lvcreate --kernel flag from LVM-thin clone/restore (BUG-043) Thin.RestoreVolumeFromSnapshot built an `lvcreate --snapshot ...` command that included a `--kernel` token. That is not a valid lvcreate option (`lvcreate: unrecognized option '--kernel'`, exit 3) on LVM >= 2.03, so the destination backing LV was never created. As a result DRBD never came up and clone / snapshot-restore onto LVM-thin pools never converged to UpToDate. This is the shared seed path for both clone and snapshot-restore on LVM-thin (LUKS cells default to POOL=lvm-thin and hit it too); ZFS pools use a separate `zfs clone` path and were unaffected. Remove the `--kernel` token. The remaining args form a correct thin snapshot/clone command — `lvcreate --snapshot --setactivationskip n --activate y --name <tgt> <vg>/<src-snap>` — matching the normal CreateSnapshot shape plus the activation flags needed to make the cloned LV usable. The stale doc-comment is corrected accordingly. Add a unit regression pinning the generated lvcreate argv for the thin restore/clone path: it asserts the expected thin-snapshot form and that no lvcreate invocation carries `--kernel`. Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Andrei Kvapil <kvapss@gmail.com> * test(harness): read wire flags via .flags not .rsc_flags in cli-matrix (BUG-044) n-rst-recreates-tiebreaker and several sibling cli-matrix cells read the resource-level flags from the `linstor resource list --machine-readable` output via a `.rsc_flags` jq key that does not exist on the blockstor wire — the field is `.flags` (pkg/api/v1/resource.go: `Flags []string json:"flags"`). `.rsc_flags // []` therefore always evaluated to an empty array, so every flag check silently counted zero matches: n-rst saw 0 TIE_BREAKER even though the witness is correctly recreated, and the tiebreaker-collapse / reap / diskless-selection assertions in the sibling cells degenerated to no-ops or wrong-row selections. Switch every `.rsc_flags` reference to `.flags`, mirroring the working sibling cells (r-c-over-tiebreaker-skip-sync.sh, r-l-conns-shapes.sh) that already read `.flags` from the same machine-readable resource list. The product is correct; this is a harness-only fix. Co-Authored-By: Claude <noreply@anthropic.com> Signed-off-by: Andrei Kvapil <kvapss@gmail.com> --------- Signed-off-by: Andrei Kvapil <kvapss@gmail.com> Co-authored-by: Claude <noreply@anthropic.com>
1 parent c193457 commit e055663

8 files changed

Lines changed: 58 additions & 13 deletions

pkg/storage/lvm/lvm_thin.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ func (t *Thin) DeleteSnapshot(ctx context.Context, snap storage.Snapshot) error
256256
//
257257
// Upstream LINSTOR equivalent:
258258
//
259-
// lvcreate -s --kernel --activate y --name <tgt> <vg>/<src-snap>
259+
// lvcreate -s --setactivationskip n --activate y --name <tgt> <vg>/<src-snap>
260260
//
261261
// Idempotent: target LV present → resumed reconcile, no-op.
262262
func (t *Thin) RestoreVolumeFromSnapshot(ctx context.Context, target storage.Volume, src storage.Snapshot) error {
@@ -272,7 +272,6 @@ func (t *Thin) RestoreVolumeFromSnapshot(ctx context.Context, target storage.Vol
272272

273273
_, err := t.exec.Run(ctx, "lvcreate",
274274
Args("--snapshot",
275-
"--kernel",
276275
"--setactivationskip", "n",
277276
"--activate", "y",
278277
"--name", tgtName,

pkg/storage/lvm/lvm_thin_test.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,52 @@ func TestThinCreateSnapshot(t *testing.T) {
346346
}
347347
}
348348

349+
// TestThinRestoreVolumeFromSnapshotIssuesThinSnapshotLvcreate is the
350+
// BUG-043 regression pin. The clone / snapshot-restore seed path on
351+
// LVM-thin must drive a plain thin-snapshot `lvcreate --snapshot
352+
// --setactivationskip n --activate y --name <tgt> <vg>/<src-snap>` —
353+
// the original code emitted a `--kernel` token, which is NOT a valid
354+
// lvcreate option (`lvcreate: unrecognized option '--kernel'`, exit 3)
355+
// on LVM >= 2.03. With that flag the destination backing LV was never
356+
// created, so DRBD never came up and clone/restore onto LVM-thin pools
357+
// never converged to UpToDate. The negative assertion (no `--kernel`)
358+
// is load-bearing; the positive assertion pins the activation
359+
// semantics so the cloned LV is actually usable (skip-activation
360+
// cleared, activated).
361+
func TestThinRestoreVolumeFromSnapshotIssuesThinSnapshotLvcreate(t *testing.T) {
362+
fx := storage.NewFakeExec()
363+
// Target LV not yet present → restore proceeds past the
364+
// idempotency short-circuit.
365+
fx.Expect("lvs --config devices { filter=['r|^/dev/drbd|','r|^/dev/zd|'] } --noheadings -o lv_name vg/pvc-2_00000",
366+
storage.FakeResponse{Stdout: []byte("")})
367+
// Source snapshot LV exists → clone source is found.
368+
fx.Expect("lvs --config devices { filter=['r|^/dev/drbd|','r|^/dev/zd|'] } --noheadings -o lv_name vg/pvc-1_snap-1_00000",
369+
storage.FakeResponse{Stdout: []byte(" pvc-1_snap-1_00000")})
370+
371+
p := lvm.NewThin(lvm.ThinConfig{VolumeGroup: "vg", ThinPool: "thinpool"}, fx)
372+
373+
err := p.RestoreVolumeFromSnapshot(t.Context(),
374+
storage.Volume{ResourceName: "pvc-2", VolumeNumber: 0},
375+
storage.Snapshot{ResourceName: "pvc-1", SnapshotName: "snap-1"})
376+
if err != nil {
377+
t.Fatalf("RestoreVolumeFromSnapshot: %v", err)
378+
}
379+
380+
want := "lvcreate --config devices { filter=['r|^/dev/drbd|','r|^/dev/zd|'] } --snapshot --setactivationskip n --activate y --name pvc-2_00000 vg/pvc-1_snap-1_00000"
381+
if !slices.Contains(fx.CommandLines(), want) {
382+
t.Errorf("expected %q in calls; got %v", want, fx.CommandLines())
383+
}
384+
385+
// BUG-043: no lvcreate invocation may carry the invalid `--kernel`
386+
// flag — it makes LVM >= 2.03 reject the command with exit 3.
387+
for _, line := range fx.CommandLines() {
388+
if strings.HasPrefix(line, "lvcreate ") && strings.Contains(line, "--kernel") {
389+
t.Errorf("BUG-043: thin restore lvcreate must NOT pass the invalid "+
390+
"--kernel flag; got %q", line)
391+
}
392+
}
393+
}
394+
349395
// TestThinExecError surfaces the wrapped exec error verbatim.
350396
func TestThinExecError(t *testing.T) {
351397
fx := storage.NewFakeExec()

tests/e2e/cli-matrix/multi-volume-late-vd-create.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ if [[ "$late_up" != "true" ]]; then
126126
# Surface the smoking gun: a Diskless line for a non-DISKLESS spec.
127127
echo "----- linstor r l --resources $RD (with flags) -----" >&2
128128
"${LCTL[@]}" --machine-readable resource list --resources "$RD" 2>/dev/null \
129-
| jq -r '.[][]? | "\(.node_name) vol=\(.vlms[]?.vlm_nr) state=\(.vlms[]?.state.disk_state) flags=\(.rsc_flags//[])"' >&2 || true
129+
| jq -r '.[][]? | "\(.node_name) vol=\(.vlms[]?.vlm_nr) state=\(.vlms[]?.state.disk_state) flags=\(.flags//[])"' >&2 || true
130130
exit 1
131131
fi
132132

@@ -137,7 +137,7 @@ fi
137137
# Bug 332 (Unintentional Diskless) from spec-pinned diskless replicas.
138138
echo ">> [Bug 332] kernel-truth: drbdadm status on a diskful node"
139139
satellite_node=$("${LCTL[@]}" --machine-readable resource list --resources "$RD" 2>/dev/null \
140-
| jq -r '.[][]? | select((.rsc_flags//[]) | (map(. == "DISKLESS") | any | not)) | .node_name' \
140+
| jq -r '.[][]? | select((.flags//[]) | (map(. == "DISKLESS") | any | not)) | .node_name' \
141141
2>/dev/null | head -1)
142142

143143
if [[ -z "$satellite_node" ]]; then

tests/e2e/cli-matrix/n-rst-recreates-tiebreaker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ n_tb=0
186186
while (( $(date +%s) < deadline )); do
187187
wire=$(linstor_r_l_json "$RD")
188188
n_tb=$(printf '%s' "$wire" \
189-
| jq -r '.[][] | select((.rsc_flags // []) | index("TIE_BREAKER")) | .name' 2>/dev/null \
189+
| jq -r '.[][] | select((.flags // []) | index("TIE_BREAKER")) | .name' 2>/dev/null \
190190
| wc -l | tr -d ' ' || echo 0)
191191
if [[ "$n_tb" == "1" ]]; then
192192
break
@@ -195,7 +195,7 @@ while (( $(date +%s) < deadline )); do
195195
done
196196
if [[ "$n_tb" != "1" ]]; then
197197
echo "FAIL (Bug 386): linstor r l shows $n_tb TIE_BREAKER rows for $RD, want 1" >&2
198-
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .rsc_flags}' 2>/dev/null >&2 || true
198+
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .flags}' 2>/dev/null >&2 || true
199199
exit 1
200200
fi
201201

tests/e2e/cli-matrix/r-d-collapses-tiebreaker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ wire=$(linstor_r_l_json "$RD")
165165
n_wire=$(printf '%s' "$wire" | jq -r '.[][] | .name' 2>/dev/null | wc -l | tr -d ' ' || echo 0)
166166
if [[ "$n_wire" != "1" ]]; then
167167
echo "FAIL (Bug 338): linstor r l shows $n_wire rows for $RD, want 1" >&2
168-
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .rsc_flags}' 2>/dev/null >&2 || true
168+
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .flags}' 2>/dev/null >&2 || true
169169
exit 1
170170
fi
171171

172-
wire_flags=$(printf '%s' "$wire" | jq -r '.[][] | .rsc_flags // [] | join(",")' 2>/dev/null || echo "")
172+
wire_flags=$(printf '%s' "$wire" | jq -r '.[][] | .flags // [] | join(",")' 2>/dev/null || echo "")
173173
if [[ "$wire_flags" == *"TIE_BREAKER"* ]] || [[ "$wire_flags" == *"DISKLESS"* ]]; then
174174
echo "FAIL (Bug 338): surviving row carries unexpected flags=$wire_flags" >&2
175175
exit 1

tests/e2e/cli-matrix/r-td-diskless-reaps-tiebreaker.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,11 @@ wire=$(linstor_r_l_json "$RD")
171171
n_wire=$(printf '%s' "$wire" | jq -r '.[][] | .name' 2>/dev/null | wc -l | tr -d ' ' || echo 0)
172172
if [[ "$n_wire" != "2" ]]; then
173173
echo "FAIL: linstor r l shows $n_wire rows for $RD, want 2" >&2
174-
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .rsc_flags}' 2>/dev/null >&2 || true
174+
printf '%s\n' "$wire" | jq '.[][]| {name, node_name, flags: .flags}' 2>/dev/null >&2 || true
175175
exit 1
176176
fi
177177

178-
wire_flags=$(printf '%s' "$wire" | jq -r '.[][] | .rsc_flags // [] | join(",")' 2>/dev/null || echo "")
178+
wire_flags=$(printf '%s' "$wire" | jq -r '.[][] | .flags // [] | join(",")' 2>/dev/null || echo "")
179179
if [[ "$wire_flags" == *"TIE_BREAKER"* ]]; then
180180
echo "FAIL: a surviving row still carries TIE_BREAKER (flags=$wire_flags)" >&2
181181
exit 1

tests/e2e/cli-matrix/rd-d-deleting-surface.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,13 +135,13 @@ while (( $(date +%s) < deadline )); do
135135
# Wire-level assertion: the Resource view carries the DELETE flag for
136136
# the blocked replica. golinstor's `r l -o json` is a doubly-nested
137137
# array of per-replica rows; the resource-level flags live under
138-
# `rsc_flags` (see multi-volume-late-vd-create.sh / n-rst-recreates-
138+
# `flags` (see multi-volume-late-vd-create.sh / n-rst-recreates-
139139
# tiebreaker.sh for the same shape).
140140
rl_json=$("${LCTL[@]}" resource list --resources "$RD" -o json 2>/dev/null || echo '[]')
141141
has_delete=$(jq -r --arg n "$N2" '
142142
[ .[]?[]?
143143
| select(.node_name == $n)
144-
| (.rsc_flags // []) | index("DELETE") ] | any' <<<"$rl_json" 2>/dev/null || echo false)
144+
| (.flags // []) | index("DELETE") ] | any' <<<"$rl_json" 2>/dev/null || echo false)
145145
# Fallback: the human-readable State column rendered by the CLI.
146146
rl_txt=$("${LCTL[@]}" resource list --resources "$RD" 2>/dev/null || true)
147147
if [[ "$has_delete" == "true" ]] || grep -qiE 'DELETING' <<<"$rl_txt"; then

tests/e2e/cli-matrix/vd-d-prune-resource-volumes-no-flap.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ both_up=false
119119
while (( $(date +%s) < deadline )); do
120120
# 2 diskful replicas × 2 volumes = 4 disk_state strings, all UpToDate.
121121
states=$("${LCTL[@]}" --machine-readable resource list --resources "$RD" 2>/dev/null \
122-
| jq -r '[.[][]? | select((.rsc_flags//[]) | (map(. == "DISKLESS" or . == "TIE_BREAKER") | any | not)) | .vlms[]? | .state.disk_state // "Unknown"] | join(",")' \
122+
| jq -r '[.[][]? | select((.flags//[]) | (map(. == "DISKLESS" or . == "TIE_BREAKER") | any | not)) | .vlms[]? | .state.disk_state // "Unknown"] | join(",")' \
123123
2>/dev/null || echo "")
124124
count_uptodate=$(awk -F, '{ for (i=1;i<=NF;i++) if ($i=="UpToDate") n++ } END { print n+0 }' <<<"$states")
125125
if (( count_uptodate == 4 )); then

0 commit comments

Comments
 (0)