|
| 1 | +#!/usr/bin/env bash |
| 2 | +# |
| 3 | +# usage: encryption-passphrase-luks-rd.sh WORK_DIR |
| 4 | +# |
| 5 | +# L6 cli-matrix cell — Bug 023 (fix: encryption create-passphrase |
| 6 | +# unlocks LUKS provisioning). |
| 7 | +# |
| 8 | +# Audit gap: `linstor encryption create-passphrase` stored the cluster |
| 9 | +# master passphrase in the blockstor-cluster-passphrase Secret, but |
| 10 | +# nothing downstream read it: |
| 11 | +# - the LUKS RD-create gate only consulted the legacy |
| 12 | +# DrbdOptions/EncryptPassphrase controller property, so the |
| 13 | +# upstream-standard flow (create-passphrase → rd c -l |
| 14 | +# drbd,luks,storage) was rejected with "LUKS layer requires |
| 15 | +# DrbdOptions/EncryptPassphrase to be set first" — and the hint |
| 16 | +# told operators to store a PLAINTEXT passphrase in a controller |
| 17 | +# prop; |
| 18 | +# - the satellite lifted the LUKS key onto the LuksPassphrase wire |
| 19 | +# prop only from controller-scope props, so a Secret-only cluster |
| 20 | +# looped on "LUKS in layer stack but Props.LuksPassphrase empty" |
| 21 | +# at apply time. |
| 22 | +# |
| 23 | +# Post-fix contract (pinned here): the Secret set by `encryption |
| 24 | +# create-passphrase` is the PRIMARY, upstream-parity key source — the |
| 25 | +# whole LUKS lifecycle must work WITHOUT the legacy controller prop |
| 26 | +# ever being set. The sibling cells (luks-rd-create-encrypted.sh, |
| 27 | +# luks-clone-encrypted.sh, replay/luks-encrypted-rd.yaml) still set |
| 28 | +# the legacy prop and keep covering the deprecated path. |
| 29 | +# |
| 30 | +# Flow + assertions: |
| 31 | +# 1. cleanup_encryption_state → known-clean baseline (no Secret, no |
| 32 | +# legacy prop). |
| 33 | +# 2. linstor encryption create-passphrase --passphrase <pw> → exit 0. |
| 34 | +# 3. legacy prop ABSENT on `controller list-properties` (and stays |
| 35 | +# absent through the whole cell — provisioning must not depend on |
| 36 | +# anything writing it behind our back). |
| 37 | +# 4. rd c -l drbd,luks,storage → exit 0 (pre-fix: rejected). |
| 38 | +# 5. vd c + r c --auto-place=2 → both diskful replicas UpToDate. |
| 39 | +# 6. kernel-level proof on EACH replica: backing LV/zvol carries a |
| 40 | +# real LUKS header AND the cluster passphrase opens it |
| 41 | +# (cryptsetup --test-passphrase) — the Secret value travelled the |
| 42 | +# satellite channel to luksFormat, not just past the REST gate. |
| 43 | +# |
| 44 | +# Unit pins: pkg/rest/luks_gate_bug023_test.go, |
| 45 | +# pkg/satellite/controllers/luks_passphrase_internal_test.go. This |
| 46 | +# cell is the stand-side companion: real python-linstor → apiserver → |
| 47 | +# satellite → cryptsetup. |
| 48 | + |
# Strict mode: stop on command failure, on unset variables, and on any
# failing pipeline stage.
set -o errexit -o nounset -o pipefail

# The single positional argument is the work directory; the kubeconfig
# used by everything below is expected at $WORK_DIR/kubeconfig.
WORK_DIR=${1:?work_dir required}
KUBECONFIG="$WORK_DIR/kubeconfig"
export KUBECONFIG

# Locate lib.sh next to this script, independent of the caller's cwd.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=lib.sh
. "$SCRIPT_DIR/lib.sh"

# NOTE(review): presumably gates the cell on >=2 worker nodes — helper
# lives in lib.sh, confirm there.
require_workers 2

linstor_cli_setup

# Fixture identifiers: resource-definition name, storage pool
# (overridable through the POOL environment variable) and the cluster
# passphrase handed to `encryption create-passphrase`.
RD=cli-matrix-023-pp-luks
POOL=${POOL:-lvm-thin}
PASSPHRASE='cli-matrix-023-secret-pp!'
| 65 | + |
# cleanup — EXIT-trap handler that tears the cell's fixtures down.
#
# Globals:  RD (read)
# Exits:    with the status the script was already exiting with; if
#           that status was 0 but a cleanup step failed, exits 1 so a
#           dirty teardown still fails the cell.
#
# EXIT traps run with `set -e` still active, so an unguarded failing
# step (e.g. delete_rd when the RD was never created) would abort the
# handler and skip the remaining steps — leaving the passphrase Secret
# behind for the next cell. Every step is therefore guarded with `||`
# and the failure is only recorded.
cleanup() {
    local exit_rc=$?
    local failed=0

    delete_rd "$RD" || failed=1
    assert_no_orphans "$RD" || failed=1
    cleanup_encryption_state || failed=1
    linstor_cli_teardown || failed=1

    # Never mask the cell's own failure status; only promote a passing
    # run to a failure when the teardown itself went wrong.
    if (( exit_rc == 0 && failed != 0 )); then
        exit_rc=1
    fi
    exit "$exit_rc"
}
trap cleanup EXIT
| 73 | + |
# assert_legacy_prop_absent <phase> — the Bug 023 core invariant: the
# deprecated DrbdOptions/EncryptPassphrase controller property must
# never appear during the Secret-only flow. Checked via the
# machine-readable `controller list-properties` output.
#
# Arguments: $1 - phase label, only used in failure messages
# Globals:   LCTL (read) - linstor CLI invocation array
# Exits:     1 when the property is present, OR when its absence cannot
#            be proven (CLI call failed / output not parseable). The
#            check fails closed: a broken listing must not be counted
#            as "0 matches", otherwise the invariant passes vacuously.
assert_legacy_prop_absent() {
    local phase=$1
    local props_json present

    if ! props_json=$("${LCTL[@]}" --machine-readable controller list-properties 2>/dev/null); then
        echo "FAIL (Bug 023): controller list-properties failed ($phase) — cannot prove the legacy prop is absent" >&2
        exit 1
    fi

    # Count every object anywhere in the document whose .key is the
    # legacy property name.
    present=$(jq -r '[.. | objects | select(.key == "DrbdOptions/EncryptPassphrase")] | length' \
        <<<"$props_json" 2>/dev/null) || present=
    if [[ ! "$present" =~ ^[0-9]+$ ]]; then
        echo "FAIL (Bug 023): could not parse controller list-properties output ($phase)" >&2
        exit 1
    fi

    if [[ "$present" != "0" ]]; then
        echo "FAIL (Bug 023): legacy DrbdOptions/EncryptPassphrase controller prop present ($phase)" >&2
        echo " the Secret-only flow must not set or require it" >&2
        exit 1
    fi
}
| 90 | + |
# Pre-flight: the cell needs the chosen pool on at least two nodes. A
# failing list call degrades to an empty listing and a failing jq to a
# count of 0, so an unavailable fixture ends in SKIP rather than FAIL.
printf '%s\n' ">> [Bug 023] pre-flight: 2 healthy $POOL SPs"
pool_listing=$(
    "${LCTL[@]}" --machine-readable storage-pool list --storage-pools "$POOL" 2>/dev/null \
        || echo "[]"
)
pool_node_count=$(
    jq -r '[.[]? | .[]? | select(.provider_kind != null) | .node_name] | unique | length' \
        <<<"$pool_listing" 2>/dev/null \
        || echo 0
)
if (( pool_node_count < 2 )); then
    printf '%s\n' "SKIP: $POOL SP not on >=2 nodes (got $pool_node_count) — encrypted-RD autoplace fixture unavailable"
    exit 0
fi
| 98 | + |
# Known-clean baseline: no passphrase Secret, no legacy controller
# prop. Without this the create-passphrase below answers "already
# set" and the cell would silently test the wrong (modify) path.
cleanup_encryption_state

echo ">> [Bug 023] linstor encryption create-passphrase (Secret-only flow)"
err_file=$(mktemp)
# Capture the command's real exit status with `|| rc=$?`. Reading $?
# inside `if ! cmd; then` yields the status of the negated condition,
# which is always 0, so the failure message would always say "exited 0".
rc=0
"${LCTL[@]}" encryption create-passphrase --passphrase "$PASSPHRASE" 2>"$err_file" || rc=$?
if (( rc != 0 )); then
    echo "FAIL (Bug 023): create-passphrase exited $rc" >&2
    cat "$err_file" >&2
    rm -f "$err_file"
    exit 1
fi
rm -f "$err_file"

echo ">> [Bug 023] legacy DrbdOptions/EncryptPassphrase prop is ABSENT"
assert_legacy_prop_absent "after create-passphrase"
| 117 | + |
echo ">> [Bug 023] linstor rd c $RD -l drbd,luks,storage (no legacy prop set)"
err_file=$(mktemp)
# Capture the command's real exit status with `|| rc=$?`. Reading $?
# inside `if ! cmd; then` yields the status of the negated condition,
# which is always 0, so the failure message would always say "exit 0".
rc=0
"${LCTL[@]}" resource-definition create "$RD" \
    --layer-list drbd,luks,storage 2>"$err_file" || rc=$?
if (( rc != 0 )); then
    echo "FAIL (Bug 023): rd create rejected (exit $rc) — Secret-backed passphrase not accepted by the LUKS gate?" >&2
    cat "$err_file" >&2
    rm -f "$err_file"
    exit 1
fi
rm -f "$err_file"
| 129 | + |
echo ">> [Bug 023] linstor vd c $RD 128M"
# No explicit status check: a failure here aborts the cell via `set -e`.
"${LCTL[@]}" volume-definition create "$RD" 128M >/dev/null

echo ">> [Bug 023] linstor r c $RD --auto-place=2 -s $POOL"
err_file=$(mktemp)
# Capture the command's real exit status with `|| rc=$?`. Reading $?
# inside `if ! cmd; then` yields the status of the negated condition,
# which is always 0, so the failure message would always say "exited 0".
rc=0
"${LCTL[@]}" resource create --auto-place=2 --storage-pool="$POOL" "$RD" 2>"$err_file" || rc=$?
if (( rc != 0 )); then
    echo "FAIL (Bug 023): encrypted auto-place=2 exited $rc" >&2
    cat "$err_file" >&2
    rm -f "$err_file"
    exit 1
fi
rm -f "$err_file"
| 143 | + |
echo ">> [Bug 023] wait for 2 diskful Resource CRDs to land"
# auto-place=2 may add a DISKLESS TIE_BREAKER witness next to the two
# diskful replicas. Only diskful replicas are counted (same convention
# as the other autoplace cells) so the LUKS checks further down never
# target a witness that has no backing device.
placed_nodes=()
deadline=$(( $(date +%s) + 60 ))
# Poll every 2s until exactly two diskful nodes are reported or the
# 60s budget is used up.
while :; do
    (( $(date +%s) < deadline )) || break
    readarray -t placed_nodes < <(linstor_diskful_nodes "$RD")
    (( ${#placed_nodes[@]} == 2 )) && break
    sleep 2
done
(( ${#placed_nodes[@]} == 2 )) || {
    echo "FAIL (Bug 023): autoplace did not stage 2 diskful Resource CRDs within 60s (got ${#placed_nodes[@]})" >&2
    echo " all replicas: $(linstor_replica_count "$RD"), tiebreaker: $(linstor_tiebreaker_node "$RD")" >&2
    exit 1
}
echo " placed (diskful) on: ${placed_nodes[*]}"

# The two diskful replicas, addressed individually by the later steps.
N1=${placed_nodes[0]}
N2=${placed_nodes[1]}
| 167 | + |
echo ">> [Bug 023] wait both replicas UpToDate (Secret-fed luksFormat ran)"
# A gate-only patch would let rd-create through while the satellite
# keeps looping on "LUKS in layer stack but Props.LuksPassphrase
# empty", so the replicas would never converge. Reaching UpToDate
# within the helper's bound shows the Secret made it to the satellite.
wait_uptodate "$RD" "$N1" "$N2"

echo ">> [Bug 023] legacy prop STILL absent after provisioning"
assert_legacy_prop_absent "after provisioning"

echo ">> [Bug 023] LUKS header present + Secret passphrase opens it on EACH replica"
# Per replica: resolve the backing device of volume 0, wait (up to the
# 60 passed to the helper) for a LUKS header on it, then check that the
# cluster passphrase unlocks that header.
for replica_node in "$N1" "$N2"; do
    backing_dev=$(luks_backing_device "$RD" "$replica_node" 0)
    [[ -n "$backing_dev" ]] || {
        echo "FAIL (Bug 023): could not resolve backing device for $RD on $replica_node" >&2
        exit 1
    }
    echo " $replica_node: backing=$backing_dev"
    wait_luks_header_present "$replica_node" "$backing_dev" 60 || {
        echo "FAIL (Bug 023): no LUKS header on $replica_node:$backing_dev" >&2
        exit 1
    }
    assert_luks_passphrase_opens "$replica_node" "$backing_dev" "$PASSPHRASE" || {
        echo "FAIL (Bug 023): Secret-backed passphrase does not unlock $replica_node:$backing_dev" >&2
        exit 1
    }
done

echo ">> encryption-passphrase-luks-rd OK (Bug 023: Secret-only passphrase provisions LUKS end-to-end, no legacy prop)"
0 commit comments