diff --git a/tests/e2e/cli-matrix/r-activate-deactivate-lifecycle.sh b/tests/e2e/cli-matrix/r-activate-deactivate-lifecycle.sh index 43c309a1..7ede426d 100755 --- a/tests/e2e/cli-matrix/r-activate-deactivate-lifecycle.sh +++ b/tests/e2e/cli-matrix/r-activate-deactivate-lifecycle.sh @@ -50,7 +50,7 @@ # # 2. IO during INACTIVE: # - Promote N1 to Primary if not already -# - Write ~64 MiB random pattern to /dev/drbd/by-res//0 +# - Write ~64 MiB random pattern to the RD's /dev/drbdN device # - Must complete without quorum block (single-replica quorum # override). Capture md5 of pattern. # @@ -153,9 +153,14 @@ wait_role "$RD" "$N1" "Primary" 30 \ # written BEFORE the first deactivate so both replicas hold the # same bytes and the GI baseline is established. echo ">> seed initial ${IO_MIB} MiB pattern on $N1 (will be the md5 anchor)" +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +# Last-resort minor enumeration kept for stands where sh-dev fails. +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" on_node "$N1" bash -c " set -e - dev=\$(readlink -f /dev/drbd/by-res/${RD}/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi @@ -289,9 +294,11 @@ for cycle in 1 2 3; do # the sole voter and DRBD's single-replica quorum override must # let writes through. A failure here = quorum frame is wrong on # INACTIVE peers. + # Same portable resolver as the seeding step (sh-dev, not by-res). + dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" io_out=$(on_node "$N1" bash -c " set -e - dev=\$(readlink -f /dev/drbd/by-res/${RD}/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi @@ -317,8 +324,9 @@ for cycle in 1 2 3; do # — the deactivate path must not have corrupted it. (md5 of the # FIRST ${IO_MIB} MiB of the device, since the seed lives at # offset 0 and the new write was at offset ${IO_MIB} MiB.) + dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" seed_md5_now=$(on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/${RD}/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi @@ -424,8 +432,9 @@ for cycle in 1 2 3; do # the pre-deact pattern. The partial-sync handshake must not # have written stale bytes onto the surviving replica's data. echo ">> md5 of seed on $N1 still matches pre-deact pattern" + dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" seed_md5_after=$(on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/${RD}/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi diff --git a/tests/e2e/cli-matrix/r-c-over-tiebreaker-skip-sync.sh b/tests/e2e/cli-matrix/r-c-over-tiebreaker-skip-sync.sh index be9981d4..c963a5d1 100755 --- a/tests/e2e/cli-matrix/r-c-over-tiebreaker-skip-sync.sh +++ b/tests/e2e/cli-matrix/r-c-over-tiebreaker-skip-sync.sh @@ -224,7 +224,11 @@ fi # Write 32 MiB; secondary will need to catch up. With skip-sync # the catch-up is fast but still passes through SyncSource on # the source side per upstream events2 semantics. -on_node "$prim" bash -c "dd if=/dev/urandom of=/dev/drbd/by-res/$RD/0 bs=1M count=32 status=none oflag=direct 2>/dev/null" || true +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so the by-res dd silently no-ops on the stand. +dev=$(resolve_drbd_device "$prim" "$RD" 0 2>/dev/null) || dev="" +[ -n "$dev" ] && on_node "$prim" bash -c "dd if=/dev/urandom of=$dev bs=1M count=32 status=none oflag=direct 2>/dev/null" || true # Capture wire-shape for ~10s post-mutation. shape_ok=false diff --git a/tests/e2e/cli-matrix/r-d-last-uptodate-midsync-rejected.sh b/tests/e2e/cli-matrix/r-d-last-uptodate-midsync-rejected.sh index f7443c85..aeb6bc93 100755 --- a/tests/e2e/cli-matrix/r-d-last-uptodate-midsync-rejected.sh +++ b/tests/e2e/cli-matrix/r-d-last-uptodate-midsync-rejected.sh @@ -82,7 +82,11 @@ echo ">> Phase 2: write data on $N1 so the second replica must really resync" # Primary + dd a chunk to bump the GI; a fresh empty volume could # skip-sync and erase the SyncTarget window. on_node "$N1" bash -c "drbdadm primary --force $RD 2>/dev/null" || true -on_node "$N1" bash -c "dd if=/dev/urandom of=/dev/drbd/by-res/$RD/0 bs=1M count=256 status=none oflag=direct 2>/dev/null" || true +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so the by-res dd silently no-ops on the stand. +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" +[ -n "$dev" ] && on_node "$N1" bash -c "dd if=/dev/urandom of=$dev bs=1M count=256 status=none oflag=direct 2>/dev/null" || true on_node "$N1" bash -c "drbdadm secondary $RD 2>/dev/null" || true echo ">> Phase 3: throttle resync (c-max-rate 1024 KiB/s) so the add stays SyncTarget" diff --git a/tests/e2e/cli-matrix/snap-create-multiple-group-consistency.sh b/tests/e2e/cli-matrix/snap-create-multiple-group-consistency.sh index 1e5d62f3..070b7e79 100755 --- a/tests/e2e/cli-matrix/snap-create-multiple-group-consistency.sh +++ b/tests/e2e/cli-matrix/snap-create-multiple-group-consistency.sh @@ -114,9 +114,14 @@ on_node "$N1" drbdadm primary --force "$RD_B" 2>/dev/null || true # different counter values whenever the writer made progress # between the two per-RD snap calls. echo ">> start cross-RD correlated writer on $N1 (counter into rd-a + rd-b)" +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +dev_a=$(resolve_drbd_device "$N1" "$RD_A" 0 2>/dev/null) || dev_a="" +dev_b=$(resolve_drbd_device "$N1" "$RD_B" 0 2>/dev/null) || dev_b="" on_node "$N1" bash -c " - dev_a=\$(readlink -f /dev/drbd/by-res/$RD_A/0 2>/dev/null || true) - dev_b=\$(readlink -f /dev/drbd/by-res/$RD_B/0 2>/dev/null || true) + dev_a='$dev_a' + dev_b='$dev_b' if [ -z \"\$dev_a\" ] || [ -z \"\$dev_b\" ]; then echo 'note: could not resolve drbd device paths' exit 0 diff --git a/tests/e2e/cli-matrix/snap-create-multiple-lifecycle.sh b/tests/e2e/cli-matrix/snap-create-multiple-lifecycle.sh index 7f07668c..c82dc252 100755 --- a/tests/e2e/cli-matrix/snap-create-multiple-lifecycle.sh +++ b/tests/e2e/cli-matrix/snap-create-multiple-lifecycle.sh @@ -119,10 +119,16 @@ done # Phase 2: cross-RD correlated writer (counter into bytes 0-7 of all 3) # ===================================================================== echo ">> Phase 2: start cross-RD correlated writer on $N1" +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +dev_a=$(resolve_drbd_device "$N1" "$RD_A" 0 2>/dev/null) || dev_a="" +dev_b=$(resolve_drbd_device "$N1" "$RD_B" 0 2>/dev/null) || dev_b="" +dev_c=$(resolve_drbd_device "$N1" "$RD_C" 0 2>/dev/null) || dev_c="" on_node "$N1" bash -c " - dev_a=\$(readlink -f /dev/drbd/by-res/$RD_A/0 2>/dev/null || true) - dev_b=\$(readlink -f /dev/drbd/by-res/$RD_B/0 2>/dev/null || true) - dev_c=\$(readlink -f /dev/drbd/by-res/$RD_C/0 2>/dev/null || true) + dev_a='$dev_a' + dev_b='$dev_b' + dev_c='$dev_c' if [ -z \"\$dev_a\" ] || [ -z \"\$dev_b\" ] || [ -z \"\$dev_c\" ]; then echo 'note: could not resolve all 3 drbd device paths' exit 0 diff --git a/tests/e2e/cli-matrix/snap-cross-node-consistency.sh b/tests/e2e/cli-matrix/snap-cross-node-consistency.sh index 5dc4e52f..aa6c281c 100755 --- a/tests/e2e/cli-matrix/snap-cross-node-consistency.sh +++ b/tests/e2e/cli-matrix/snap-cross-node-consistency.sh @@ -91,9 +91,14 @@ on_node "$N1" drbdadm primary --force "$RD" 2>/dev/null || true # capture (one snap took data at byte N, the other at byte N+delta) # yields visibly different md5. echo ">> seed deterministic 256 MiB pattern on $N1's DRBD device" +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +# Last-resort minor enumeration kept for stands where sh-dev fails. +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" on_node "$N1" bash -c " set -e - dev=\$(readlink -f /dev/drbd/by-res/$RD/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi @@ -111,8 +116,10 @@ wait_uptodate "$RD" "$N1" "$N2" # AFTER replica $N1 already finished its snapshot — and the two # resulting snapshots reflect that delta. echo ">> start continuous writer on $N1 (urandom → DRBD device)" +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/$RD/0 2>/dev/null || ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) + dev='$dev' + [ -n \"\$dev\" ] || dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) while true; do dd if=/dev/urandom of=\$dev bs=4K count=128 oflag=direct status=none 2>/dev/null || break done >/tmp/cli-matrix-snap-writer.log 2>&1 & diff --git a/tests/e2e/cli-matrix/snap-full-lifecycle.sh b/tests/e2e/cli-matrix/snap-full-lifecycle.sh index 8fac2fd0..ab767f3e 100755 --- a/tests/e2e/cli-matrix/snap-full-lifecycle.sh +++ b/tests/e2e/cli-matrix/snap-full-lifecycle.sh @@ -115,9 +115,14 @@ on_node "$N1" drbdadm primary --force "$RD" 2>/dev/null || true # We therefore read the device's actual byte size and feed dd via # count_bytes so the write stops exactly at the DRBD boundary. echo ">> Phase 2: seed deterministic random pattern on $N1 (DRBD-fit bytes), then start writer" +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +# Last-resort minor enumeration kept for stands where sh-dev fails. +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" seed_out=$(on_node "$N1" bash -c " set -e - dev=\$(readlink -f /dev/drbd/by-res/$RD/0 2>/dev/null || true) + dev='$dev' if [ -z \"\$dev\" ]; then dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) fi @@ -136,8 +141,10 @@ echo "$seed_out" wait_uptodate "$RD" "$N1" "$N2" echo ">> Phase 2: start continuous writer on $N1" +dev=$(resolve_drbd_device "$N1" "$RD" 0 2>/dev/null) || dev="" on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/$RD/0 2>/dev/null || ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) + dev='$dev' + [ -n \"\$dev\" ] || dev=\$(ls -1 /dev/drbd* 2>/dev/null | grep -vE 'by-(res|disk)' | head -1) while true; do dd if=/dev/urandom of=\$dev bs=4K count=128 oflag=direct status=none 2>/dev/null || break done >/tmp/cli-matrix-snap-lifecycle-writer.log 2>&1 & diff --git a/tests/e2e/cli-matrix/snap-r-rst-stamps-resources.sh b/tests/e2e/cli-matrix/snap-r-rst-stamps-resources.sh index cfa1c24e..9a612bea 100755 --- a/tests/e2e/cli-matrix/snap-r-rst-stamps-resources.sh +++ b/tests/e2e/cli-matrix/snap-r-rst-stamps-resources.sh @@ -101,8 +101,12 @@ wait_uptodate "$SRC" "$N1" "$N2" # call and asserts Resource CRDs are stamped). echo ">> seed deterministic pattern on $N1 $SRC" on_node "$N1" drbdadm primary --force "$SRC" 2>/dev/null || true +# Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the +# /dev/drbd/by-res symlink is not reliably present in the satellite +# mount namespace, so readlink-based resolution aborts on the stand. +dev=$(resolve_drbd_device "$N1" "$SRC" 0 2>/dev/null) || dev="" on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/$SRC/0 2>/dev/null || true) + dev='$dev' if [ -n \"\$dev\" ]; then printf 'BLOCKSTOR-BUG354-MARKER' | dd of=\"\$dev\" bs=1 count=24 conv=fsync status=none fi @@ -212,10 +216,13 @@ fi # ---- Bonus: read the marker on the restored replica ---------------------- echo ">> bonus assert: marker bytes restored from snapshot on $N1 $TGT" +# Same portable resolver as the seeding step: by-res symlinks are +# not reliably present in the satellite mount namespace. +dev=$(resolve_drbd_device "$N1" "$TGT" 0 2>/dev/null) || dev="" marker_read=$(on_node "$N1" bash -c " on_node_drbdadm() { drbdadm primary --force \$1 2>/dev/null; } on_node_drbdadm $TGT - dev=\$(readlink -f /dev/drbd/by-res/$TGT/0 2>/dev/null || true) + dev='$dev' if [ -n \"\$dev\" ]; then head -c 24 \"\$dev\" 2>/dev/null fi diff --git a/tests/e2e/cli-matrix/snap-suspend-resume-isolation-u138-u52.sh b/tests/e2e/cli-matrix/snap-suspend-resume-isolation-u138-u52.sh index fecc8327..7df2e60c 100755 --- a/tests/e2e/cli-matrix/snap-suspend-resume-isolation-u138-u52.sh +++ b/tests/e2e/cli-matrix/snap-suspend-resume-isolation-u138-u52.sh @@ -112,10 +112,14 @@ snap_ready() { # timeout. A still-suspended device makes dd block; `timeout` then kills # it and we FAIL — that is the U138 outage signal. write_survives() { - local rd=$1 + local rd=$1 dev on_node "$N1" drbdadm primary --force "$rd" 2>/dev/null || true + # Resolve via `drbdadm sh-dev` (lib.sh resolve_drbd_device): the + # /dev/drbd/by-res symlink is not reliably present in the satellite + # mount namespace, so readlink-based resolution aborts on the stand. + dev=$(resolve_drbd_device "$N1" "$rd" 0 2>/dev/null) || dev="" if ! on_node "$N1" bash -c " - dev=\$(readlink -f /dev/drbd/by-res/$rd/0 2>/dev/null || true) + dev='$dev' [ -z \"\$dev\" ] && { echo 'no drbd device node for $rd' >&2; exit 2; } timeout 20 dd if=/dev/zero of=\"\$dev\" bs=4096 count=16 oflag=direct conv=fsync status=none "; then