Skip to content

Commit 9d03303

Browse files
committed
root-hashes: simplify root detection and unsupported-layout UX
Compared to HEAD^, this commit updates initrd root-hash probing in:
- initrd/bin/root-hashes-gui.sh
- initrd/etc/functions

Behavior expected to work:
- Root-hash create/verify flow on latest Ubuntu, Debian, and PureOS under KVM.
- LUKS/LVM root probing based on mountability + expected root directory checks.
- Clear whiptail guidance when an unsupported filesystem/layout combination is detected.

Current status and non-goals:
- Fedora and QubesOS are untested in this change set.
- QubesOS on coreboot q35 with Heads still does not support qemu/kvm; no regression is implied.

Signed-off-by: Thierry Laurion <insurgo@riseup.net>
1 parent c37f96d commit 9d03303

File tree

2 files changed

+163
-23
lines changed

2 files changed

+163
-23
lines changed

initrd/bin/root-hashes-gui.sh

Lines changed: 137 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,29 @@ set -e -o pipefail
55
CONFIG_ROOT_DIRLIST="bin boot lib sbin usr"
66
HASH_FILE="/boot/kexec_root_hashes.txt"
77
ROOT_MOUNT="/root"
8+
ROOT_DETECT_UNSUPPORTED_REASON=""
9+
ROOT_SUPPORTED_LAYOUT_MSG="Filesystem support in this build:\n- ext4 (ext2/ext3 compatible)\n- xfs\n\nSupported root layouts:\n- LUKS + ext4/ext3/ext2 or xfs\n- LUKS+LVM + ext4/ext3/ext2 or xfs\n\nNot supported:\n- btrfs"
810

911
. /etc/functions
1012
. /etc/gui_functions
1113
. /tmp/config
1214

1315
export CONFIG_ROOT_DIRLIST_PRETTY=$(echo $CONFIG_ROOT_DIRLIST | sed -e 's/^/\//;s/ / \//g')
1416

17+
# Show the "unsupported root layout" error dialog, then hand the detection
# failure reason to die.
#
# Arguments: $1 - name of the action the user should rerun; inserted into the
#                 dialog text as "then rerun <action>."
# Globals:   ROOT_DETECT_UNSUPPORTED_REASON (read) - why detection failed
#            ROOT_SUPPORTED_LAYOUT_MSG (read)      - list of supported layouts
show_unsupported_root_layout_and_die() {
	local action="$1"
	# The "\n" sequences stay literal backslash-n inside double quotes;
	# presumably the dialog renders them as line breaks (confirm against
	# whiptail_error).
	local advice="Try a supported root layout,\nor do not use root hashing,\nthen rerun ${action}."
	local body="${ROOT_DETECT_UNSUPPORTED_REASON}\n\n${ROOT_SUPPORTED_LAYOUT_MSG}\n\n${advice}"

	whiptail_error --title 'ERROR: Unsupported Root Layout' --msgbox "$body" 0 80
	die "$ROOT_DETECT_UNSUPPORTED_REASON"
}
24+
1525
update_root_checksums() {
26+
TRACE_FUNC
1627
if ! detect_root_device; then
28+
if [ -n "$ROOT_DETECT_UNSUPPORTED_REASON" ]; then
29+
show_unsupported_root_layout_and_die "root hash update"
30+
fi
1731
whiptail_error --title 'ERROR: No Valid Root Disk Found' \
1832
--msgbox "No Valid Root Disk Found" 0 80
1933
die "No Valid Root Disk Found"
@@ -31,6 +45,7 @@ update_root_checksums() {
3145
mount -o rw,remount /boot
3246
fi
3347

48+
DEBUG "calculating hashes for $CONFIG_ROOT_DIRLIST_PRETTY on $ROOT_MOUNT"
3449
echo "+++ Calculating hashes for all files in $CONFIG_ROOT_DIRLIST_PRETTY "
3550
# Intentional wordsplit
3651
# shellcheck disable=SC2086
@@ -47,7 +62,12 @@ update_root_checksums() {
4762
unmount_root_device
4863
}
4964
check_root_checksums() {
65+
TRACE_FUNC
66+
DEBUG "verifying existing hash file for $CONFIG_ROOT_DIRLIST_PRETTY"
5067
if ! detect_root_device; then
68+
if [ -n "$ROOT_DETECT_UNSUPPORTED_REASON" ]; then
69+
show_unsupported_root_layout_and_die "root hash verification"
70+
fi
5171
whiptail_error --title 'ERROR: No Valid Root Disk Found' \
5272
--msgbox "No Valid Root Disk Found" 0 80
5373
die "No Valid Root Disk Found"
@@ -74,6 +94,7 @@ check_root_checksums() {
7494
update_root_checksums
7595
return 0
7696
else
97+
DEBUG "Root hash file not created (user declined)"
7798
exit 1
7899
fi
79100
fi
@@ -124,6 +145,7 @@ check_root_checksums() {
124145

125146
return 0
126147
else
148+
DEBUG "Signatures not updated (user declined after new-files warning)"
127149
return 1
128150
fi
129151
fi
@@ -154,6 +176,7 @@ check_root_checksums() {
154176
update_root_checksums
155177
return 0
156178
else
179+
DEBUG "Signatures not updated (user declined after hash-check failure)"
157180
return 1
158181
fi
159182
fi
@@ -164,21 +187,69 @@ check_root_checksums() {
164187
# Print the device node of logical volume $2 in volume group $1.
#
# Tries /dev/<VG>/<LV> first, then /dev/mapper/<VG>-<LV> with every '-' inside
# either name doubled. Prints nothing and returns 1 if neither node exists.
_lvm_lv_device_path() {
	local vg="$1" lv="$2" node

	node="/dev/$vg/$lv"
	if ! [ -e "$node" ]; then
		node="/dev/mapper/${vg//-/--}-${lv//-/--}"
	fi
	[ -e "$node" ] || return 1
	echo "$node"
}

# Activate an LVM volume group, pick the LV most likely to hold the root
# filesystem, and continue opening block device layers on it.
#
# Selection order:
#   1. an LV named exactly 'root'
#   2. the first existing LV whose name looks root-like (*root*, dom0,
#      qubes_dom0)
#   3. the first other existing LV that is not a thin-pool component or swap
#
# Arguments: $1 - volume group name
# Outputs:   whatever open_block_device_layers prints for the selected LV
# Returns:   1 if the VG cannot be activated or no LV node is found, otherwise
#            the status of open_block_device_layers
open_block_device_lvm() {
	TRACE_FUNC
	local VG="$1"
	local LV name lvpath FIRST_LV_PREFERRED FIRST_LV_FALLBACK

	if ! lvm vgchange -ay "$VG"; then
		DEBUG "Can't open LVM VG: $VG"
		return 1
	fi

	# Prefer an LV named 'root' (used by Qubes); otherwise enumerate the VG.
	if ! LV="$(_lvm_lv_device_path "$VG" root)"; then
		FIRST_LV_PREFERRED=""
		FIRST_LV_FALLBACK=""
		DEBUG "LVM VG $VG has no 'root' LV, enumerating all LVs"
		# Intentional wordsplit: lvs pads each name with leading whitespace.
		for name in $(lvm lvs --noheadings -o lv_name --separator ' ' "$VG" 2>/dev/null); do
			# Thin pool/metadata and swap LVs are not root filesystems. Match
			# 'swap' anywhere in the name so names like lv_swap are skipped too.
			case "$name" in
			*pool*|*tmeta*|*tdata*|*swap*)
				DEBUG "skipping LV name $name (not a root LV candidate)"
				continue
				;;
			esac

			# Ignore LVs that have no device node.
			lvpath="$(_lvm_lv_device_path "$VG" "$name")" || continue

			case "$name" in
			*root*|dom0|qubes_dom0)
				[ -n "$FIRST_LV_PREFERRED" ] || FIRST_LV_PREFERRED="$lvpath"
				DEBUG "preferred LV candidate $lvpath (name $name)"
				;;
			*)
				[ -n "$FIRST_LV_FALLBACK" ] || FIRST_LV_FALLBACK="$lvpath"
				;;
			esac
		done

		if [ -n "$FIRST_LV_PREFERRED" ]; then
			DEBUG "selecting preferred LV $FIRST_LV_PREFERRED in VG $VG"
			LV="$FIRST_LV_PREFERRED"
		elif [ -n "$FIRST_LV_FALLBACK" ]; then
			# Only existence was checked here; mountability is not verified
			# in this function.
			DEBUG "falling back to first available LV $FIRST_LV_FALLBACK in VG $VG"
			LV="$FIRST_LV_FALLBACK"
		else
			DEBUG "no usable LV found in VG $VG"
			return 1
		fi
	fi

	# Use selected LV
	open_block_device_layers "$LV"
}
183254

184255
# Open a LUKS device, then continue looking for more layers.
@@ -195,6 +266,15 @@ open_block_device_luks() {
195266
return 1
196267
fi
197268

269+
# Inform LVM about any new physical volume inside this decrypted container.
270+
# Some distributions (Fedora) require a vgscan before LVM will create nodes
271+
# under /dev/mapper, otherwise our later search won't see the logical
272+
# volumes. This is harmless on systems without lvm installed.
273+
if command -v lvm >/dev/null 2>&1; then
274+
DEBUG "running vgscan to populate /dev/mapper after unlocking LUKS"
275+
lvm vgscan --mknodes >/dev/null 2>&1 || true
276+
fi
277+
198278
open_block_device_layers "/dev/mapper/$LUKSDEV"
199279
}
200280

@@ -241,14 +321,28 @@ open_block_device_layers() {
241321
open_root_device_no_clean_up() {
242322
TRACE_FUNC
243323
local DEVICE="$1"
244-
local FS_DEVICE
324+
local FS_DEVICE BLKID_OUT
245325

246326
# Open LUKS/LVM and get the name of the block device that should contain the
247327
# filesystem. If there are no LUKS/LVM layers, FS_DEVICE is just DEVICE.
248328
FS_DEVICE="$(open_block_device_layers "$DEVICE")" || return 1
249329

330+
# Keep detection minimal for initrd: only require blkid to return some
331+
# metadata before mount probing. TYPE is often unavailable in this initrd.
332+
BLKID_OUT="$(blkid "$FS_DEVICE" 2>/dev/null || true)"
333+
DEBUG "blkid output for $FS_DEVICE: $BLKID_OUT"
334+
335+
# If blkid reports nothing at all, this is likely not a filesystem-bearing
336+
# partition. Skip mount probing to avoid noisy kernel probe logs.
337+
if [ -z "$BLKID_OUT" ]; then
338+
ROOT_DETECT_UNSUPPORTED_REASON="Found partition/layer with no recognizable filesystem metadata."
339+
DEBUG "Skipping $FS_DEVICE: blkid returned no filesystem metadata"
340+
return 1
341+
fi
342+
250343
# Mount the device
251344
if ! mount -o ro "$FS_DEVICE" "$ROOT_MOUNT" &>/dev/null; then
345+
ROOT_DETECT_UNSUPPORTED_REASON="Found partition/layer on $FS_DEVICE but it could not be mounted as root by this root-hash flow."
252346
DEBUG "Can't mount filesystem on $FS_DEVICE from $DEVICE"
253347
return 1
254348
fi
@@ -269,14 +363,8 @@ open_root_device_no_clean_up() {
269363
# Deactivate the LVM volume group named by $1.
#
# The VG is deactivated directly rather than closing individual LV paths
# first. This avoids recursive LV close probing noise for LV paths that are
# not PVs and matches the minimal initrd workflow. A failed deactivation is
# only logged through DEBUG.
close_block_device_lvm() {
	TRACE_FUNC
	local VG="$1"

	if ! lvm vgchange -an "$VG"; then
		DEBUG "Can't close LVM VG: $VG"
	fi
}
@@ -325,7 +413,7 @@ close_block_device_layers() {
325413
open_root_device() {
326414
TRACE_FUNC
327415
if ! open_root_device_no_clean_up "$1"; then
328-
unmount_root_device
416+
close_root_device "$1"
329417
return 1
330418
fi
331419

@@ -360,37 +448,66 @@ detect_root_device()
360448
fi
361449
# Ensure nothing is opened/mounted
362450
unmount_root_device
451+
ROOT_DETECT_UNSUPPORTED_REASON=""
363452

364453
# check $CONFIG_ROOT_DEV if set/valid
365-
if [ -e "$CONFIG_ROOT_DEV" ] && open_root_device "$CONFIG_ROOT_DEV"; then
454+
# run open_root_device with fd10 closed so external tools don't inherit it
455+
if [ -e "$CONFIG_ROOT_DEV" ] && open_root_device "$CONFIG_ROOT_DEV" 10<&-; then
366456
return 0
367457
fi
368458

369459
# generate list of possible boot devices
370460
fdisk -l 2>/dev/null | grep "Disk /dev/" | cut -f2 -d " " | cut -f1 -d ":" > /tmp/disklist
461+
DEBUG "detect_root_device: initial disklist=$(cat /tmp/disklist | tr '\n' ' ')"
371462

372463
# filter out extraneous options
373464
> /tmp_root_device_list
374465
while IFS= read -r -u 10 i; do
375466
# remove block device from list if numeric partitions exist
376467
DEV_NUM_PARTITIONS=$((`ls -1 $i* | wc -l`-1))
468+
DEBUG "detect_root_device: candidate $i has $DEV_NUM_PARTITIONS numeric partitions"
377469
if [ ${DEV_NUM_PARTITIONS} -eq 0 ]; then
378470
echo $i >> /tmp_root_device_list
379471
else
380472
ls $i* | tail -${DEV_NUM_PARTITIONS} >> /tmp_root_device_list
381473
fi
382474
done 10</tmp/disklist
383475

476+
# log the list after filtering
477+
DEBUG "detect_root_device: filtered candidates=$(cat /tmp_root_device_list | tr '\n' ' ')"
478+
384479
# iterate through possible options
385480
while IFS= read -r -u 10 i; do
386-
if open_root_device "$i"; then
387-
# CONFIG_ROOT_DEV is valid device and contains an installed OS
481+
DEBUG "detect_root_device: trying candidate $i"
482+
# close fd10 for the called command so it isn't inherited by tools like
483+
# lvm, which otherwise complain about a leaked descriptor.
484+
if open_root_device "$i" 10<&-; then
485+
DEBUG "detect_root_device: candidate $i succeeded"
388486
CONFIG_ROOT_DEV="$i"
389487
return 0
488+
else
489+
DEBUG "detect_root_device: candidate $i failed"
390490
fi
391491
done 10</tmp_root_device_list
392492

493+
# failed to find root on physical partitions; try any mapped devices
494+
for m in /dev/mapper/*; do
495+
# skip non-existent or non-block devices such as the control node
496+
[ -e "$m" ] || continue
497+
[ -b "$m" ] || continue
498+
499+
DEBUG "detect_root_device: trying mapper device $m as potential root"
500+
if open_root_device "$m"; then
501+
CONFIG_ROOT_DEV="$m"
502+
DEBUG "detect_root_device: mapper device $m appears to contain root files"
503+
return 0
504+
fi
505+
done
506+
393507
# no valid root device found
508+
if [ -n "$ROOT_DETECT_UNSUPPORTED_REASON" ]; then
509+
DEBUG "$ROOT_DETECT_UNSUPPORTED_REASON"
510+
fi
394511
echo "Unable to locate $ROOT_MOUNT files on any mounted disk"
395512
return 1
396513
}

initrd/etc/functions

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,12 +1297,35 @@ verify_checksums() {
12971297
# Check if a device is an LVM2 PV, and if so print the VG name
12981298
find_lvm_vg_name() {
12991299
TRACE_FUNC
1300-
local DEVICE VG
1300+
local DEVICE VG part
13011301
DEVICE="$1"
13021302

1303+
# closing fd10 should be handled by callers (detect_root_device now
1304+
# closes it for commands before invoking us). leaving this here can
1305+
# interfere with future uses of fd10 elsewhere in the same shell.
1306+
# (Note: previous versions contained a hack to close it here; see
1307+
# commit 700ed0c141.)
1308+
13031309
mkdir -p /tmp/root-hashes-gui
1304-
if ! lvm pvs --noheadings -o vg_name "$DEVICE" >/tmp/root-hashes-gui/lvm_vg 2>/dev/null; then
1305-
# It's not an LVM PV
1310+
# Try to query whether DEVICE is an LVM physical volume. On systems
1311+
# without LVM the command may not exist; treat that like "not a PV".
1312+
if ! lvm pvs --noheadings -o vg_name "$DEVICE" >/tmp/root-hashes-gui/lvm_vg 2>/tmp/root-hashes-gui/lvm_err; then
1313+
# It's not an LVM PV, or lvm failed entirely. Log stderr for debugging.
1314+
DEBUG "lvm pvs failed for $DEVICE, stderr:" "$(cat /tmp/root-hashes-gui/lvm_err)"
1315+
# try any children shown by lsblk (handles LUKS containers with
1316+
# internal partitions such as dm-0, dm-1 etc).
1317+
if command -v lsblk >/dev/null 2>&1; then
1318+
DEBUG "find_lvm_vg_name: lsblk children of $DEVICE"
1319+
for part in $(lsblk -np -l -o NAME "$DEVICE" | tail -n +2); do
1320+
[ -b "$part" ] || continue
1321+
DEBUG "find_lvm_vg_name: testing child $part"
1322+
if lvm pvs --noheadings -o vg_name "$part" >/tmp/root-hashes-gui/lvm_vg 2>/tmp/root-hashes-gui/lvm_err; then
1323+
VG="$(awk 'NF {print $1; exit}' /tmp/root-hashes-gui/lvm_vg)"
1324+
[ -n "$VG" ] && { echo "$VG"; return 0; }
1325+
fi
1326+
done
1327+
fi
1328+
DEBUG "find_lvm_vg_name: $DEVICE is not an LVM PV"
13061329
return 1
13071330
fi
13081331

0 commit comments

Comments
 (0)