Skip to content

Commit ebb2bda

Browse files
committed
fix(ungrub): replicate GRUB EFI core to all boot-pool members
install_grub installed the core to only the target device's ESP, while grub-install rebuilt the shared modules under /boot/grub. On a mirror, that left every other member's ESP with a stale core that no longer matched the rebuilt modules, so those members failed to boot with "symbol 'grub_memcpy' not found" -- silently destroying boot redundancy.
- install_grub now copies the freshly built core to every other member's ESP (a no-op for a single-device pool).
- A new "mkbootable sync <good-device>" op re-distributes the core from a known-good member to all others, to repair members that have already drifted (e.g. a device added via a bare "zpool attach").
ESPs are written with mtools (mcopy), the same tooling the installer uses, so no mount is needed for the targets; the core uses a dynamic ($root) prefix, so the same image is valid on every member. The change is split into member_esp / install_efi_core / replicate_grub. The mtools round-trip was verified to be byte-faithful and idempotent.
1 parent bccf49e commit ebb2bda

1 file changed

Lines changed: 62 additions & 0 deletions

File tree

ungrub/mkbootable

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
# Reconfigure grub.cfg and theme.txt after UUID change
1414
# mkbootable reconfigure
1515

16+
# Copy the EFI core from a known-good member to every other member's ESP
17+
# mkbootable sync <good-device>
18+
1619
#set -euo pipefail
1720
#set -x
1821
source /etc/rc.d/rc.runlog
@@ -82,6 +85,44 @@ resize_partitions() {
8285
udevadm settle
8386
}
8487

88+
# Map a boot-pool member partition to the EFI System Partition (partition 2)
# on the same disk, eg /dev/nvme1n1p3 -> /dev/nvme1n1p2, /dev/sdb3 -> /dev/sdb2
#   $1      = member partition device
#   stdout  = path of that disk's partition 2
#   returns = 1 when lsblk reports no parent disk for $1
member_esp() {
  local member="$1" parent suffix=""

  # first parent kernel name lsblk reports for the partition
  parent="$(lsblk -no pkname "$member" 2>/dev/null | head -1)"
  if [[ -z "$parent" ]]; then
    return 1
  fi

  # disk names ending in a digit take a "p" between disk name and partition number
  case "$parent" in
    *[0-9]) suffix="p" ;;
  esac

  printf '/dev/%s%s2\n' "$parent" "$suffix"
}
97+
98+
# Put an EFI core image onto one ESP as /EFI/BOOT/BOOTX64.EFI using mtools,
# which operates on the partition device directly (no mount needed).
#   $1 = ESP device to write to
#   $2 = EFI core image to copy
install_efi_core() {
  local target="$1" image="$2" dir

  # create the directory chain top-down; mmd errors are discarded because the
  # directories usually exist already
  for dir in ::/EFI ::/EFI/BOOT; do
    mmd -i "$target" "$dir" 2>/dev/null
  done

  # -D o: overwrite an existing BOOTX64.EFI instead of prompting
  mcopy -D o -i "$target" "$image" ::/EFI/BOOT/BOOTX64.EFI
}
105+
106+
# Copy a known-good EFI core to the ESP of every boot pool member except the one
# it came from. grub-install rebuilds the shared modules under $BOOT/grub but
# writes the core to only one ESP; a member left with a stale core no longer
# matches those modules and fails to boot with "symbol 'grub_memcpy' not found",
# silently breaking the mirror's boot redundancy. The core uses a dynamic
# ($root) prefix, so the same image is valid on every member.
#   $1      = path to the known-good BOOTX64.EFI to distribute (on a mounted ESP)
#   globals = POOL (pool whose members are enumerated), EFI_PART (source ESP,
#             never written to)
#   returns = 0 when every eligible member was synced (or there was none),
#             1 when the source core is missing/empty or any write failed
replicate_grub() {
  local src="$1" part esp rc=0

  # refuse to fan out a missing or empty image; otherwise every member would
  # just log its own copy failure
  if [[ ! -s "$src" ]]; then
    log "replicate_grub: source EFI core $src is missing or empty"
    return 1
  fi

  # read members line by line on fd 3 so nothing called in the loop body can
  # consume the list, and so device paths are not word-split or globbed
  while IFS= read -r -u 3 part; do
    [[ -n "$part" ]] || continue

    if ! esp="$(member_esp "$part")"; then
      log "replicate_grub: cannot resolve ESP for $part, skipping"
      continue
    fi

    # never write to the source ESP: it is mounted and mtools writes to the raw
    # device. -ef also catches EFI_PART given as a symlink/alias of $esp.
    if [[ "$esp" == "$EFI_PART" || "$esp" -ef "$EFI_PART" ]]; then
      continue
    fi
    [[ -b "$esp" ]] || continue

    if install_efi_core "$esp" "$src"; then
      log "replicate_grub: synced EFI core to $esp"
    else
      log "replicate_grub: failed to sync EFI core to $esp"
      rc=1
    fi
  done 3< <(zpool list -v -H -P "$POOL" | awk '/\/dev\// {print $1}')

  return "$rc"
}
125+
85126
install_grub() {
86127
# format the EFI System Partition (partition 2) with FAT32 and mount
87128
mkfs.fat -F32 -n EFI "$EFI_PART"
@@ -103,6 +144,11 @@ install_grub() {
103144
--modules="part_gpt zfs zfsinfo search search_fs_uuid configfile normal" \
104145
--no-floppy --no-rs-codes
105146

147+
# replicate the core we just built to every other member's ESP so all mirror
148+
# members stay in lockstep with the shared modules rebuilt above (no-op while
149+
# this is the only member)
150+
replicate_grub "$BOOT/efi/EFI/BOOT/BOOTX64.EFI"
151+
106152
# we don't need this mounted anymore
107153
umount "$BOOT/efi"
108154
}
@@ -162,6 +208,19 @@ remove_device() {
162208
fi
163209
}
164210

211+
# Re-distribute the EFI core from a known-good member to all other members.
# Use this to repair members whose ESP holds a stale core (eg a device added
# with a bare "zpool attach", or a member missed by an earlier grub refresh).
#   globals = BOOT (mount point parent), EFI_PART (ESP of the known-good member)
#   exits 1 when the source ESP cannot be mounted or holds no EFI core
sync_device() {
  local core="$BOOT/efi/EFI/BOOT/BOOTX64.EFI"

  mkdir -p -m 0700 "$BOOT/efi"
  # read-only: the source ESP is only ever read here
  if ! mount -o ro "$EFI_PART" "$BOOT/efi" 2>/dev/null; then
    log "sync: cannot mount source ESP $EFI_PART"
    exit 1
  fi

  # a "known-good" device without a core is a wrong argument; stop before
  # attempting to copy a non-existent image to the other members
  if [[ ! -s "$core" ]]; then
    log "sync: no EFI core found on source ESP $EFI_PART"
    umount "$BOOT/efi"
    exit 1
  fi

  replicate_grub "$core"
  umount "$BOOT/efi"
}
223+
165224
case "$OPER" in
166225
'add')
167226
add_device
@@ -175,6 +234,9 @@ case "$OPER" in
175234
'reconfigure')
176235
configure_grub
177236
;;
237+
'sync')
238+
sync_device
239+
;;
178240
*)
179241
log "error 2" # shouldn't happen
180242
exit 2

0 commit comments

Comments
 (0)