Skip to content

Commit a365fcd

Browse files
committed
fix(ungrub): sync BOOTX64.EFI to all boot-pool members
mkbootable installed the GRUB core only to the target device's ESP, while grub-install refreshed the shared modules under /boot/grub. On a mirror, that left every other member's ESP with a stale core that no longer matched the rebuilt modules, so those members failed to boot with "symbol 'grub_memcpy' not found" -- silently destroying boot redundancy.

- install_grub now replicates the freshly built core to every pool member's ESP (a no-op for a single-device pool; fixes add/replace/refresh on mirrors).
- A new "mkbootable sync" op re-distributes the canonical core (/boot/grub/x86_64-efi/core.efi) to all members, to repair systems whose members have already drifted (e.g. a device added via a bare "zpool attach").

The core uses a dynamic ($root) prefix, so the same image is valid on every member. The device->ESP mapping was verified against a live NVMe mirror.
1 parent bccf49e commit a365fcd

1 file changed

Lines changed: 72 additions & 0 deletions

File tree

ungrub/mkbootable

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
# Reconfigure grub.cfg and theme.txt after UUID change
1414
# mkbootable reconfigure
1515

16+
# Re-distribute the current GRUB EFI core to every boot-pool member's ESP
17+
# mkbootable sync
18+
1619
#set -euo pipefail
1720
#set -x
1821
source /etc/rc.d/rc.runlog
@@ -82,6 +85,50 @@ resize_partitions() {
8285
udevadm settle
8386
}
8487

88+
# Map a boot-pool member partition (e.g. /dev/nvme1n1p3 or /dev/sdb3) to the
# EFI System Partition on the same disk (partition 2 by default, e.g.
# /dev/nvme1n1p2 or /dev/sdb2).
#   $1 = member partition device path
#   $2 = ESP partition number (optional, defaults to 2)
# Prints the ESP device path on stdout.
# Returns 1 (printing nothing) when $1 is missing/empty or lsblk reports no
# parent device for it.
member_to_esp() {
  local part="${1:-}" esp_num="${2:-2}" disk sep=""

  [[ -n "$part" ]] || return 1

  # pkname is the parent kernel device name; only the first line is used
  disk="$(lsblk -no pkname "$part" 2>/dev/null | head -1)"
  [[ -n "$disk" ]] || return 1

  # a disk name ending in a digit (e.g. nvme1n1) takes a "p" between the disk
  # name and the partition number; a name like sdb does not
  if [[ "$disk" == *[0-9] ]]; then
    sep="p"
  fi

  printf '/dev/%s%s%s\n' "$disk" "$sep" "$esp_num"
}
97+
98+
# Replicate a known-good EFI core (BOOTX64.EFI) to the ESP of every device in
# the boot pool. The core uses a dynamic "($root)" prefix and is rebuilt in
# lockstep with the shared modules under $BOOT/grub, so the same image is valid
# on every mirror member. Without this, a grub-install against one member
# refreshes the shared modules but leaves sibling ESPs with a stale core,
# yielding "symbol 'grub_memcpy' not found" on those members at boot.
#   $1 = path to a known-good BOOTX64.EFI to distribute
# Globals: POOL (read) - name of the pool whose members are enumerated
# Returns:
#   0 when no write or unmount failed
#   1 when the source is missing/empty, POOL is unset, the pool lists no /dev
#     members, or any write or unmount failed
# Members whose ESP cannot be mapped, is not a block device, or cannot be
# mounted are logged and skipped without affecting the return status.
sync_efi_to_members() {
  local src="${1:-}" part esp mnt failed=0
  local -a members=()

  if [[ ! -s "$src" ]]; then
    log "sync_efi_to_members: source EFI '$src' missing or empty, skipping"
    return 1
  fi
  if [[ -z "${POOL:-}" ]]; then
    log "sync_efi_to_members: POOL is not set, skipping"
    return 1
  fi

  # collect the member device paths up front so the loop body cannot be
  # affected by word splitting or by commands that read stdin
  mapfile -t members < <(zpool list -v -H -P "$POOL" 2>/dev/null | awk '/\/dev\// {print $1}')
  if (( ${#members[@]} == 0 )); then
    log "sync_efi_to_members: no member devices found in pool '$POOL'"
    return 1
  fi

  for part in "${members[@]}"; do
    if ! esp="$(member_to_esp "$part")"; then
      log "sync_efi_to_members: cannot map $part to an ESP, skipping"
      continue
    fi
    if [[ ! -b "$esp" ]]; then
      log "sync_efi_to_members: $esp is not a block device, skipping"
      continue
    fi
    if ! mnt="$(mktemp -d)"; then
      log "sync_efi_to_members: cannot create a mount point for $esp"
      failed=1
      continue
    fi
    if ! mount "$esp" "$mnt" 2>/dev/null; then
      log "sync_efi_to_members: $esp not mountable (unformatted ESP?), skipping"
      rmdir "$mnt"
      continue
    fi

    if mkdir -p "$mnt/EFI/BOOT" && cp -f "$src" "$mnt/EFI/BOOT/BOOTX64.EFI"; then
      # flush the ESP filesystem before it is unmounted
      sync -f "$mnt"
      log "sync_efi_to_members: wrote BOOTX64.EFI to $esp"
    else
      log "sync_efi_to_members: failed to write BOOTX64.EFI to $esp"
      failed=1
    fi

    # only remove the mount point once the ESP is really unmounted
    if umount "$mnt"; then
      rmdir "$mnt"
    else
      log "sync_efi_to_members: failed to unmount $esp from $mnt"
      failed=1
    fi
  done

  return "$failed"
}
131+
85132
install_grub() {
86133
# format the EFI System Partition (partition 2) with FAT32 and mount
87134
mkfs.fat -F32 -n EFI "$EFI_PART"
@@ -103,8 +150,20 @@ install_grub() {
103150
--modules="part_gpt zfs zfsinfo search search_fs_uuid configfile normal" \
104151
--no-floppy --no-rs-codes
105152

153+
# stash the freshly built core before unmounting so it can be replicated to
154+
# every other pool member's ESP (keeps all mirror members in lockstep with
155+
# the shared modules just rebuilt above)
156+
local core_img
157+
core_img="$(mktemp)"
158+
cp -f "$BOOT/efi/EFI/BOOT/BOOTX64.EFI" "$core_img"
159+
106160
# we don't need this mounted anymore
107161
umount "$BOOT/efi"
162+
163+
# propagate the new core to all members (no-op for the first device, which is
164+
# the only member at this point; required for every subsequent add/refresh)
165+
sync_efi_to_members "$core_img"
166+
rm -f "$core_img"
108167
}
109168

110169
add_device() {
@@ -175,6 +234,19 @@ case "$OPER" in
175234
'reconfigure')
176235
configure_grub
177236
;;
237+
'sync')
238+
# Re-distribute the current GRUB EFI core to every boot-pool member's ESP.
239+
# Source is the canonical core built alongside the shared modules; it is
240+
# guaranteed to match them. Use this to repair members whose ESP holds a
241+
# stale core (eg a device added with a bare "zpool attach", or any member
242+
# missed by a prior single-disk grub refresh).
243+
if [[ -s "$BOOT/grub/x86_64-efi/core.efi" ]]; then
244+
sync_efi_to_members "$BOOT/grub/x86_64-efi/core.efi"
245+
else
246+
log "sync: $BOOT/grub/x86_64-efi/core.efi not found; run 'mkbootable add' or a grub refresh first"
247+
exit 1
248+
fi
249+
;;
178250
*)
179251
log "error 2" # shouldn't happen
180252
exit 2

0 commit comments

Comments
 (0)