Skip to content

Commit e5c1149

Browse files
cgwaltersJohan-Liebert1
authored andcommitted
bootloader: Fix a few things related to systemd-boot installs
See: osbuild/image-builder-cli#506 Right now most of our testing for composefs installs uses bcvk which uses `to-disk` in an ephemeral VM. That masks some issues, like the fact that `bootctl install` by default touches the ESP variables. Wire things up with `--generic-image` so it behaves the same as the bootupd backend. Now, the next problem is that osbuild actually makes `/` readonly (which is great!) and in fact we should change our recommended `podman run` invocations to pass `--read-only`. But that reveals the next bug, which is that bootctl installs wants to write an `/etc/kernel/entry-token` which we basically don't use. Redirect those writes to a tempdir for now, though we should probably try to get upstream patches for that. Also set SYSTEMD_RELAX_ESP_CHECKS=1 so bootctl skips the partition-type GUID check for the ESP - we don't want it to e.g. try to look up things in the udev database, which might not be available. Assisted-by: OpenCode (claude-sonnet-4-6) Signed-off-by: Colin Walters <walters@verbum.org>
1 parent cf93248 commit e5c1149

3 files changed

Lines changed: 232 additions & 55 deletions

File tree

crates/lib/src/bootc_composefs/boot.rs

Lines changed: 126 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,31 @@ fi
195195
)
196196
}
197197

198-
/// Mount the ESP from the provided device
198+
/// Mount flags shared by all ESP mounts: non-executable, no setuid.
199+
const ESP_MOUNT_FLAGS: MountFlags =
200+
MountFlags::from_bits_retain(MountFlags::NOEXEC.bits() | MountFlags::NOSUID.bits());
201+
202+
/// FAT mount options: owner-only permissions on files (0600) and dirs (0700).
203+
const ESP_MOUNT_DATA: &std::ffi::CStr = c"fmask=0177,dmask=0077";
204+
205+
/// Mount the ESP from the provided device into a temporary directory.
199206
pub fn mount_esp(device: &str) -> Result<TempMount> {
200-
let flags = MountFlags::NOEXEC | MountFlags::NOSUID;
201-
TempMount::mount_dev(device, "vfat", flags, Some(c"fmask=0177,dmask=0077"))
207+
TempMount::mount_dev(device, "vfat", ESP_MOUNT_FLAGS, Some(ESP_MOUNT_DATA))
208+
}
209+
210+
/// Mount the ESP from `device` at the given path and return a guard that
211+
/// synchronously unmounts (and flushes) it on drop.
212+
pub(crate) fn mount_esp_at(
213+
device: &str,
214+
path: std::path::PathBuf,
215+
) -> Result<bootc_mount::tempmount::MountGuard> {
216+
bootc_mount::tempmount::MountGuard::mount(
217+
device,
218+
path,
219+
"vfat",
220+
ESP_MOUNT_FLAGS,
221+
Some(ESP_MOUNT_DATA),
222+
)
202223
}
203224

204225
/// Filename release field for primary (new/upgraded) entry.
@@ -1145,6 +1166,100 @@ pub(crate) fn setup_composefs_uki_boot(
11451166
Ok(boot_digest)
11461167
}
11471168

1169+
/// A composefs image attached to a temporary directory with the ESP and a
1170+
/// tmpfs mounted inside it, ready for bootloader installation.
1171+
///
1172+
/// The composefs image (a detached `fsmount(2)` fd with no VFS path) is
1173+
/// attached to a tmpdir via `move_mount(2)`, giving us a real filesystem path
1174+
/// that `mount(2)` and bootctl can use. The ESP is mounted at
1175+
/// `<tmpdir>/efi` (if that directory exists in the image) or `<tmpdir>/boot`,
1176+
/// per the Boot Loader Specification. A tmpfs is also mounted at
1177+
/// `<tmpdir>/tmp` to provide a writable scratch area for tools invoked with
1178+
/// `--root`.
1179+
///
1180+
/// Drop order matters: the ESP and tmpfs guards are declared before `composefs`
1181+
/// so they are unmounted (and flushed) before the composefs root is detached.
1182+
pub(crate) struct MountedImageRoot {
1183+
// Unmounted before `composefs` on drop; ESP before tmp (inner before outer).
1184+
_esp: bootc_mount::tempmount::MountGuard,
1185+
_tmp: bootc_mount::tempmount::MountGuard,
1186+
composefs: TempMount,
1187+
pub(crate) esp_subdir: &'static str,
1188+
}
1189+
1190+
impl MountedImageRoot {
1191+
/// Find the ESP on `device`, attach the composefs image to a tmpdir, and
1192+
/// mount the ESP and a scratch tmpfs inside it.
1193+
// TODO: install to all ESPs on multi-device setups
1194+
#[context("Preparing image root for bootloader installation")]
1195+
pub(crate) fn new(
1196+
composefs_mnt_fd: std::os::fd::OwnedFd,
1197+
device: &bootc_blockdev::Device,
1198+
) -> Result<Self> {
1199+
let roots = device.find_all_roots()?;
1200+
let mut esp_part = None;
1201+
for root in &roots {
1202+
if let Some(esp) = root.find_partition_of_esp_optional()? {
1203+
esp_part = Some(esp);
1204+
break;
1205+
}
1206+
}
1207+
let esp_part = esp_part.ok_or_else(|| anyhow!("ESP partition not found"))?;
1208+
1209+
// Attach the detached composefs fsmount fd to a real tmpdir path so
1210+
// that mount(2) and bootctl --root can work with it.
1211+
let composefs = TempMount::mount_fd(composefs_mnt_fd)
1212+
.context("Attaching composefs image to temporary directory")?;
1213+
1214+
// TODO: support XBOOTLDR. Per BLS, the ESP should be mounted at /efi
1215+
// when a separate XBOOTLDR partition is present at /boot. bootc does
1216+
// not yet detect or use XBOOTLDR in the composefs install path, so
1217+
// unconditionally mount the ESP at /boot for now.
1218+
let esp_subdir = "boot";
1219+
1220+
let esp_path = composefs.dir.path().join(esp_subdir);
1221+
let esp =
1222+
mount_esp_at(&esp_part.path(), esp_path).context("Mounting ESP into composefs root")?;
1223+
1224+
// Mount a tmpfs over /tmp so that tools invoked with --root have a
1225+
// writable scratch area without touching the read-only EROFS root.
1226+
let tmp_path = composefs.dir.path().join("tmp");
1227+
let tmp = bootc_mount::tempmount::MountGuard::mount(
1228+
"tmpfs",
1229+
tmp_path,
1230+
"tmpfs",
1231+
MountFlags::NOEXEC | MountFlags::NOSUID | MountFlags::NODEV,
1232+
None::<&std::ffi::CStr>,
1233+
)
1234+
.context("Mounting tmpfs into composefs root")?;
1235+
1236+
Ok(Self {
1237+
_esp: esp,
1238+
_tmp: tmp,
1239+
composefs,
1240+
esp_subdir,
1241+
})
1242+
}
1243+
1244+
/// The composefs image as a capability-safe directory (for file reads).
1245+
pub(crate) fn dir(&self) -> &Dir {
1246+
&self.composefs.fd
1247+
}
1248+
1249+
/// Real filesystem path of the composefs tmpdir root.
1250+
pub(crate) fn root_path(&self) -> &std::path::Path {
1251+
self.composefs.dir.path()
1252+
}
1253+
1254+
/// Open the mounted ESP as a capability-safe directory.
1255+
pub(crate) fn open_esp_dir(&self) -> Result<Dir> {
1256+
self.composefs
1257+
.fd
1258+
.open_dir(self.esp_subdir)
1259+
.with_context(|| format!("Opening ESP at /{}", self.esp_subdir))
1260+
}
1261+
}
1262+
11481263
pub struct SecurebootKeys {
11491264
pub dir: Dir,
11501265
pub keys: Vec<Utf8PathBuf>,
@@ -1225,13 +1340,12 @@ pub(crate) async fn setup_composefs_boot(
12251340
let entries =
12261341
get_boot_resources(&fs, &*repo).context("Extracting boot entries from OCI image")?;
12271342

1228-
let mounted_fs = Dir::reopen_dir(
1229-
&repo
1230-
.mount(&id.to_hex())
1231-
.context("Failed to mount composefs image")?,
1232-
)?;
1343+
let composefs_mnt_fd = repo
1344+
.mount(&id.to_hex())
1345+
.context("Failed to mount composefs image")?;
1346+
let mounted_root = MountedImageRoot::new(composefs_mnt_fd, &root_setup.device_info)?;
12331347

1234-
let postfetch = PostFetchState::new(state, &mounted_fs)?;
1348+
let postfetch = PostFetchState::new(state, mounted_root.dir())?;
12351349

12361350
let boot_uuid = root_setup
12371351
.get_boot_uuid()?
@@ -1253,11 +1367,9 @@ pub(crate) async fn setup_composefs_boot(
12531367
)?;
12541368
} else {
12551369
crate::bootloader::install_systemd_boot(
1256-
&root_setup.device_info,
1257-
&root_setup.physical_root_path,
1370+
&mounted_root,
12581371
&state.config_opts,
1259-
None,
1260-
get_secureboot_keys(&mounted_fs, BOOTC_AUTOENROLL_PATH)?,
1372+
get_secureboot_keys(mounted_root.dir(), BOOTC_AUTOENROLL_PATH)?,
12611373
)?;
12621374
}
12631375

@@ -1280,7 +1392,7 @@ pub(crate) async fn setup_composefs_boot(
12801392
repo,
12811393
&id,
12821394
entry,
1283-
&mounted_fs,
1395+
mounted_root.dir(),
12841396
)?,
12851397
BootType::Uki => setup_composefs_uki_boot(
12861398
BootSetupType::Setup((&root_setup, &state, &postfetch)),

crates/lib/src/bootloader.rs

Lines changed: 64 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use fn_error_context::context;
1010

1111
use bootc_mount as mount;
1212

13-
use crate::bootc_composefs::boot::{SecurebootKeys, mount_esp};
13+
use crate::bootc_composefs::boot::{MountedImageRoot, SecurebootKeys};
1414
use crate::utils;
1515

1616
/// The name of the mountpoint for efi (as a subdirectory of /boot, or at the toplevel)
@@ -23,6 +23,16 @@ const BOOTUPD_UPDATES: &str = "usr/lib/bootupd/updates";
2323
// from: https://github.com/systemd/systemd/blob/26b2085d54ebbfca8637362eafcb4a8e3faf832f/man/systemd-boot.xml#L392
2424
const SYSTEMD_KEY_DIR: &str = "loader/keys";
2525

26+
/// Redirect bootctl's entry-token write into a tmpfs scratch area.
27+
///
28+
/// bootctl unconditionally writes `<KERNEL_INSTALL_CONF_ROOT>/entry-token`
29+
/// during installation. Because systemd's `path_join()` is naive string
30+
/// concatenation (see `src/bootctl/bootctl-install.c`), setting this to
31+
/// `/tmp` causes the write to land at `<composefs_root>/tmp/entry-token`
32+
/// on the MountedImageRoot tmpfs, where it is automatically discarded.
33+
/// bootc does not use the entry-token at all.
34+
const KERNEL_INSTALL_CONF_ROOT: &str = "/tmp";
35+
2636
/// Mount the first ESP found among backing devices at /boot/efi.
2737
///
2838
/// This is used by the install-alongside path to clean stale bootloader
@@ -218,66 +228,79 @@ pub(crate) fn install_via_bootupd(
218228
}
219229
}
220230

221-
/// Install systemd-boot to the first ESP found among backing devices.
222-
///
223-
/// On multi-device setups only the first ESP is installed to; additional
224-
/// ESPs on other backing devices are left untouched.
225-
// TODO: install to all ESPs on multi-device setups
231+
/// Install systemd-boot using a pre-prepared boot root.
226232
#[context("Installing bootloader")]
227233
pub(crate) fn install_systemd_boot(
228-
device: &bootc_blockdev::Device,
229-
_rootfs: &Utf8Path,
234+
prepared_root: &MountedImageRoot,
230235
configopts: &crate::install::InstallConfigOpts,
231-
_deployment_path: Option<&str>,
232236
autoenroll: Option<SecurebootKeys>,
233237
) -> Result<()> {
234-
let roots = device.find_all_roots()?;
235-
let mut esp_part = None;
236-
for root in &roots {
237-
if let Some(esp) = root.find_partition_of_esp_optional()? {
238-
esp_part = Some(esp);
239-
break;
240-
}
241-
}
242-
let esp_part = esp_part.ok_or_else(|| anyhow::anyhow!("ESP partition not found"))?;
243-
244-
let esp_mount = mount_esp(&esp_part.path()).context("Mounting ESP")?;
245-
let esp_path = Utf8Path::from_path(esp_mount.dir.path())
246-
.ok_or_else(|| anyhow::anyhow!("Failed to convert ESP mount path to UTF-8"))?;
247-
248238
println!("Installing bootloader via systemd-boot");
249239

250-
let mut bootctl_args = vec!["install", "--esp-path", esp_path.as_str()];
240+
// We use the --root of the mounted target root, so we have the right /etc/os-release.
241+
let root_path = prepared_root
242+
.root_path()
243+
.to_str()
244+
.ok_or_else(|| anyhow::anyhow!("composefs tmpdir path is not UTF-8"))?;
245+
let esp_path_in_root = format!("/{}", prepared_root.esp_subdir);
246+
247+
let mut bootctl_args = vec![
248+
"install",
249+
"--root",
250+
root_path,
251+
"--esp-path",
252+
esp_path_in_root.as_str(),
253+
// If we supported XBOOTLDR in the future, that'd go here with --boot-path.
254+
];
251255

252256
if configopts.generic_image {
253-
bootctl_args.extend(["--random-seed", "no"]);
257+
bootctl_args.extend(["--random-seed", "no", "--no-variables"]);
254258
}
255259

256260
Command::new("bootctl")
257261
.args(bootctl_args)
262+
// Skip partition-type GUID validation because e.g. osbuild
263+
// may not provide the udev database.
264+
.env("SYSTEMD_RELAX_ESP_CHECKS", "1")
265+
// bootc doesn't use the entry-token file, but bootctl still tries to
266+
// write it. Redirect into /tmp (a tmpfs mounted by MountedImageRoot)
267+
// so the write succeeds and is automatically discarded.
268+
.env("KERNEL_INSTALL_CONF_ROOT", KERNEL_INSTALL_CONF_ROOT)
258269
.log_debug()
259-
.run_inherited_with_cmd_context()?;
270+
// Capture stderr so bootctl error messages appear in our error chain.
271+
.run_capture_stderr()?;
260272

261273
if let Some(SecurebootKeys { dir, keys }) = autoenroll {
262-
let path = esp_path.join(SYSTEMD_KEY_DIR);
263-
create_dir_all(&path)?;
264-
265-
let keys_dir = esp_mount
266-
.fd
274+
let esp_dir = prepared_root.open_esp_dir()?;
275+
let keys_path = prepared_root
276+
.root_path()
277+
.join(prepared_root.esp_subdir)
278+
.join(SYSTEMD_KEY_DIR);
279+
create_dir_all(&keys_path).with_context(|| {
280+
format!("Creating secureboot key directory {}", keys_path.display())
281+
})?;
282+
283+
let keys_dir = esp_dir
267284
.open_dir(SYSTEMD_KEY_DIR)
268-
.with_context(|| format!("Opening {path}"))?;
285+
.with_context(|| format!("Opening {SYSTEMD_KEY_DIR}"))?;
269286

270287
for filename in keys.iter() {
271-
let p = path.join(&filename);
272-
273-
// create directory if it doesn't already exist
274-
if let Some(parent) = p.parent() {
275-
create_dir_all(parent)?;
288+
// Each key lives in a subdirectory, e.g. "PK/PK.auth".
289+
// Create the per-key subdirectory before copying the file into it.
290+
if let Some(parent) = filename.parent() {
291+
if !parent.as_str().is_empty() {
292+
keys_dir
293+
.create_dir_all(parent)
294+
.with_context(|| format!("Creating key subdirectory {parent}"))?;
295+
}
276296
}
277-
278-
dir.copy(&filename, &keys_dir, &filename)
279-
.with_context(|| format!("Copying secure boot key: {p}"))?;
280-
println!("Wrote Secure Boot key: {p}");
297+
dir.copy(filename, &keys_dir, filename)
298+
.with_context(|| format!("Copying secure boot key {filename:?}"))?;
299+
println!(
300+
"Wrote Secure Boot key: {}/{}",
301+
keys_path.display(),
302+
filename.as_str()
303+
);
281304
}
282305
if keys.is_empty() {
283306
tracing::debug!("No Secure Boot keys provided for systemd-boot enrollment");

crates/mount/src/tempmount.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::os::fd::AsFd;
2+
use std::path::Path;
23

34
use anyhow::{Context, Result};
45

@@ -7,6 +8,47 @@ use cap_std_ext::cap_std::{ambient_authority, fs::Dir};
78
use fn_error_context::context;
89
use rustix::mount::{MountFlags, MoveMountFlags, UnmountFlags, move_mount, unmount};
910

11+
/// RAII guard that synchronously unmounts a path on drop, flushing all writes.
12+
///
13+
/// Prefer this over `MNT_DETACH` when the mounted filesystem has received
14+
/// writes (e.g. FAT ESP) and you need them flushed before the guard drops.
15+
#[derive(Debug)]
16+
pub struct MountGuard(std::path::PathBuf);
17+
18+
impl MountGuard {
19+
/// Mount `dev` at `path` and return a guard that will synchronously
20+
/// unmount it on drop.
21+
pub fn mount(
22+
dev: &str,
23+
path: std::path::PathBuf,
24+
fstype: &str,
25+
flags: MountFlags,
26+
data: Option<&std::ffi::CStr>,
27+
) -> Result<Self> {
28+
rustix::mount::mount(dev, &path, fstype, flags, data)
29+
.with_context(|| format!("Mounting {} at {}", dev, path.display()))?;
30+
Ok(Self(path))
31+
}
32+
}
33+
34+
impl std::ops::Deref for MountGuard {
35+
type Target = Path;
36+
fn deref(&self) -> &Path {
37+
&self.0
38+
}
39+
}
40+
41+
impl Drop for MountGuard {
42+
fn drop(&mut self) {
43+
if let Err(e) = unmount(&self.0, UnmountFlags::empty()) {
44+
// Synchronous unmount failure may mean buffered writes were not
45+
// flushed to the underlying device (e.g. FAT ESP). Treat this as
46+
// an error rather than a warning.
47+
tracing::error!("Failed to unmount {}: {e:?}", self.0.display());
48+
}
49+
}
50+
}
51+
1052
/// RAII wrapper for a temporary mount that is automatically unmounted on drop.
1153
#[derive(Debug)]
1254
pub struct TempMount {

0 commit comments

Comments
 (0)