Skip to content

Commit c0e3307

Browse files
committed
fs: Add async filesystem import with parallel verity computation
Previously, we had a lot of synchronous code for interacting with the local filesystem, but the tar processing used by OCI was async. This created a need to do "the same thing" in two different ways.

But importing from a local filesystem is equally amenable to being async! Replacing that with a model where we read the filesystem metadata synchronously, but defer fsverity computation and object import to worker threads, results in a *dramatic* speedup for large filesystem trees. Computing the composefs digest of the 40G `target/` directory I have locally takes 1m20s before this patch, and 8s after (32 cores, so a lot more CPU time used of course).

Three optimized paths depending on context:

- Secure repo: std::io::copy (uses copy_file_range for reflinks on CoW filesystems) then kernel fsverity enable + measure
- Insecure repo: tee through FsVerityHasher while copying to tmpfile, computing the digest in a single pass
- No repo: incremental FsVerityHasher from fd, one block at a time

Also: the composefs-http ensure_object call is migrated to ensure_object_async which it should have been using in the first place! This is a clear advantage of having one way to do it.

Assisted-by: OpenCode (Claude Opus 4)
Signed-off-by: Colin Walters <walters@verbum.org>
1 parent 8509cc6 commit c0e3307

4 files changed

Lines changed: 490 additions & 272 deletions

File tree

crates/cfsctl/src/lib.rs

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ use std::{ffi::OsString, path::PathBuf};
2929
#[cfg(feature = "oci")]
3030
use std::{fs::create_dir_all, io::IsTerminal};
3131

32-
#[cfg(any(feature = "oci", feature = "http"))]
3332
use std::sync::Arc;
3433

3534
use anyhow::{Context as _, Result};
@@ -573,11 +572,31 @@ pub async fn run_app(args: App) -> Result<()> {
573572
);
574573
}
575574

576-
if args.no_repo {
577-
let effective_hash = args.hash.unwrap_or(HashType::Sha512);
575+
// Commands that only need verity digests (no object storage) can
576+
// run without opening a repository.
577+
if args.no_repo
578+
|| matches!(
579+
args.cmd,
580+
Command::ComputeId { .. } | Command::CreateDumpfile { .. }
581+
)
582+
{
583+
// If a repo path is available and --no-repo wasn't passed,
584+
// try to read the hash type from the repo's metadata so that
585+
// e.g. `cfsctl --repo <sha256-repo> compute-id` uses SHA-256
586+
// instead of the default SHA-512.
587+
let effective_hash = if !args.no_repo {
588+
if let Ok(repo_path) = resolve_repo_path(&args) {
589+
resolve_hash_type(&repo_path, args.hash)
590+
.unwrap_or(args.hash.unwrap_or(HashType::Sha512))
591+
} else {
592+
args.hash.unwrap_or(HashType::Sha512)
593+
}
594+
} else {
595+
args.hash.unwrap_or(HashType::Sha512)
596+
};
578597
return match effective_hash {
579-
HashType::Sha256 => run_cmd_without_repo::<Sha256HashValue>(args),
580-
HashType::Sha512 => run_cmd_without_repo::<Sha512HashValue>(args),
598+
HashType::Sha256 => run_cmd_without_repo::<Sha256HashValue>(args).await,
599+
HashType::Sha512 => run_cmd_without_repo::<Sha512HashValue>(args).await,
581600
};
582601
}
583602

@@ -714,17 +733,25 @@ fn load_filesystem_from_oci_image<ObjectID: FsVerityHashValue>(
714733
Ok(fs)
715734
}
716735

717-
fn load_filesystem_from_ondisk_fs<ObjectID: FsVerityHashValue>(
736+
async fn load_filesystem_from_ondisk_fs<ObjectID: FsVerityHashValue>(
718737
fs_opts: &FsReadOptions,
719-
repo: Option<&Repository<ObjectID>>,
738+
repo: Option<Arc<Repository<ObjectID>>>,
720739
) -> Result<FileSystem<RegularFile<ObjectID>>> {
740+
// The async API needs an OwnedFd; fs_opts.path is typically absolute
741+
// so the dirfd is unused for path resolution, but required by the API.
742+
let dirfd = rustix::fs::openat(
743+
CWD,
744+
".",
745+
OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC,
746+
Mode::empty(),
747+
)?;
721748
let mut fs = if fs_opts.no_propagate_usr_to_root {
722-
composefs::fs::read_filesystem(CWD, &fs_opts.path, repo)?
749+
composefs::fs::read_filesystem(dirfd, fs_opts.path.clone(), repo.clone()).await?
723750
} else {
724-
composefs::fs::read_container_root(CWD, &fs_opts.path, repo)?
751+
composefs::fs::read_container_root(dirfd, fs_opts.path.clone(), repo.clone()).await?
725752
};
726753
if fs_opts.bootable {
727-
if let Some(repo) = repo {
754+
if let Some(repo) = &repo {
728755
fs.transform_for_boot(repo)?;
729756
} else {
730757
let rootfd = rustix::fs::openat(
@@ -797,15 +824,15 @@ fn dump_file_impl(
797824
}
798825

799826
/// Run commands that don't require a repository.
800-
pub fn run_cmd_without_repo<ObjectID: FsVerityHashValue>(args: App) -> Result<()> {
827+
pub async fn run_cmd_without_repo<ObjectID: FsVerityHashValue>(args: App) -> Result<()> {
801828
match args.cmd {
802829
Command::ComputeId { fs_opts } => {
803-
let fs = load_filesystem_from_ondisk_fs::<ObjectID>(&fs_opts, None)?;
830+
let fs = load_filesystem_from_ondisk_fs::<ObjectID>(&fs_opts, None).await?;
804831
let id = fs.compute_image_id();
805832
println!("{}", id.to_hex());
806833
}
807834
Command::CreateDumpfile { fs_opts } => {
808-
let fs = load_filesystem_from_ondisk_fs::<ObjectID>(&fs_opts, None)?;
835+
let fs = load_filesystem_from_ondisk_fs::<ObjectID>(&fs_opts, None).await?;
809836
fs.print_dumpfile()?;
810837
}
811838
_ => {
@@ -820,6 +847,7 @@ pub async fn run_cmd_with_repo<ObjectID>(repo: Repository<ObjectID>, args: App)
820847
where
821848
ObjectID: FsVerityHashValue,
822849
{
850+
let repo = Arc::new(repo);
823851
match args.cmd {
824852
Command::Init { .. } => {
825853
// Handled in run_app before we get here
@@ -841,7 +869,6 @@ where
841869
#[cfg(feature = "oci")]
842870
Command::Oci { cmd: oci_cmd } => match oci_cmd {
843871
OciCommand::ImportLayer { name, ref digest } => {
844-
let repo = Arc::new(repo);
845872
let (object_id, _stats) = composefs_oci::import_layer(
846873
&repo,
847874
digest,
@@ -899,9 +926,8 @@ where
899926
} => {
900927
// If no explicit name provided, use the image reference as the tag
901928
let tag_name = name.as_deref().unwrap_or(image);
902-
let repo_arc = Arc::new(repo);
903929
let (result, stats) =
904-
composefs_oci::pull_image(&repo_arc, image, Some(tag_name), None).await?;
930+
composefs_oci::pull_image(&repo, image, Some(tag_name), None).await?;
905931

906932
println!("manifest {}", result.manifest_digest);
907933
println!("config {}", result.config_digest);
@@ -917,7 +943,7 @@ where
917943

918944
if bootable {
919945
let image_verity =
920-
composefs_oci::generate_boot_image(&repo_arc, &result.manifest_digest)?;
946+
composefs_oci::generate_boot_image(&repo, &result.manifest_digest)?;
921947
println!("Boot image: {}", image_verity.to_hex());
922948
}
923949
}
@@ -1088,18 +1114,13 @@ where
10881114
fs_opts,
10891115
ref image_name,
10901116
} => {
1091-
let fs = load_filesystem_from_ondisk_fs(&fs_opts, Some(&repo))?;
1117+
let fs = load_filesystem_from_ondisk_fs(&fs_opts, Some(Arc::clone(&repo))).await?;
10921118
let id = fs.commit_image(&repo, image_name.as_deref())?;
10931119
println!("{}", id.to_id());
10941120
}
1095-
Command::ComputeId { fs_opts } => {
1096-
let fs = load_filesystem_from_ondisk_fs(&fs_opts, Some(&repo))?;
1097-
let id = fs.compute_image_id();
1098-
println!("{}", id.to_hex());
1099-
}
1100-
Command::CreateDumpfile { fs_opts } => {
1101-
let fs = load_filesystem_from_ondisk_fs::<ObjectID>(&fs_opts, None)?;
1102-
fs.print_dumpfile()?;
1121+
Command::ComputeId { .. } | Command::CreateDumpfile { .. } => {
1122+
// Handled in run_app before opening the repo
1123+
unreachable!("compute-id and create-dumpfile are dispatched without a repo");
11031124
}
11041125
Command::Mount { name, mountpoint } => {
11051126
repo.mount_at(&name, &mountpoint)?;
@@ -1165,7 +1186,7 @@ where
11651186
}
11661187
#[cfg(feature = "http")]
11671188
Command::Fetch { url, name } => {
1168-
let (digest, verity) = composefs_http::download(&url, &name, Arc::new(repo)).await?;
1189+
let (digest, verity) = composefs_http::download(&url, &name, Arc::clone(&repo)).await?;
11691190
println!("content {digest}");
11701191
println!("verity {}", verity.to_hex());
11711192
}

crates/composefs-http/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ impl<ObjectID: FsVerityHashValue> Downloader<ObjectID> {
8484
let my_id = if is_symlink {
8585
ObjectID::from_object_pathname(&data)?
8686
} else {
87-
self.repo.ensure_object(&data)?
87+
self.repo.ensure_object_async(data.into()).await?
8888
};
8989
progress.inc(1);
9090

0 commit comments

Comments
 (0)