From e9c212e6d7c94a1626a05080f08413dfae3085ee Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 1 Jun 2026 19:59:10 -0400 Subject: [PATCH 1/5] feat(config): add rootfs view opt-in Add a rootfs_view.enabled configuration knob and document it. The flag keeps rootfs view preparation disabled unless deployments explicitly opt in. Signed-off-by: sidneychang <2190206983@qq.com> --- deployment/urunc-deploy/config.toml | 3 +++ docs/configuration.md | 37 +++++++++++++++++++++++++++++ pkg/unikontainers/urunc_config.go | 11 +++++++++ 3 files changed, 51 insertions(+) diff --git a/deployment/urunc-deploy/config.toml b/deployment/urunc-deploy/config.toml index 8eeffaf4e..0330dba96 100644 --- a/deployment/urunc-deploy/config.toml +++ b/deployment/urunc-deploy/config.toml @@ -7,6 +7,9 @@ syslog = false [timestamps] enabled = false +[rootfs_view] +enabled = false + [monitors.qemu] default_memory_mb = 256 default_vcpus = 1 diff --git a/docs/configuration.md b/docs/configuration.md index a2daa4ba4..75dd87f1c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -38,6 +38,9 @@ default_vcpus = 1 [extra_binaries.virtiofsd] path = "/usr/libexec/virtiofsd" options = "--sandbox none" + +[rootfs_view] +enabled = false ``` ## Configuration Sections @@ -89,6 +92,34 @@ destination = "/tmp/urunc-timestamps.log" When enabled, `urunc` will log performance timestamps to help with debugging and optimization. +### Rootfs View Configuration + +The `[rootfs_view]` section controls whether the urunc shim prepares a +per-container containerd rootfs view at task Create (for `devmapper` / +`blockfile` snapshotters). + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `enabled` | boolean | `false` | Prepare rootfs views for container block rootfs after shim task Create | + +When `enabled = true`, the shim first lets the wrapped task service create the +task so the bundle rootfs is mounted. 
It then runs `ChooseRootfs` and prepares a +view only if **all** of the following hold: + +1. The container snapshotter is block-based (`devmapper` or `blockfile`). +2. Shim `ChooseRootfs` selected **container block rootfs** (`type=block` with a + non-empty `MountedPath`). + +This matches the block-rootfs boot-artifact path: kernel/initrd are read from a +read-only view instead of being copied out of the container rootfs before attach. + +**Example:** + +```toml +[rootfs_view] +enabled = true +``` + ### Monitor Configuration The `[monitors]` section allows you to configure default settings for different @@ -201,6 +232,9 @@ To create a configuration file, you can: [monitors.spt] default_memory_mb = 256 default_vcpus = 1 + + [rootfs_view] + enabled = false EOF ``` @@ -244,6 +278,9 @@ default_vcpus = 1 default_memory_mb = 256 default_vcpus = 1 # path is not set by default - urunc will search in PATH + +[rootfs_view] +enabled = false ``` ## Notes diff --git a/pkg/unikontainers/urunc_config.go b/pkg/unikontainers/urunc_config.go index 5f21d106e..36436fbdd 100644 --- a/pkg/unikontainers/urunc_config.go +++ b/pkg/unikontainers/urunc_config.go @@ -34,9 +34,15 @@ type UruncTimestamps struct { Destination string `toml:"destination"` // Used to specify a file for timestamps } +// UruncRootfsView configures shim-side per-container rootfs views (devmapper/blockfile). 
+type UruncRootfsView struct { + Enabled bool `toml:"enabled"` +} + type UruncConfig struct { Log UruncLog `toml:"log"` Timestamps UruncTimestamps `toml:"timestamps"` + RootfsView UruncRootfsView `toml:"rootfs_view"` Monitors map[string]types.MonitorConfig `toml:"monitors"` ExtraBins map[string]types.ExtraBinConfig `toml:"extra_binaries"` } @@ -94,10 +100,15 @@ func defaultExtraBinConfig() map[string]types.ExtraBinConfig { } } +func defaultRootfsViewConfig() UruncRootfsView { + return UruncRootfsView{Enabled: false} +} + func defaultUruncConfig() *UruncConfig { return &UruncConfig{ Log: defaultLogConfig(), Timestamps: defaultTimestampsConfig(), + RootfsView: defaultRootfsViewConfig(), Monitors: defaultMonitorsConfig(), ExtraBins: defaultExtraBinConfig(), } From e5108f74dede15eca561ea8063ce6b20b9a75c5a Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 1 Jun 2026 19:59:37 -0400 Subject: [PATCH 2/5] feat(rootfs-view): add snapshot view plumbing Add bundle state for shim-prepared rootfs views and containerd helpers to prepare and clean those views. Keep the accessor internal so shim code only passes a session and persisted cleanup state. 
Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/containerd-shim/containerd/rootfs_view.go | 354 ++++++++++++++++++ pkg/containerd-shim/containerd/session.go | 2 - pkg/unikontainers/rootfs_view_boot.go | 150 ++++++++ pkg/unikontainers/types/types.go | 12 +- pkg/unikontainers/utils.go | 11 +- 5 files changed, 521 insertions(+), 8 deletions(-) create mode 100644 pkg/containerd-shim/containerd/rootfs_view.go create mode 100644 pkg/unikontainers/rootfs_view_boot.go diff --git a/pkg/containerd-shim/containerd/rootfs_view.go b/pkg/containerd-shim/containerd/rootfs_view.go new file mode 100644 index 000000000..cee448336 --- /dev/null +++ b/pkg/containerd-shim/containerd/rootfs_view.go @@ -0,0 +1,354 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package containerd + +import ( + "context" + "fmt" + "os" + "path/filepath" + + leasesapi "github.com/containerd/containerd/api/services/leases/v1" + snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1" + cntrtypes "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/mount" + "github.com/urunc-dev/urunc/pkg/unikontainers" + "github.com/urunc-dev/urunc/pkg/unikontainers/types" + "golang.org/x/sys/unix" + "google.golang.org/grpc/metadata" +) + +const ( + rootfsViewKeyPrefix = "urunc-rootfs-view-" + rootfsViewLeasePrefix = "urunc-rootfs-view-lease-" + rootfsViewMountpointName = "rootfs-view-mount" +) + +type rootfsViewAccessor struct { + namespace string + containerID string + snapshotter string + snapshotKey string + snapshots snapshotsapi.SnapshotsClient + leases leasesapi.LeasesClient +} + +func newRootfsViewAccessor(session *Session) *rootfsViewAccessor { + a := &rootfsViewAccessor{ + namespace: session.GetNamespace(), + containerID: session.GetContainerID(), + snapshots: session.snapshotsClient(), + leases: session.leasesClient(), + } + ctr := session.GetContainer() + if ctr != nil && ctr.GetSnapshotKey() != "" { + a.snapshotter = ctr.GetSnapshotter() + a.snapshotKey = ctr.GetSnapshotKey() + } + return a +} + +// PrepareRootfsView prepares a rootfs view when the container and +// rootfs choice support it. The returned shouldPrepare value lets callers +// distinguish config/check failures from prepare failures for logging. 
+func PrepareRootfsView(ctx context.Context, session *Session, rootfs types.RootfsParams, bundle string) (types.RootfsViewState, bool, error) { + if session == nil { + return types.RootfsViewState{}, false, nil + } + + accessor := newRootfsViewAccessor(session) + shouldPrepare, err := accessor.shouldPrepare(rootfs) + if err != nil { + return types.RootfsViewState{}, false, err + } + if !shouldPrepare { + return types.RootfsViewState{}, false, nil + } + + state, err := accessor.prepare(ctx, bundle) + if err != nil { + return types.RootfsViewState{}, true, err + } + + return state, true, nil +} + +// CleanupRootfsView removes a rootfs view using container metadata from the +// session and cleanup state read from the bundle. +func CleanupRootfsView(ctx context.Context, session *Session, snapshotter, mountpoint string) error { + if session == nil { + return fmt.Errorf("containerd session is nil") + } + + accessor := newRootfsViewAccessor(session) + return accessor.cleanupRootfsView(ctx, snapshotter, mountpoint) +} + +func (a *rootfsViewAccessor) shouldPrepare(rootfs types.RootfsParams) (bool, error) { + if a == nil || + a.snapshotter == "" || + a.snapshotKey == "" || + (a.snapshotter != "devmapper" && a.snapshotter != "blockfile") || + rootfs.Type != "block" || + rootfs.MountedPath == "" { + return false, nil + } + + uruncCfg, cfgErr := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) + if cfgErr != nil { + return false, cfgErr + } + return uruncCfg.RootfsView.Enabled, nil +} + +// prepare records a read-only view of the committed rootfs snapshot for runtime use. +// On success it returns view state for the caller to persist in bundle rootfs-view.json. 
+func (a *rootfsViewAccessor) prepare(ctx context.Context, bundle string) (types.RootfsViewState, error) { + if a == nil { + return types.RootfsViewState{}, fmt.Errorf("rootfs view accessor is nil") + } + + snapshotKey, err := a.resolveCommittedSnapshotBase(ctx, a.snapshotter, a.snapshotKey) + if err != nil { + return types.RootfsViewState{}, err + } + + viewKey := rootfsViewKeyPrefix + a.containerID + leaseID := rootfsViewLeasePrefix + a.containerID + + nsCtx := withNamespace(ctx, a.namespace) + if _, err := a.leases.Create(nsCtx, &leasesapi.CreateRequest{ID: leaseID}); err != nil { + err = containerdErr(err) + if err != nil && !errdefs.IsAlreadyExists(err) { + return types.RootfsViewState{}, fmt.Errorf("create rootfs view lease %s: %w", leaseID, err) + } + } + + leaseCtx := metadata.AppendToOutgoingContext(nsCtx, "containerd-lease", leaseID) + mounts, err := a.createRootfsView(leaseCtx, viewKey, snapshotKey) + if err != nil { + _ = deleteRootfsViewLease(ctx, a.namespace, leaseID, a.leases) + return types.RootfsViewState{}, err + } + + mountpoint := filepath.Join(filepath.Clean(bundle), rootfsViewMountpointName) + keepView := false + defer func() { + if !keepView { + _ = cleanupRootfsViewMountpoint(mountpoint) + _ = removeRootfsViewSnapshotAndLease(ctx, a.namespace, a.containerID, a.snapshotter, a.snapshots, a.leases) + } + }() + + if err := prepareRootfsViewMountpoint(mountpoint, mounts); err != nil { + return types.RootfsViewState{}, err + } + + keepView = true + return types.RootfsViewState{ + Snapshotter: a.snapshotter, + Mountpoint: mountpoint, + Mounts: mounts, + }, nil +} + +// Rootfs view cleanup (call chain): +// +// Delete / Stop: ShouldCleanupRootfsView(bundle) → CleanupRootfsView(ctx, session, snapshotter, mountpoint) +// Create rollback: CleanupRootfsView(ctx, session, "", state.Mountpoint) +// +// cleanupRootfsView → removeRootfsViewSnapshotAndLease (view snapshot + lease in containerd) +// prepare failure after lease create → deleteRootfsViewLease 
(lease only) + +// cleanupRootfsView unmounts the shim-mounted rootfs view, then removes its snapshot and lease. +func (a *rootfsViewAccessor) cleanupRootfsView(ctx context.Context, snapshotter, mountpoint string) error { + if a == nil { + return fmt.Errorf("rootfs view accessor is nil") + } + if a.containerID == "" { + return fmt.Errorf("container id is empty") + } + + effectiveSnapshotter := snapshotter + if effectiveSnapshotter == "" { + effectiveSnapshotter = a.snapshotter + } + if effectiveSnapshotter == "" { + return fmt.Errorf("snapshotter name required for rootfs view cleanup") + } + + if err := cleanupRootfsViewMountpoint(mountpoint); err != nil { + return err + } + + return removeRootfsViewSnapshotAndLease( + ctx, a.namespace, a.containerID, effectiveSnapshotter, a.snapshots, a.leases, + ) +} + +func (a *rootfsViewAccessor) statSnapshot(ctx context.Context, snapshotter, key string) (parent string, committed bool, err error) { + resp, err := a.snapshots.Stat(withNamespace(ctx, a.namespace), &snapshotsapi.StatSnapshotRequest{ + Snapshotter: snapshotter, + Key: key, + }) + if err = containerdErr(err); err != nil { + return "", false, err + } + info := resp.GetInfo() + if info == nil { + return "", false, fmt.Errorf("stat snapshot %s (%s): empty info", key, snapshotter) + } + return info.GetParent(), info.GetKind() == snapshotsapi.Kind_COMMITTED, nil +} + +func (a *rootfsViewAccessor) resolveCommittedSnapshotBase(ctx context.Context, snapshotter, snapshotKey string) (string, error) { + parent, committed, err := a.statSnapshot(ctx, snapshotter, snapshotKey) + if err != nil { + return "", fmt.Errorf("stat snapshot %s (%s): %w", snapshotKey, snapshotter, err) + } + if committed { + return snapshotKey, nil + } + if parent == "" { + return snapshotKey, nil + } + + current := parent + for { + parent, committed, err = a.statSnapshot(ctx, snapshotter, current) + if err != nil { + return "", fmt.Errorf("stat snapshot %s (%s parent walk): %w", current, snapshotter, 
err) + } + if committed { + return current, nil + } + if parent == "" { + return "", fmt.Errorf("%s snapshot %s has no committed parent in chain", snapshotter, snapshotKey) + } + current = parent + } +} + +func (a *rootfsViewAccessor) createRootfsView(ctx context.Context, viewKey, parentKey string) ([]mount.Mount, error) { + nsCtx := withNamespace(ctx, a.namespace) + viewResp, err := a.snapshots.View(nsCtx, &snapshotsapi.ViewSnapshotRequest{ + Snapshotter: a.snapshotter, + Key: viewKey, + Parent: parentKey, + }) + if err = containerdErr(err); err == nil { + return protoMountsToMounts(viewResp.GetMounts()), nil + } + if !errdefs.IsAlreadyExists(err) { + return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err) + } + + // Reuse an existing view left by a retry or partial prepare. + mountsResp, err := a.snapshots.Mounts(nsCtx, &snapshotsapi.MountsRequest{ + Snapshotter: a.snapshotter, + Key: viewKey, + }) + if err = containerdErr(err); err != nil { + return nil, fmt.Errorf("create rootfs view %s from %s: %w", viewKey, parentKey, err) + } + return protoMountsToMounts(mountsResp.GetMounts()), nil +} + +func protoMountsToMounts(mm []*cntrtypes.Mount) []mount.Mount { + out := make([]mount.Mount, len(mm)) + for i, m := range mm { + out[i] = mount.Mount{ + Type: m.Type, + Source: m.Source, + Target: m.Target, + Options: m.Options, + } + } + return out +} + +// ShouldCleanupRootfsView reports whether bundle rootfs-view.json records a snapshotter and returns its cleanup state.
+func ShouldCleanupRootfsView(bundle string) (bool, string, string, error) { + state, err := unikontainers.LoadRootfsViewState(bundle) + if err != nil { + return false, "", "", err + } + if state == nil || state.Snapshotter == "" { + return false, "", "", nil + } + return true, state.Snapshotter, state.Mountpoint, nil +} + +func prepareRootfsViewMountpoint(mountpoint string, mounts []mount.Mount) error { + if err := cleanupRootfsViewMountpoint(mountpoint); err != nil { + return err + } + if err := os.MkdirAll(mountpoint, 0o755); err != nil { + return fmt.Errorf("create rootfs view mountpoint %s: %w", mountpoint, err) + } + if err := mount.All(mounts, mountpoint); err != nil { + _ = cleanupRootfsViewMountpoint(mountpoint) + return fmt.Errorf("mount rootfs view at %s: %w", mountpoint, err) + } + return nil +} + +func cleanupRootfsViewMountpoint(mountpoint string) error { + if mountpoint == "" { + return nil + } + mountpoint = filepath.Clean(mountpoint) + if err := mount.Unmount(mountpoint, 0); err != nil && !os.IsNotExist(err) && err != unix.EINVAL { + return fmt.Errorf("unmount rootfs view mountpoint %s: %w", mountpoint, err) + } + if err := os.RemoveAll(mountpoint); err != nil { + return fmt.Errorf("remove rootfs view mountpoint %s: %w", mountpoint, err) + } + return nil +} + +// removeRootfsViewSnapshotAndLease deletes the view snapshot and its lease in containerd. 
+func removeRootfsViewSnapshotAndLease( + ctx context.Context, + namespace, containerID, snapshotter string, + snapshots snapshotsapi.SnapshotsClient, + leases leasesapi.LeasesClient, +) error { + if containerID == "" || snapshotter == "" { + return nil + } + nsCtx := withNamespace(ctx, namespace) + _, err := snapshots.Remove(nsCtx, &snapshotsapi.RemoveSnapshotRequest{ + Snapshotter: snapshotter, + Key: rootfsViewKeyPrefix + containerID, + }) + if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) { + return err + } + return deleteRootfsViewLease(ctx, namespace, rootfsViewLeasePrefix+containerID, leases) +} + +// deleteRootfsViewLease removes only the containerd lease (Prepare rollback after lease create). +func deleteRootfsViewLease(ctx context.Context, namespace, leaseID string, leases leasesapi.LeasesClient) error { + if leaseID == "" { + return nil + } + _, err := leases.Delete(withNamespace(ctx, namespace), &leasesapi.DeleteRequest{ID: leaseID}) + if err = containerdErr(err); err != nil && !errdefs.IsNotFound(err) { + return err + } + return nil +} diff --git a/pkg/containerd-shim/containerd/session.go b/pkg/containerd-shim/containerd/session.go index e7168ffa1..c3ff02ce1 100644 --- a/pkg/containerd-shim/containerd/session.go +++ b/pkg/containerd-shim/containerd/session.go @@ -158,12 +158,10 @@ func (s *Session) contentClient() contentapi.ContentClient { return contentapi.NewContentClient(s.conn) } -//nolint:unused // Used by follow-up feature-specific access constructors. func (s *Session) snapshotsClient() snapshotsapi.SnapshotsClient { return snapshotsapi.NewSnapshotsClient(s.conn) } -//nolint:unused // Used by follow-up feature-specific access constructors. 
func (s *Session) leasesClient() leasesapi.LeasesClient { return leasesapi.NewLeasesClient(s.conn) } diff --git a/pkg/unikontainers/rootfs_view_boot.go b/pkg/unikontainers/rootfs_view_boot.go new file mode 100644 index 000000000..8fca49915 --- /dev/null +++ b/pkg/unikontainers/rootfs_view_boot.go @@ -0,0 +1,150 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package unikontainers + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/urunc-dev/urunc/pkg/unikontainers/types" + "golang.org/x/sys/unix" +) + +// WriteRootfsViewState persists shim-prepared rootfs view state in the bundle. +func WriteRootfsViewState(bundleDir string, state types.RootfsViewState) error { + bundleDir = filepath.Clean(bundleDir) + data, err := json.Marshal(state) + if err != nil { + return fmt.Errorf("marshal %s: %w", rootfsViewFilename, err) + } + path := filepath.Join(bundleDir, rootfsViewFilename) + if err := os.WriteFile(path, data, 0o644); err != nil { //nolint:gosec // bundle metadata, same as state.json + return fmt.Errorf("write %s: %w", path, err) + } + return nil +} + +// LoadRootfsViewState reads rootfs view state written by the shim at task Create. +// Returns (nil, nil) when the file is absent. 
+func LoadRootfsViewState(bundleDir string) (*types.RootfsViewState, error) { + bundleDir = filepath.Clean(bundleDir) + path := filepath.Join(bundleDir, rootfsViewFilename) + data, err := os.ReadFile(path) + if err != nil { + if os.IsNotExist(err) { + return nil, nil + } + return nil, fmt.Errorf("read %s: %w", path, err) + } + var state types.RootfsViewState + if err := json.Unmarshal(data, &state); err != nil { + return nil, fmt.Errorf("unmarshal %s: %w", path, err) + } + return &state, nil +} + +func rootfsViewBootArtifactBindPaths(viewRoot, monRootfs, unikernelPath, initrdPath, uruncJSON string) []struct{ src, target string } { + artifactPaths := []string{unikernelPath, uruncJSON} + if initrdPath != "" { + artifactPaths = append(artifactPaths, initrdPath) + } + files := make([]struct{ src, target string }, 0, len(artifactPaths)) + for _, artifactPath := range artifactPaths { + rootfsRelPath := strings.TrimPrefix(filepath.Clean(artifactPath), "/") + files = append(files, struct{ src, target string }{ + src: filepath.Join(viewRoot, rootfsRelPath), + target: filepath.Join(monRootfs, rootfsRelPath), + }) + } + return files +} + +func rollbackRootfsViewBinds(targets []string) { + for i := len(targets) - 1; i >= 0; i-- { + if err := unmountRootfsViewBind(targets[i]); err != nil { + uniklog.WithError(err).WithField("target", filepath.Clean(targets[i])).Warn("failed to roll back rootfs view bind mount") + } + } +} + +// probeRootfsViewBootArtifacts checks that boot artifacts can be bind-mounted +// from the shim-mounted view. preSetup still has mountedPath; binds are rolled +// back immediately. 
+func probeRootfsViewBootArtifacts(view *types.RootfsViewState, unikernelPath, initrdPath, uruncJSON string) (useView bool, err error) { + if view == nil || view.Mountpoint == "" { + return false, nil + } + + probeRoot, err := os.MkdirTemp("", "urunc-rootfs-view-probe-") + if err != nil { + return false, fmt.Errorf("create temporary rootfs view probe mountpoint: %w", err) + } + defer os.RemoveAll(probeRoot) + + // Probe binds only validate the source view; monitor binds are created later. + var probeBindTargets []string + defer func() { + rollbackRootfsViewBinds(probeBindTargets) + }() + + for _, f := range rootfsViewBootArtifactBindPaths(view.Mountpoint, probeRoot, unikernelPath, initrdPath, uruncJSON) { + dstPath := f.target + if err := bindMountFile(f.src, filepath.Dir(dstPath), dstPath, 0, unix.MS_BIND, false); err != nil { + return false, fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) + } + probeBindTargets = append(probeBindTargets, dstPath) + } + + return true, nil +} + +// prepareRootfsViewBootBinds runs after prepareRoot. The source view mount is +// owned by the shim and remains mounted until task cleanup. 
+func prepareRootfsViewBootBinds(view *types.RootfsViewState, monRootfs, unikernelPath, initrdPath, uruncJSON string) error { + if view == nil || view.Mountpoint == "" { + return nil + } + + var bindTargets []string + keepBinds := false + defer func() { + if !keepBinds { + rollbackRootfsViewBinds(bindTargets) + } + }() + + for _, f := range rootfsViewBootArtifactBindPaths(view.Mountpoint, monRootfs, unikernelPath, initrdPath, uruncJSON) { + dstPath := f.target + if err := bindMountFile(f.src, filepath.Dir(dstPath), dstPath, 0, unix.MS_BIND, false); err != nil { + return fmt.Errorf("bind view %s -> %s: %w", f.src, f.target, err) + } + bindTargets = append(bindTargets, dstPath) + } + + keepBinds = true + return nil +} + +func unmountRootfsViewBind(target string) error { + target = filepath.Clean(target) + err := unix.Unmount(target, unix.MNT_DETACH) + if err == nil || err == unix.EINVAL || err == unix.ENOENT || os.IsNotExist(err) { + return nil + } + return fmt.Errorf("failed to unmount rootfs view bind %s: %w", target, err) +} diff --git a/pkg/unikontainers/types/types.go b/pkg/unikontainers/types/types.go index c6388e2cc..a742037d8 100644 --- a/pkg/unikontainers/types/types.go +++ b/pkg/unikontainers/types/types.go @@ -15,7 +15,10 @@ //revive:disable:var-naming package types -import "golang.org/x/sys/unix" +import ( + "github.com/containerd/containerd/mount" + "golang.org/x/sys/unix" +) type Unikernel interface { Init(UnikernelParams) error @@ -72,6 +75,13 @@ type RootfsParams struct { MonRootfs string // The rootfs for the monitor process } +// RootfsViewState is passed from shim to runtime via bundle rootfs-view.json. 
+type RootfsViewState struct { + Snapshotter string `json:"snapshotter"` + Mountpoint string `json:"mountpoint,omitempty"` + Mounts []mount.Mount `json:"mounts,omitempty"` +} + // Specific to Linux type ProcessConfig struct { UID uint32 // The uid of the process inside the guest diff --git a/pkg/unikontainers/utils.go b/pkg/unikontainers/utils.go index c53c0fc05..56245672f 100644 --- a/pkg/unikontainers/utils.go +++ b/pkg/unikontainers/utils.go @@ -35,11 +35,12 @@ import ( ) const ( - configFilename = "config.json" - stateFilename = "state.json" - initPidFilename = "init.pid" - uruncJSONFilename = "urunc.json" - rootfsDirName = "rootfs" + configFilename = "config.json" + stateFilename = "state.json" + rootfsViewFilename = "rootfs-view.json" + initPidFilename = "init.pid" + uruncJSONFilename = "urunc.json" + rootfsDirName = "rootfs" ) // copy sourceFile to targetDir From 2abfb2f087e2f52096457270adccc64945f791ab Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 1 Jun 2026 20:01:13 -0400 Subject: [PATCH 3/5] feat(runtime): bind boot artifacts from rootfs views Load shim-prepared rootfs view state for block rootfs setup. Probe the view before unmounting the container rootfs, then bind the boot artifacts after prepareRoot in the block postSetup step. 
Signed-off-by: sidneychang <2190206983@qq.com> --- pkg/unikontainers/block.go | 70 ++++++++++++++++++++---------- pkg/unikontainers/unikontainers.go | 35 ++++++++++----- 2 files changed, 71 insertions(+), 34 deletions(-) diff --git a/pkg/unikontainers/block.go b/pkg/unikontainers/block.go index 1b7bf7892..92fd55c33 100644 --- a/pkg/unikontainers/block.go +++ b/pkg/unikontainers/block.go @@ -36,15 +36,16 @@ const tmpfsSizeForBlockRootfs = "65536k" var ErrMountpoint = errors.New("no FS is mounted in this mountpoint") type blockRootfs struct { - mounts []specs.Mount - monRootfs string - mountedPath string - path string - kernelPath string - initrdPath string - uruncJSONPath string - guestType string - guest types.Unikernel + mounts []specs.Mount + monRootfs string + mountedPath string + path string + kernelPath string + initrdPath string + uruncJSONPath string + guestType string + guest types.Unikernel + rootfsViewState *types.RootfsViewState } // getMountInfo determines whether the provided path is a mount point @@ -122,8 +123,6 @@ func getMountInfo(path string) (types.BlockDevParams, error) { // extractUnikernelFromBlock moves unikernel binary, initrd and urunc.json // files from old rootfsPath to newRootfsPath -// FIXME: This approach fills up /run with unikernel binaries, initrds and urunc.json -// files for each unikernel we run func extractBootFiles(rootfsPath string, newRootfsPath string, unikernel string, uruncJSON string, initrd string) error { currentUnikernelPath := filepath.Join(rootfsPath, unikernel) targetUnikernelPath := filepath.Join(newRootfsPath, unikernel) @@ -148,7 +147,6 @@ func extractBootFiles(rootfsPath string, newRootfsPath string, unikernel string, if err != nil { return fmt.Errorf("could not move %s to %s: %w", currentConfigPath, newRootfsPath, err) } - return nil } @@ -226,24 +224,36 @@ func getBlockVolumes(monRootfs string, mounts []specs.Mount, ukernel types.Unike } func (b blockRootfs) preSetup() error { + // Preserve main's propagation 
fix: consume boot artifacts and unmount the + // container rootfs before prepareRoot() makes the mount tree private/slave. if b.mountedPath == "" { return nil } - err := copyMountfiles(b.mountedPath, b.mounts) - if err != nil { - return fmt.Errorf("failed to copy files from mount list: %w", err) + useViewPath := b.rootfsViewState != nil + if useViewPath { + // Probe only; the real bind must happen after prepareRoot. + useView, err := probeRootfsViewBootArtifacts(b.rootfsViewState, b.kernelPath, b.initrdPath, b.uruncJSONPath) + if err != nil { + return err + } + if !useView { + useViewPath = false + } } - // FIXME: This approach fills up /run with unikernel binaries and - // urunc.json files for each unikernel instance we run - err = extractBootFiles(b.mountedPath, b.monRootfs, b.kernelPath, b.uruncJSONPath, b.initrdPath) - if err != nil { - return fmt.Errorf("failed to extract boot files from rootfs: %w", err) + if !useViewPath { + err := extractBootFiles(b.mountedPath, b.monRootfs, b.kernelPath, b.uruncJSONPath, b.initrdPath) + if err != nil { + return fmt.Errorf("failed to extract boot files from rootfs: %w", err) + } } - err = mount.Unmount(b.mountedPath) - if err != nil { + if err := copyMountfiles(b.mountedPath, b.mounts); err != nil { + return fmt.Errorf("failed to copy files from mount list: %w", err) + } + + if err := mount.Unmount(b.mountedPath); err != nil { return fmt.Errorf("failed to unmount rootfs: %w", err) } @@ -262,10 +272,22 @@ func (b blockRootfs) postSetup() error { unix.MS_NOSUID|unix.MS_NOEXEC|unix.MS_STRICTATIME, "1777", tmpfsSizeForBlockRootfs) if err != nil { - err = fmt.Errorf("failed to create tmpfs for monitor's execution environment: %w", err) + return fmt.Errorf("failed to create tmpfs for monitor's execution environment: %w", err) + } + + if b.rootfsViewState == nil { + return nil } - return err + // Rootfs-view boot artifact binds must be created after prepareRoot() + // has fixed the monitor rootfs propagation and self-bind. 
Keeping this in + // postSetup() makes the ordering explicit while keeping the block-rootfs + // specific setup inside the block rootfs implementation. + if err := prepareRootfsViewBootBinds(b.rootfsViewState, b.monRootfs, b.kernelPath, b.initrdPath, b.uruncJSONPath); err != nil { + return fmt.Errorf("boot artifact setup after prepareRoot failed: %w", err) + } + + return nil } func (b blockRootfs) getBlockDevs() ([]types.BlockDevParams, error) { diff --git a/pkg/unikontainers/unikontainers.go b/pkg/unikontainers/unikontainers.go index a3a00bb5f..4f734239d 100644 --- a/pkg/unikontainers/unikontainers.go +++ b/pkg/unikontainers/unikontainers.go @@ -306,11 +306,21 @@ func ChooseRootfs(bundle, specRoot string, annot map[string]string, cfg *UruncCo func (u *Unikontainer) Exec(metrics m.Writer) error { metrics.Capture(m.TS15) + // Reload annotations written by the shim after Create. + spec, err := loadSpec(u.State.Bundle) + if err != nil { + return fmt.Errorf("reload bundle spec: %w", err) + } + if spec == nil || spec.Linux == nil { + return fmt.Errorf("invalid OCI spec: linux section is required") + } + u.Spec = spec + // container Paths // Make sure paths are clean bundleDir := filepath.Clean(u.State.Bundle) rootfsDir := filepath.Clean(u.Spec.Root.Path) - rootfsDir, err := resolveAgainstBase(bundleDir, rootfsDir) + rootfsDir, err = resolveAgainstBase(bundleDir, rootfsDir) if err != nil { uniklog.Errorf("could not resolve rootfs directory %s: %v", rootfsDir, err) return err @@ -461,16 +471,21 @@ func (u *Unikontainer) Exec(metrics m.Writer) error { var rfsBuilder rootfsBuilder switch rootfsParams.Type { case "block": + view, err := LoadRootfsViewState(bundleDir) + if err != nil { + return fmt.Errorf("could not load guest rootfs view: %w", err) + } rfsBuilder = blockRootfs{ - mounts: u.Spec.Mounts, - monRootfs: rootfsParams.MonRootfs, - mountedPath: rootfsParams.MountedPath, - path: rootfsParams.Path, - kernelPath: unikernelPath, - initrdPath: initrdPath, - 
uruncJSONPath: uruncJSONFilename, - guestType: unikernelType, - guest: unikernel, + mounts: u.Spec.Mounts, + monRootfs: rootfsParams.MonRootfs, + mountedPath: rootfsParams.MountedPath, + path: rootfsParams.Path, + kernelPath: unikernelPath, + initrdPath: initrdPath, + uruncJSONPath: uruncJSONFilename, + guestType: unikernelType, + guest: unikernel, + rootfsViewState: view, } case "initrd": rfsBuilder = initrdRootfs{ From cf77889c61d717d09a3b7aaba16138e7cdf98956 Mon Sep 17 00:00:00 2001 From: sidneychang <2190206983@qq.com> Date: Mon, 1 Jun 2026 20:01:48 -0400 Subject: [PATCH 4/5] feat(shim): manage rootfs views in task lifecycle Choose guest rootfs parameters after inner task creation and persist them for runtime Exec. When enabled, prepare a rootfs view during Create, roll it back on persistence failures, and clean it during Delete. Signed-off-by: sidneychang <2190206983@qq.com> --- docs/package/index.md | 10 ++ pkg/containerd-shim/containerd/annotations.go | 8 +- pkg/containerd-shim/guest_rootfs.go | 42 ++---- pkg/containerd-shim/task_plugin.go | 9 ++ pkg/containerd-shim/task_service.go | 134 +++++++++++++++++- 5 files changed, 164 insertions(+), 39 deletions(-) diff --git a/docs/package/index.md b/docs/package/index.md index 2e772414a..b61f0be64 100644 --- a/docs/package/index.md +++ b/docs/package/index.md @@ -73,6 +73,16 @@ Except of the above, `urunc` accepts the following optional annotations: requests from `urunc` to mount the container's image rootfs in the unikernel (either as a block device or through shared-fs). +Per-container rootfs views are controlled by `[rootfs_view] enabled` in +`/etc/urunc/config.toml`. See +[configuration](../configuration.md#rootfs-view-configuration). When enabled, +the container must also use `com.urunc.unikernel.mountRootfs=true` (typically +from image annotations merged into `config.json` before shim task Create). +Supported snapshotters are `devmapper` and `blockfile`.
After the wrapped +task service creates the task and mounts the bundle rootfs, the shim runs +`ChooseRootfs` and prepares a view only when that selection is container block +rootfs. + Due to the fact that [Docker](https://www.docker.com/) and some high-level container runtimes do not pass the image annotations to the underlying container runtime, `urunc` can also read the above information from a file inside the diff --git a/pkg/containerd-shim/containerd/annotations.go b/pkg/containerd-shim/containerd/annotations.go index 5d980c961..099c34639 100644 --- a/pkg/containerd-shim/containerd/annotations.go +++ b/pkg/containerd-shim/containerd/annotations.go @@ -86,7 +86,7 @@ func InjectUruncAnnotations(ctx context.Context, session *Session, bundlePath st return nil } - return patchConfigJSON(bundlePath, annotations) + return PatchConfigJSON(bundlePath, annotations) } func (f *annotationFetcher) fetchUruncAnnotations(ctx context.Context) (map[string]string, error) { @@ -152,12 +152,12 @@ func readBlob(ctx context.Context, namespace string, contentClient contentapi.Co return raw, nil } -// patchConfigJSON injects missing annotations into the OCI runtime spec -// stored in the bundle's config.json. +// PatchConfigJSON injects missing annotations into the OCI runtime spec stored in +// the bundle's config.json. // // Existing annotations in config.json are preserved. Only annotation keys that // are not already present in the runtime spec are added. 
-func patchConfigJSON(bundlePath string, annotations map[string]string) error { +func PatchConfigJSON(bundlePath string, annotations map[string]string) error { configPath := filepath.Join(bundlePath, "config.json") fi, err := os.Stat(configPath) diff --git a/pkg/containerd-shim/guest_rootfs.go b/pkg/containerd-shim/guest_rootfs.go index f8982ecf1..0508ed255 100644 --- a/pkg/containerd-shim/guest_rootfs.go +++ b/pkg/containerd-shim/guest_rootfs.go @@ -24,44 +24,38 @@ import ( taskAPI "github.com/containerd/containerd/api/runtime/task/v2" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/urunc-dev/urunc/pkg/unikontainers" + "github.com/urunc-dev/urunc/pkg/unikontainers/types" ) -const annotRootfsParams = "com.urunc.internal.rootfs.params" - var errGuestRootfsChoiceSkipped = errors.New("guest rootfs choice skipped") // chooseGuestRootfs runs the same ChooseRootfs logic as runtime Exec after inner -// task Create (#684) and records the result in annotRootfsParams so Exec knows -// selection already happened. -func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { +// task Create (#684). The caller persists the result in bundle config.json so +// Exec can reuse the selection. 
+func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) (types.RootfsParams, error) { configPath := filepath.Join(r.Bundle, "config.json") - info, err := os.Stat(configPath) - if err != nil { - return fmt.Errorf("stat config.json: %w", err) - } - data, err := os.ReadFile(configPath) if err != nil { - return fmt.Errorf("read config.json: %w", err) + return types.RootfsParams{}, fmt.Errorf("read config.json: %w", err) } var spec specs.Spec if err := json.Unmarshal(data, &spec); err != nil { - return fmt.Errorf("unmarshal config.json: %w", err) + return types.RootfsParams{}, fmt.Errorf("unmarshal config.json: %w", err) } if spec.Root == nil { - return fmt.Errorf("invalid OCI spec: root section is required") + return types.RootfsParams{}, fmt.Errorf("invalid OCI spec: root section is required") } config, err := unikontainers.GetUnikernelConfig(filepath.Clean(r.Bundle), &spec) if err != nil { - return fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) + return types.RootfsParams{}, fmt.Errorf("%w: %w", errGuestRootfsChoiceSkipped, err) } annotations := config.Map() uruncCfg, err := unikontainers.LoadUruncConfig(unikontainers.UruncConfigPath) if err != nil && uruncCfg == nil { - return err + return types.RootfsParams{}, err } rootfsParams, err := unikontainers.ChooseRootfs( @@ -71,22 +65,8 @@ func chooseGuestRootfs(r *taskAPI.CreateTaskRequest) error { uruncCfg, ) if err != nil { - return err - } - - encoded, err := json.Marshal(rootfsParams) - if err != nil { - return err - } - if spec.Annotations == nil { - spec.Annotations = make(map[string]string) - } - spec.Annotations[annotRootfsParams] = string(encoded) - - patched, err := json.MarshalIndent(spec, "", " ") - if err != nil { - return fmt.Errorf("marshal config.json: %w", err) + return types.RootfsParams{}, err } - return os.WriteFile(configPath, patched, info.Mode()) + return rootfsParams, nil } diff --git a/pkg/containerd-shim/task_plugin.go b/pkg/containerd-shim/task_plugin.go index 85226f383..54dfabddf 100644 
--- a/pkg/containerd-shim/task_plugin.go +++ b/pkg/containerd-shim/task_plugin.go @@ -15,6 +15,9 @@ package containerdshim import ( + "os" + "path/filepath" + "github.com/containerd/containerd/pkg/shutdown" "github.com/containerd/containerd/plugin" runcTask "github.com/containerd/containerd/runtime/v2/runc/task" @@ -45,9 +48,15 @@ func init() { return nil, err } + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + return &taskService{ TaskService: inner, containerdAddress: ic.Address, + stateRoot: filepath.Dir(filepath.Dir(cwd)), }, nil }, }) diff --git a/pkg/containerd-shim/task_service.go b/pkg/containerd-shim/task_service.go index fb126c3f0..326e5c3a8 100644 --- a/pkg/containerd-shim/task_service.go +++ b/pkg/containerd-shim/task_service.go @@ -16,14 +16,22 @@ package containerdshim import ( "context" + "encoding/json" "errors" + "fmt" + "path/filepath" taskAPI "github.com/containerd/containerd/api/runtime/task/v2" + "github.com/containerd/containerd/namespaces" "github.com/containerd/log" "github.com/containerd/ttrpc" containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" + "github.com/urunc-dev/urunc/pkg/unikontainers" ) +// Internal bundle annotation (duplicated in unikontainers; keep in sync). +const annotRootfsParams = "com.urunc.internal.rootfs.params" + // taskService is urunc's shim-side wrapper around containerd's runc task // service. It wires urunc task setup before forwarding calls to the wrapped // service. @@ -31,6 +39,8 @@ type taskService struct { taskAPI.TaskService containerdAddress string + // Used on Delete, where cwd may no longer be the bundle. 
+ stateRoot string } func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (*taskAPI.CreateTaskResponse, error) { @@ -53,9 +63,8 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return resp, err } - // ChooseRootfs after inner task Create so bundle rootfs is mounted; - // params are persisted in bundle config.json for runtime Exec. - if err := chooseGuestRootfs(r); err != nil { + rootfsChoice, err := chooseGuestRootfs(r) + if err != nil { if errors.Is(err, errGuestRootfsChoiceSkipped) { log.G(ctx).WithError(err).Debug("urunc(shim): guest rootfs choice skipped") return resp, nil @@ -64,14 +73,131 @@ func (s *taskService) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) return nil, err } + rootfsViewState, shouldPrepareRootfsView, err := containerdShim.PrepareRootfsView(ctx, session, rootfsChoice, r.Bundle) + rootfsViewPrepared := false + if err != nil { + if shouldPrepareRootfsView { + // Preflight passed and prepare failed. This is non-fatal: the runtime can + // still fall back to extracting boot artifacts from the legacy mounted rootfs. + log.G(ctx).WithError(err).Warn("urunc(shim): failed to prepare rootfs view; falling back to legacy boot artifact extraction") + } else { + // A disabled rootfs_view returns nil error and is handled as a skipped + // prepare below; this branch means the enablement check itself failed. 
+ log.G(ctx).WithError(err).Warn("urunc(shim): failed to check rootfs view config; rootfs view skipped") + } + } else if shouldPrepareRootfsView { + rootfsViewPrepared = true + log.G(ctx).Debug("urunc(shim): rootfs view prepared") + } else if session != nil { + log.G(ctx).WithField("rootfs_type", rootfsChoice.Type).Debug("urunc(shim): rootfs view prepare skipped") + } + + rootfsViewPersisted := false + defer func() { + if rootfsViewPrepared && !rootfsViewPersisted { + cleanupRootfsView(ctx, session, "", rootfsViewState.Mountpoint, "create rollback") + } + }() + + rootfsParamsJSON, err := json.Marshal(rootfsChoice) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to encode rootfs params") + return nil, err + } + + if err := containerdShim.PatchConfigJSON(r.Bundle, map[string]string{ + annotRootfsParams: string(rootfsParamsJSON), + }); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to persist shim create annotations") + return nil, err + } + + if rootfsViewPrepared { + if err := unikontainers.WriteRootfsViewState(r.Bundle, rootfsViewState); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to persist rootfs view state") + return nil, err + } + rootfsViewPersisted = true + } + return resp, nil } +func cleanupRootfsView(ctx context.Context, session *containerdShim.Session, snapshotter, mountpoint, reason string) { + if err := containerdShim.CleanupRootfsView(ctx, session, snapshotter, mountpoint); err != nil { + log.G(ctx).WithError(err).WithField("reason", reason).Warn("urunc(shim): failed to clean up rootfs view") + } +} + func (s *taskService) Delete(ctx context.Context, r *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) { - return s.TaskService.Delete(ctx, r) + shouldCleanup := false + snapshotter := "" + rootfsViewMountpoint := "" + var loadErr error + + if r.ExecID == "" { + bundle, err := s.bundlePathFor(ctx, r.ID) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): resolve bundle 
path during Delete failed") + loadErr = err + } else { + // Read view state before inner Delete; snapshotter is taken from bundle + // (written at Prepare) because container metadata may be gone after Delete. + var mountpoint string + shouldCleanup, snapshotter, mountpoint, loadErr = containerdShim.ShouldCleanupRootfsView(bundle) + if loadErr == nil { + rootfsViewMountpoint = mountpoint + } + } + } + + // Delete tears down the monitor namespace before removing the view it may pin. + resp, err := s.TaskService.Delete(ctx, r) + + if loadErr != nil { + if err != nil { + return resp, err + } + return resp, loadErr + } + + if shouldCleanup { + session, sessionErr := containerdShim.OpenSession(ctx, s.containerdAddress, r.ID) + if sessionErr != nil { + log.G(ctx).WithError(sessionErr).Warn("urunc(shim): open containerd session for rootfs view cleanup failed") + if err == nil { + err = sessionErr + } + } else { + defer func() { + if err := session.Close(); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to close containerd session after rootfs view cleanup") + } + }() + if cleanupErr := containerdShim.CleanupRootfsView(ctx, session, snapshotter, rootfsViewMountpoint); cleanupErr != nil { + log.G(ctx).WithError(cleanupErr).Warn("urunc(shim): delete rootfs view during Delete failed") + if err == nil { + err = cleanupErr + } + } + } + } + + return resp, err } func (s *taskService) RegisterTTRPC(server *ttrpc.Server) error { taskAPI.RegisterTaskService(server, s) return nil } + +func (s *taskService) bundlePathFor(ctx context.Context, containerID string) (string, error) { + if s.stateRoot == "" { + return "", fmt.Errorf("task service state root is empty (shim cwd layout assumption violated)") + } + ns, err := namespaces.NamespaceRequired(ctx) + if err != nil { + return "", fmt.Errorf("namespace required: %w", err) + } + return filepath.Join(s.stateRoot, ns, containerID), nil +} From f281afe8aecaa788e907092e7ca7cb6e18bd0e0d Mon Sep 17 00:00:00 2001 From: 
sidneychang <2190206983@qq.com> Date: Mon, 1 Jun 2026 20:02:45 -0400 Subject: [PATCH 5/5] feat(shim): clean rootfs views from delete manager Wire a custom shim manager Stop path that reads persisted rootfs view state from the bundle and removes the view snapshot and lease. This covers cleanup paths where task Delete is not the final teardown hook. Signed-off-by: sidneychang <2190206983@qq.com> --- cmd/containerd-shim-urunc-v2/main.go | 4 +- pkg/containerd-shim/shim_manager.go | 80 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) create mode 100644 pkg/containerd-shim/shim_manager.go diff --git a/cmd/containerd-shim-urunc-v2/main.go b/cmd/containerd-shim-urunc-v2/main.go index dac1451c2..70d2398ea 100644 --- a/cmd/containerd-shim-urunc-v2/main.go +++ b/cmd/containerd-shim-urunc-v2/main.go @@ -17,11 +17,11 @@ package main import ( "context" - "github.com/containerd/containerd/runtime/v2/runc/manager" "github.com/containerd/containerd/runtime/v2/shim" _ "github.com/urunc-dev/urunc/pkg/containerd-shim" + containerdshim "github.com/urunc-dev/urunc/pkg/containerd-shim" ) func main() { - shim.RunManager(context.Background(), manager.NewShimManager("io.containerd.urunc.v2")) + shim.RunManager(context.Background(), containerdshim.NewShimManager("io.containerd.urunc.v2")) } diff --git a/pkg/containerd-shim/shim_manager.go b/pkg/containerd-shim/shim_manager.go new file mode 100644 index 000000000..a482eb7f6 --- /dev/null +++ b/pkg/containerd-shim/shim_manager.go @@ -0,0 +1,80 @@ +// Copyright (c) 2023-2026, Nubificus LTD +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package containerdshim + +import ( + "context" + "os" + + "github.com/containerd/containerd/runtime/v2/runc/manager" + "github.com/containerd/containerd/runtime/v2/shim" + "github.com/containerd/log" + containerdShim "github.com/urunc-dev/urunc/pkg/containerd-shim/containerd" +) + +const containerdGRPCAddressEnv = "GRPC_ADDRESS" + +func containerdGRPCAddress() string { + return os.Getenv(containerdGRPCAddressEnv) +} + +type shimManager struct { + shim.Manager +} + +func NewShimManager(runtime string) shim.Manager { + return &shimManager{Manager: manager.NewShimManager(runtime)} +} + +func (m *shimManager) Stop(ctx context.Context, id string) (shim.StopStatus, error) { + bundle, err := os.Getwd() + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): getwd during delete failed") + return m.Manager.Stop(ctx, id) + } + + shouldCleanup, snapshotter, mountpoint, err := containerdShim.ShouldCleanupRootfsView(bundle) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): read rootfs view cleanup state from bundle during delete failed") + return m.Manager.Stop(ctx, id) + } + if !shouldCleanup { + return m.Manager.Stop(ctx, id) + } + + address := containerdGRPCAddress() + if address == "" { + log.G(ctx).Warn("urunc(shim): containerd gRPC address unset during delete; rootfs view cleanup skipped") + return m.Manager.Stop(ctx, id) + } + + session, err := containerdShim.OpenSession(ctx, address, id) + if err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): open containerd session for rootfs view cleanup failed") + return 
m.Manager.Stop(ctx, id) + } + defer func() { + if err := session.Close(); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): failed to close containerd session after rootfs view cleanup") + } + }() + + // snapshotter from bundle view state; shim cwd may outlive task Delete. + if err := containerdShim.CleanupRootfsView(ctx, session, snapshotter, mountpoint); err != nil { + log.G(ctx).WithError(err).Warn("urunc(shim): rootfs view cleanup during delete failed") + } + + return m.Manager.Stop(ctx, id) +}