diff --git a/cmd/nerdctl/container/container_run_mount_image_linux_test.go b/cmd/nerdctl/container/container_run_mount_image_linux_test.go new file mode 100644 index 00000000000..2ac6e33bd18 --- /dev/null +++ b/cmd/nerdctl/container/container_run_mount_image_linux_test.go @@ -0,0 +1,235 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package container + +import ( + "fmt" + "testing" + + "github.com/containerd/nerdctl/mod/tigron/expect" + "github.com/containerd/nerdctl/mod/tigron/require" + "github.com/containerd/nerdctl/mod/tigron/test" + + "github.com/containerd/nerdctl/v2/pkg/testutil" + "github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest" +) + +// TestRunMountTypeImage verifies that `--mount type=image` mounts the source +// image's filesystem into the container so its files are readable at the target. 
+func TestRunMountTypeImage(t *testing.T) {
+	tc := nerdtest.Setup()
+
+	// Start a container from the common image with that same image mounted at
+	// /mnt/img, and read a well-known file through the mount point.
+	tc.Command = func(_ test.Data, helpers test.Helpers) test.TestableCommand {
+		mountSpec := fmt.Sprintf("type=image,source=%s,destination=/mnt/img", testutil.CommonImage)
+		return helpers.Command("run", "--rm", "--mount", mountSpec,
+			testutil.CommonImage, "cat", "/mnt/img/etc/os-release")
+	}
+
+	// The expectation assumes the common test image identifies itself as
+	// Alpine in its os-release file.
+	tc.Expected = func(_ test.Data, _ test.Helpers) *test.Expected {
+		want := &test.Expected{}
+		want.ExitCode = expect.ExitCodeSuccess
+		want.Output = expect.Contains("Alpine")
+		return want
+	}
+
+	tc.Run(t)
+}
+
+// TestRunMountTypeImageMultipleDestinations checks that one container can
+// carry two image mounts of the same source image at different destinations.
+func TestRunMountTypeImageMultipleDestinations(t *testing.T) {
+	tc := nerdtest.Setup()
+	// nerdctl-only: Docker keys an image mount by its source image and rejects
+	// mounting the same image twice ("mount already exists with name").
+	tc.Require = require.Not(nerdtest.Docker)
+
+	tc.Command = func(_ test.Data, helpers test.Helpers) test.TestableCommand {
+		mountA := fmt.Sprintf("type=image,source=%s,destination=/mnt/a", testutil.CommonImage)
+		mountB := fmt.Sprintf("type=image,source=%s,destination=/mnt/b", testutil.CommonImage)
+		// cat exits non-zero if either file is missing, so a successful exit
+		// covers both destinations.
+		return helpers.Command("run", "--rm",
+			"--mount", mountA,
+			"--mount", mountB,
+			testutil.CommonImage, "cat", "/mnt/a/etc/os-release", "/mnt/b/etc/os-release")
+	}
+
+	tc.Expected = func(_ test.Data, _ test.Helpers) *test.Expected {
+		want := &test.Expected{}
+		want.ExitCode = expect.ExitCodeSuccess
+		want.Output = expect.Contains("Alpine")
+		return want
+	}
+
+	tc.Run(t)
+}
+
+// TestRunMountTypeImageReadOnly verifies an image mount is read-only (writing
+// fails). nerdctl-only: Docker mounts images read-write by default.
+func TestRunMountTypeImageReadOnly(t *testing.T) {
+	testCase := nerdtest.Setup()
+	testCase.Require = require.Not(nerdtest.Docker)
+
+	// Creating a file under the mount point must fail because the image
+	// rootfs is mounted read-only.
+	testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand {
+		return helpers.Command("run", "--rm",
+			"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img", testutil.CommonImage),
+			testutil.CommonImage, "touch", "/mnt/img/should-fail")
+	}
+
+	testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected {
+		return &test.Expected{
+			ExitCode: expect.ExitCodeGenericFail,
+			Errors:   []error{fmt.Errorf("Read-only file system")},
+		}
+	}
+
+	testCase.Run(t)
+}
+
+// TestRunMountTypeImageSubpath verifies that image-subpath exposes only the
+// selected directory of the image rootfs at the destination: with
+// image-subpath=etc, the image's /etc/os-release is reachable as
+// <destination>/os-release.
+func TestRunMountTypeImageSubpath(t *testing.T) {
+	testCase := nerdtest.Setup()
+
+	testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand {
+		return helpers.Command("run", "--rm",
+			"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,image-subpath=etc", testutil.CommonImage),
+			testutil.CommonImage, "cat", "/mnt/img/os-release")
+	}
+
+	testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected {
+		return &test.Expected{
+			ExitCode: expect.ExitCodeSuccess,
+			Output:   expect.Contains("Alpine"),
+		}
+	}
+
+	testCase.Run(t)
+}
+
+// TestRunMountTypeImageSubpathMultiple verifies that two image-subpath mounts of
+// the same image at different destinations each expose their own subdirectory,
+// exercising the multi-mount label round-trip and cleanup.
+func TestRunMountTypeImageSubpathMultiple(t *testing.T) {
+	testCase := nerdtest.Setup()
+	// nerdctl-only: Docker keys an image mount by its source image and rejects
+	// mounting the same image twice ("mount already exists with name").
+	testCase.Require = require.Not(nerdtest.Docker)
+
+	testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand {
+		return helpers.Command("run", "--rm",
+			"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/etc,image-subpath=etc", testutil.CommonImage),
+			"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/bin,image-subpath=bin", testutil.CommonImage),
+			testutil.CommonImage, "ls", "/mnt/etc", "/mnt/bin")
+	}
+
+	testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected {
+		return &test.Expected{
+			ExitCode: expect.ExitCodeSuccess,
+			// A zero exit only proves both destinations exist. Also require one
+			// entry from each listing so a mount exposing the wrong directory
+			// fails the test. Assumes the common test image ships
+			// /etc/os-release and /bin/busybox (true for Alpine) - confirm if
+			// the common image ever changes.
+			Output: expect.All(
+				expect.Contains("os-release"),
+				expect.Contains("busybox"),
+			),
+		}
+	}
+
+	testCase.Run(t)
+}
+
+// TestRunMountTypeImageSubpathReadOnly verifies that an image-subpath mount is
+// read-only. nerdctl-only: Docker mounts images read-write by default.
+func TestRunMountTypeImageSubpathReadOnly(t *testing.T) {
+	testCase := nerdtest.Setup()
+	testCase.Require = require.Not(nerdtest.Docker)
+
+	// Same write probe as the whole-rootfs case, but against the bind mount of
+	// the selected subdirectory.
+	testCase.Command = func(data test.Data, helpers test.Helpers) test.TestableCommand {
+		return helpers.Command("run", "--rm",
+			"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,image-subpath=etc", testutil.CommonImage),
+			testutil.CommonImage, "touch", "/mnt/img/should-fail")
+	}
+
+	testCase.Expected = func(data test.Data, helpers test.Helpers) *test.Expected {
+		return &test.Expected{
+			ExitCode: expect.ExitCodeGenericFail,
+			Errors:   []error{fmt.Errorf("Read-only file system")},
+		}
+	}
+
+	testCase.Run(t)
+}
+
+// TestRunMountTypeImageErrors verifies that an image mount missing its source,
+// or using the not-yet-supported subpath option, or an image-subpath that
+// escapes the rootfs, is rejected. These are nerdctl-specific behaviours here,
+// so the test is not run against Docker.
+func TestRunMountTypeImageErrors(t *testing.T) { + testCase := nerdtest.Setup() + testCase.Require = require.Not(nerdtest.Docker) + + testCase.SubTests = []*test.Case{ + { + Description: "missing source", + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("run", "--rm", "--mount", "type=image,destination=/mnt/img", + testutil.CommonImage, "true") + }, + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: expect.ExitCodeGenericFail, + Errors: []error{fmt.Errorf("source")}, + } + }, + }, + { + Description: "subpath not supported", + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("run", "--rm", + "--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,subpath=etc", testutil.CommonImage), + testutil.CommonImage, "true") + }, + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: expect.ExitCodeGenericFail, + Errors: []error{fmt.Errorf("subpath")}, + } + }, + }, + { + Description: "image-subpath parent traversal rejected", + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("run", "--rm", + "--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,image-subpath=../etc", testutil.CommonImage), + testutil.CommonImage, "true") + }, + Expected: func(data test.Data, helpers test.Helpers) *test.Expected { + return &test.Expected{ + ExitCode: expect.ExitCodeGenericFail, + Errors: []error{fmt.Errorf("escapes")}, + } + }, + }, + { + Description: "image-subpath absolute rejected", + Command: func(data test.Data, helpers test.Helpers) test.TestableCommand { + return helpers.Command("run", "--rm", + "--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,image-subpath=/etc", testutil.CommonImage), + testutil.CommonImage, "true") + }, + Expected: func(data test.Data, helpers test.Helpers) 
*test.Expected { + return &test.Expected{ + ExitCode: expect.ExitCodeGenericFail, + Errors: []error{fmt.Errorf("relative")}, + } + }, + }, + } + + testCase.Run(t) +} diff --git a/docs/command-reference.md b/docs/command-reference.md index 2195e6ee7bb..415b639c047 100644 --- a/docs/command-reference.md +++ b/docs/command-reference.md @@ -295,10 +295,9 @@ Volume flags: Consists of multiple key-value pairs, separated by commas and each consisting of a `=` tuple. e.g., `-- mount type=bind,source=/src,target=/app,bind-propagation=shared`. - - :whale: `type`: Current supported mount types are `bind`, `volume`, `tmpfs`. + - :whale: `type`: Current supported mount types are `bind`, `volume`, `tmpfs`, `image`. The default type will be set to `volume` if not specified. i.e., `--mount src=vol-1,dst=/app,readonly` equals `--mount type=volume,src=vol-1,dst=/app,readonly` - - unimplemented type: `image` - Common Options: - :whale: `src`, `source`: Mount source spec for bind and volume. Mandatory for bind. - :whale: `dst`, `destination`, `target`: Mount destination spec. @@ -313,6 +312,9 @@ Volume flags: Defaults to `1777` or world-writable. - Options specific to `volume`: - unimplemented options: `volume-nocopy`, `volume-label`, `volume-driver`, `volume-opt` + - Options specific to `image`: + - :whale: `src`, `source`: image reference (mandatory). The image filesystem is mounted read-only. + - :whale: `image-subpath`: relative path inside the image rootfs to mount instead of the whole rootfs. Must stay within the rootfs (no absolute paths or `..` traversal). - :whale: `--volumes-from`: Mount volumes from the specified container(s), e.g. "--volumes-from my-container". Rootfs flags: diff --git a/pkg/cmd/container/create.go b/pkg/cmd/container/create.go index eb1de8df27d..342acda4ec5 100644 --- a/pkg/cmd/container/create.go +++ b/pkg/cmd/container/create.go @@ -70,7 +70,7 @@ import ( ) // Create will create a container. 
-func Create(ctx context.Context, client *containerd.Client, args []string, netManager containerutil.NetworkOptionsManager, options types.ContainerCreateOptions) (containerd.Container, func(), error) { +func Create(ctx context.Context, client *containerd.Client, args []string, netManager containerutil.NetworkOptionsManager, options types.ContainerCreateOptions) (_ containerd.Container, _ func(), retErr error) { // Acquire an exclusive lock on the volume store until we are done to avoid being raced by any other // volume operations (or any other operation involving volume manipulation) volStore, err := volume.Store(options.GOptions.Namespace, options.GOptions.DataRoot, options.GOptions.Address) @@ -94,6 +94,27 @@ func Create(ctx context.Context, client *containerd.Client, args []string, netMa internalLabels.platform = options.Platform internalLabels.namespace = options.GOptions.Namespace + // If creation fails after image-mount state is created, tear it down so the + // snapshots and host mounts do not leak (the cleanup labels are only persisted + // on success). + defer func() { + if retErr == nil { + return + } + var keys, hostpaths []string + for _, mp := range internalLabels.mountPoints { + if mp.ImageMountSnapshot != "" { + keys = append(keys, mp.ImageMountSnapshot) + } + if mp.ImageMountHostpath != "" { + hostpaths = append(hostpaths, mp.ImageMountHostpath) + } + } + if len(keys) > 0 || len(hostpaths) > 0 { + removeImageMounts(ctx, client.SnapshotService(options.GOptions.Snapshotter), hostpaths, keys) + } + }() + var ( id = idgen.GenerateID() opts []oci.SpecOpts @@ -806,6 +827,32 @@ func withInternalLabels(internalLabels internalLabels) (containerd.NewContainerO m[labels.AnonymousVolumes] = string(anonVolumeJSON) } + // Record the snapshot keys and host materialization paths of any type=image + // mounts so they can be removed when the container is deleted. 
+ var imageMountSnapshots, imageMountHostpaths []string + for _, mp := range internalLabels.mountPoints { + if mp.ImageMountSnapshot != "" { + imageMountSnapshots = append(imageMountSnapshots, mp.ImageMountSnapshot) + } + if mp.ImageMountHostpath != "" { + imageMountHostpaths = append(imageMountHostpaths, mp.ImageMountHostpath) + } + } + if len(imageMountSnapshots) > 0 { + b, err := json.Marshal(imageMountSnapshots) + if err != nil { + return nil, err + } + m[labels.ImageMountSnapshots] = string(b) + } + if len(imageMountHostpaths) > 0 { + b, err := json.Marshal(imageMountHostpaths) + if err != nil { + return nil, err + } + m[labels.ImageMountHostpaths] = string(b) + } + if internalLabels.pidFile != "" { m[labels.PIDFile] = internalLabels.pidFile } diff --git a/pkg/cmd/container/remove.go b/pkg/cmd/container/remove.go index b9df2b2acaf..df55b173add 100644 --- a/pkg/cmd/container/remove.go +++ b/pkg/cmd/container/remove.go @@ -109,6 +109,13 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions return err } + // Capture the container's snapshotter before deletion: image-mount views were + // created against it, which may differ from the current --snapshotter flag. + imageMountSnapshotter := globalOptions.Snapshotter + if info, err := c.Info(ctx); err == nil && info.Snapshotter != "" { + imageMountSnapshotter = info.Snapshotter + } + // Get datastore dataStore, err := clientutil.DataStore(globalOptions.DataRoot, globalOptions.Address) if err != nil { @@ -275,6 +282,23 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions } } } + + // Tear down type=image mount state (host materializations and read-only + // views) backing this container - soft failure. 
+ var imageMountKeys, imageMountHostpaths []string + if snapshotsJSON, ok := containerLabels[labels.ImageMountSnapshots]; ok { + if err = json.Unmarshal([]byte(snapshotsJSON), &imageMountKeys); err != nil { + log.G(ctx).WithError(err).Warnf("failed to unmarshal image-mount snapshots for container %q", id) + } + } + if hostpathsJSON, ok := containerLabels[labels.ImageMountHostpaths]; ok { + if err = json.Unmarshal([]byte(hostpathsJSON), &imageMountHostpaths); err != nil { + log.G(ctx).WithError(err).Warnf("failed to unmarshal image-mount host paths for container %q", id) + } + } + if len(imageMountKeys) > 0 || len(imageMountHostpaths) > 0 { + removeImageMounts(ctx, client.SnapshotService(imageMountSnapshotter), imageMountHostpaths, imageMountKeys) + } }() // Get the task. diff --git a/pkg/cmd/container/run_mount.go b/pkg/cmd/container/run_mount.go index 5850cad92f0..1f28de0203f 100644 --- a/pkg/cmd/container/run_mount.go +++ b/pkg/cmd/container/run_mount.go @@ -37,6 +37,7 @@ import ( "github.com/containerd/containerd/v2/core/containers" "github.com/containerd/containerd/v2/core/leases" "github.com/containerd/containerd/v2/core/mount" + "github.com/containerd/containerd/v2/core/snapshots" "github.com/containerd/containerd/v2/pkg/oci" "github.com/containerd/continuity/fs" "github.com/containerd/errdefs" @@ -122,17 +123,165 @@ func parseMountFlags(volStore volumestore.VolumeStore, options types.ContainerCr return parsed, nil } +// gcRootLabel marks a snapshot as a GC root so containerd does not reclaim it. +const gcRootLabel = "containerd.io/gc.root" + +// setupImageMount ensures and unpacks ref, then creates a read-only GC-rooted +// snapshot view of its rootfs. Without a subpath it returns the snapshotter's +// own mount for destination (runc owns its lifecycle). 
With a subpath it +// materializes the view on a host directory and returns a read-only bind mount +// of the resolved subdirectory, because an OCI overlay mount cannot select a +// subdir; that host directory is returned so it can be unmounted on removal. +// It returns the OCI mount, the view's snapshot key, and the host materialization +// path (empty when no subpath is used). +func setupImageMount(ctx context.Context, client *containerd.Client, options types.ContainerCreateOptions, ref, destination, subpath string) (specs.Mount, string, string, error) { + ensured, err := imgutil.EnsureImage(ctx, client, ref, options.ImagePullOpt) + if err != nil { + return specs.Mount{}, "", "", fmt.Errorf("failed to ensure image %q for image mount: %w", ref, err) + } + if err := ensured.Image.Unpack(ctx, options.GOptions.Snapshotter); err != nil { + return specs.Mount{}, "", "", fmt.Errorf("failed to unpack image %q for image mount: %w", ref, err) + } + diffIDs, err := ensured.Image.RootFS(ctx) + if err != nil { + return specs.Mount{}, "", "", fmt.Errorf("failed to get rootfs of image %q for image mount: %w", ref, err) + } + chainID := identity.ChainID(diffIDs).String() + + snapshotKey := idgen.GenerateID() + "-image-mount" + s := client.SnapshotService(options.GOptions.Snapshotter) + mounts, err := s.View(ctx, snapshotKey, chainID, snapshots.WithLabels(map[string]string{ + gcRootLabel: time.Now().UTC().Format(time.RFC3339), + })) + if err != nil { + return specs.Mount{}, "", "", fmt.Errorf("failed to create read-only view of image %q: %w", ref, err) + } + // removeView drops the snapshot view on any failure after it was created. 
+ removeView := func() { + if rmErr := s.Remove(ctx, snapshotKey); rmErr != nil && !errdefs.IsNotFound(rmErr) { + log.G(ctx).WithError(rmErr).Warnf("failed to remove image-mount snapshot %q", snapshotKey) + } + } + + if subpath != "" { + return setupImageSubpathMount(ctx, options, ref, destination, subpath, snapshotKey, mounts, removeView) + } + + // Whole-rootfs case: hand the snapshotter's mount straight to the OCI runtime, + // which mounts and unmounts it with the container. overlayfs and native + // snapshotters each yield exactly one mount for a view. + if len(mounts) != 1 { + removeView() + return specs.Mount{}, "", "", fmt.Errorf("image mount expects exactly one mount from the snapshotter, got %d", len(mounts)) + } + m := mounts[0] + opts := m.Options + // A view without an upper dir is already read-only; make it explicit for + // bind-backed snapshotters. + if !strutil.InStringSlice(opts, "ro") { + opts = append(opts, "ro") + } + return specs.Mount{ + Type: m.Type, + Source: m.Source, + Destination: destination, + Options: opts, + }, snapshotKey, "", nil +} + +// setupImageSubpathMount materializes the snapshot view on a host directory and +// returns a read-only bind mount of the subpath. securejoin resolves the subpath +// against the materialized rootfs, applying a second check beyond parse-time +// validation: it blocks symlinks inside the image that point outside the rootfs. +// On any failure it unwinds the host mount, the directory, and the view. The +// returned host path must be unmounted and removed when the container is deleted. +func setupImageSubpathMount(ctx context.Context, options types.ContainerCreateOptions, ref, destination, subpath, snapshotKey string, mounts []mount.Mount, removeView func()) (specs.Mount, string, string, error) { + // Materialize under the data root keyed by snapshot key so it is unique per + // view and outlives container restarts (only removed on container deletion). 
+ hostMountpoint := filepath.Join(options.GOptions.DataRoot, "image-mounts", snapshotKey) + if err := os.MkdirAll(hostMountpoint, 0o700); err != nil { + removeView() + return specs.Mount{}, "", "", fmt.Errorf("failed to create image-mount host dir: %w", err) + } + if err := mount.All(mounts, hostMountpoint); err != nil { + // mount.All may have applied some mounts before failing; unmount before + // removing the dir so RemoveAll never recurses into a live mount. + if uErr := mount.UnmountAll(hostMountpoint, 0); uErr != nil { + log.G(ctx).WithError(uErr).Warnf("failed to unmount image-mount host path %q after failed setup", hostMountpoint) + } + os.RemoveAll(hostMountpoint) + removeView() + return specs.Mount{}, "", "", fmt.Errorf("failed to materialize image %q for subpath mount: %w", ref, err) + } + // cleanup unwinds the host mount, its directory, and the view together. + cleanup := func() { + if uErr := mount.UnmountAll(hostMountpoint, 0); uErr != nil { + log.G(ctx).WithError(uErr).Warnf("failed to unmount image-mount host path %q", hostMountpoint) + } + os.RemoveAll(hostMountpoint) + removeView() + } + + // securejoin resolves the subpath within the materialized rootfs, blocking + // symlink escapes that parse-time validation cannot see. 
+ resolved, err := securejoin.SecureJoin(hostMountpoint, subpath) + if err != nil { + cleanup() + return specs.Mount{}, "", "", fmt.Errorf("failed to resolve image-subpath %q: %w", subpath, err) + } + if _, err := os.Stat(resolved); err != nil { + cleanup() + if os.IsNotExist(err) { + return specs.Mount{}, "", "", fmt.Errorf("image-subpath %q does not exist in image %q", subpath, ref) + } + return specs.Mount{}, "", "", fmt.Errorf("failed to stat image-subpath %q: %w", subpath, err) + } + return specs.Mount{ + Type: "bind", + Source: resolved, + Destination: destination, + Options: []string{"rbind", "ro"}, + }, snapshotKey, hostMountpoint, nil +} + +// removeImageMounts tears down type=image mount state for a container: it +// unmounts and removes any host materialization directories (image-subpath), +// then removes the read-only snapshot views. NotFound is ignored; other +// failures are logged but not fatal. +func removeImageMounts(ctx context.Context, s snapshots.Snapshotter, hostpaths, snapshotKeys []string) { + // Unmount host materializations before removing the views they hold open. + for _, p := range hostpaths { + if err := mount.UnmountAll(p, 0); err != nil { + log.G(ctx).WithError(err).Warnf("failed to unmount image-mount host path %q", p) + } + if err := os.RemoveAll(p); err != nil { + log.G(ctx).WithError(err).Warnf("failed to remove image-mount host path %q", p) + } + } + for _, k := range snapshotKeys { + if err := s.Remove(ctx, k); err != nil && !errdefs.IsNotFound(err) { + log.G(ctx).WithError(err).Warnf("failed to remove image-mount snapshot %q", k) + } + } +} + // generateMountOpts generates volume-related mount opts. // Other mounts such as procfs mount are not handled here. 
func generateMountOpts(ctx context.Context, client *containerd.Client, ensuredImage *imgutil.EnsuredImage, - volStore volumestore.VolumeStore, options types.ContainerCreateOptions) ([]oci.SpecOpts, []string, []*mountutil.Processed, error) { + volStore volumestore.VolumeStore, options types.ContainerCreateOptions) (opts []oci.SpecOpts, anonVolumes []string, mountPoints []*mountutil.Processed, retErr error) { //nolint:prealloc var ( - opts []oci.SpecOpts - anonVolumes []string - userMounts []specs.Mount - mountPoints []*mountutil.Processed + userMounts []specs.Mount + imageMountViews []string + imageMountHostpaths []string ) + // Tear down any image-mount state created here if this function fails, so a + // partial setup does not leak snapshots or host mounts. + defer func() { + if retErr != nil && (len(imageMountViews) > 0 || len(imageMountHostpaths) > 0) { + removeImageMounts(ctx, client.SnapshotService(options.GOptions.Snapshotter), imageMountHostpaths, imageMountViews) + } + }() mounted := make(map[string]struct{}) var imageVolumes map[string]struct{} var tempDir string @@ -229,6 +378,24 @@ func generateMountOpts(ctx context.Context, client *containerd.Client, ensuredIm } else if len(parsed) > 0 { ociMounts := make([]specs.Mount, len(parsed)) for i, x := range parsed { + // type=image: build the read-only view now and record its snapshot + // key for cleanup on container removal. 
+ if x.Type == mountutil.Image { + m, snapshotKey, hostMountpoint, err := setupImageMount(ctx, client, options, x.Mount.Source, x.Mount.Destination, x.ImageSubpath) + if err != nil { + return nil, nil, nil, err + } + imageMountViews = append(imageMountViews, snapshotKey) + if hostMountpoint != "" { + imageMountHostpaths = append(imageMountHostpaths, hostMountpoint) + } + ociMounts[i] = m + x.ImageMountSnapshot = snapshotKey + x.ImageMountHostpath = hostMountpoint + mounted[filepath.Clean(x.Mount.Destination)] = struct{}{} + continue + } + ociMounts[i] = x.Mount mounted[filepath.Clean(x.Mount.Destination)] = struct{}{} diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index eaec0720efb..a6e77945970 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -80,6 +80,15 @@ const ( // AnonymousVolumes is a JSON-marshalled string of []string AnonymousVolumes = Prefix + "anonymous-volumes" + // ImageMountSnapshots is a JSON-marshalled []string of snapshotter keys for + // the read-only views backing `--mount type=image`, removed on container deletion. + ImageMountSnapshots = Prefix + "image-mount-snapshots" + + // ImageMountHostpaths is a JSON-marshalled []string of host directories where + // `--mount type=image,image-subpath=...` rootfs views are materialized; each + // must be unmounted and removed on container deletion. + ImageMountHostpaths = Prefix + "image-mount-hostpaths" + // Platform is the normalized platform string like "linux/ppc64le". 
Platform = Prefix + "platform" diff --git a/pkg/mountutil/mountutil.go b/pkg/mountutil/mountutil.go index d55a2cb6646..c8e9e83cde1 100644 --- a/pkg/mountutil/mountutil.go +++ b/pkg/mountutil/mountutil.go @@ -39,6 +39,7 @@ const ( Bind = "bind" Volume = "volume" Tmpfs = "tmpfs" + Image = "image" Npipe = "npipe" pathSeparator = string(os.PathSeparator) ) @@ -50,6 +51,16 @@ type Processed struct { AnonymousVolume string // anonymous volume name Mode string Opts []oci.SpecOpts + // ImageMountSnapshot is the snapshotter key of the read-only view for a + // type=image mount; empty for other mount types. + ImageMountSnapshot string + // ImageSubpath is the relative path inside a type=image rootfs to expose at + // the destination, instead of the whole rootfs. Empty means the whole rootfs. + ImageSubpath string + // ImageMountHostpath is the host directory where a type=image rootfs is + // materialized so an image-subpath can be bind-mounted from it. It must be + // unmounted and removed on container deletion. Empty when no subpath is used. + ImageMountHostpath string } type volumeSpec struct { diff --git a/pkg/mountutil/mountutil_linux.go b/pkg/mountutil/mountutil_linux.go index d1fde8b1c2a..a10258ad26d 100644 --- a/pkg/mountutil/mountutil_linux.go +++ b/pkg/mountutil/mountutil_linux.go @@ -21,6 +21,7 @@ import ( "fmt" "io/fs" "os" + "path" "path/filepath" "strconv" "strings" @@ -301,18 +302,44 @@ func ProcessFlagTmpfs(s string) (*Processed, error) { return res, nil } +// validateImageSubpath normalizes an image-subpath value and rejects paths that +// are absolute, escape the image rootfs, or resolve to the rootfs itself. An +// empty input returns empty (no subpath). Image paths are always forward-slash, +// so it uses path, not filepath. 
+func validateImageSubpath(p string) (string, error) { + if p == "" { + return "", nil + } + if path.IsAbs(p) { + return "", fmt.Errorf("image-subpath must be relative to the image rootfs, got %q", p) + } + clean := path.Clean(p) + // Clean collapses ".." segments; anything still leading with ".." escapes root. + if clean == ".." || strings.HasPrefix(clean, "../") { + return "", fmt.Errorf("image-subpath %q escapes the image rootfs", p) + } + // "." means the whole rootfs (e.g. from "a/.."); that is the no-subpath case, + // not a subdirectory selection, so reject it as a misuse of image-subpath. + if clean == "." { + return "", fmt.Errorf("image-subpath %q must select a subdirectory, not the image rootfs", p) + } + return clean, nil +} + func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, error) { fields := strings.Split(s, ",") var ( - mountType string - src string - dst string - bindPropagation string - bindNonRecursive bool - rwOption string - tmpfsSize int64 - tmpfsMode os.FileMode - err error + mountType string + src string + dst string + bindPropagation string + bindNonRecursive bool + rwOption string + writableRequested bool + imageSubpath string + tmpfsSize int64 + tmpfsMode os.FileMode + err error ) // set default values @@ -334,6 +361,9 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e switch key { case "readonly", "ro", "rw", "rro": rwOption = key + if key == "rw" { + writableRequested = true + } continue case "bind-nonrecursive": bindNonRecursive = true @@ -353,9 +383,11 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e mountType = Tmpfs case "bind": mountType = Bind + case "image": + mountType = Image case "volume": default: - return nil, fmt.Errorf("invalid mount type '%s' must be a volume/bind/tmpfs", value) + return nil, fmt.Errorf("invalid mount type '%s' must be a volume/bind/tmpfs/image", value) } case "source", "src": src = value @@ -369,6 +401,18 @@ 
func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e if trueValue { rwOption = key } + // Write requested: rw=true, or a read-only flag (ro/readonly/rro) set to false. + if trueValue == (key == "rw") { + writableRequested = true + } + case "subpath": + // subpath is not implemented for any mount type yet; type=image uses + // image-subpath instead. + return nil, fmt.Errorf("mount option %q is not yet supported", key) + case "image-subpath": + // Selects a directory inside a type=image rootfs; validated below once + // the mount type is known. + imageSubpath = value case "bind-propagation": // here don't validate the propagation value // parseVolumeOptions will do that. @@ -394,6 +438,43 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e } } + // image-subpath only makes sense for type=image; reject it elsewhere before + // falling through to the legacy bind/volume/tmpfs handlers. + if imageSubpath != "" && mountType != Image { + return nil, fmt.Errorf("image-subpath is only supported for type=image") + } + + // type=image's source is an image reference resolved later with a containerd + // client; validate the intent here. Image mounts are read-only. + if mountType == Image { + if src == "" { + return nil, fmt.Errorf("type=image requires a source (the image reference)") + } + if dst == "" { + return nil, fmt.Errorf("type=image requires a destination") + } + if writableRequested { + return nil, fmt.Errorf("type=image mounts are read-only") + } + // Normalize and bound the subpath to the image rootfs at parse time; + // securejoin re-checks against symlinks once the rootfs is materialized. + cleanSubpath, err := validateImageSubpath(imageSubpath) + if err != nil { + return nil, err + } + return &Processed{ + Type: Image, + // Mode "ro" so inspect/label metadata reports the mount read-only. 
+ Mode: "ro", + Mount: specs.Mount{ + Type: Image, + Source: src, + Destination: cleanMount(dst), + }, + ImageSubpath: cleanSubpath, + }, nil + } + // compose new fileds and join into a string // to call legacy ProcessFlagTmpfs or ProcessFlagV function fields = []string{} diff --git a/pkg/mountutil/mountutil_linux_test.go b/pkg/mountutil/mountutil_linux_test.go index 74484cbbbfe..9571f6e3786 100644 --- a/pkg/mountutil/mountutil_linux_test.go +++ b/pkg/mountutil/mountutil_linux_test.go @@ -352,3 +352,144 @@ func TestProcessFlagVAnonymousVolumes(t *testing.T) { }) } } + +// TestProcessFlagMountImage tests parsing and validation of `--mount type=image`. +func TestProcessFlagMountImage(t *testing.T) { + tests := []struct { + rawSpec string + wants *Processed + err string + }{ + { + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{ + Type: Image, + Source: "alpine:latest", + Destination: "/mnt/img", + }, + }, + }, + { + // target and src aliases must work too. + rawSpec: "type=image,src=alpine:latest,target=/mnt/img", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{ + Type: Image, + Source: "alpine:latest", + Destination: "/mnt/img", + }, + }, + }, + { + rawSpec: "type=image,destination=/mnt/img", + err: "requires a source", + }, + { + rawSpec: "type=image,source=alpine:latest", + err: "requires a destination", + }, + { + // ro and rro are read-only flags, accepted for type=image. 
+ rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,ro", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"}, + }, + }, + { + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,rro", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"}, + }, + }, + { + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,rw", + err: "read-only", + }, + { + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,readonly=false", + err: "read-only", + }, + { + // bare subpath is not a type=image option; image-subpath is. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,subpath=etc", + err: "subpath", + }, + { + // image-subpath selects a directory inside the image rootfs. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=etc", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"}, + ImageSubpath: "etc", + }, + }, + { + // image-subpath is normalized: leading ./ and trailing / are stripped. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=./etc/", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"}, + ImageSubpath: "etc", + }, + }, + { + // parent traversal must be rejected before the mount is built. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=../etc", + err: "escapes", + }, + { + // traversal that normalizes back above root must be rejected. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=a/b/../../../etc", + err: "escapes", + }, + { + // a path normalizing to "." is the whole rootfs, not a subdirectory. 
+ rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=.", + err: "must select a subdirectory", + }, + { + // "a/.." also normalizes to the rootfs and must be rejected. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=a/..", + err: "must select a subdirectory", + }, + { + // nested subpath is normalized and preserved. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=usr/lib", + wants: &Processed{ + Type: Image, + Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"}, + ImageSubpath: "usr/lib", + }, + }, + { + // absolute image-subpath is rejected; it must be relative to the rootfs. + rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,image-subpath=/etc", + err: "relative", + }, + { + // image-subpath only applies to type=image. + rawSpec: "type=bind,source=/tmp,destination=/mnt,image-subpath=etc", + err: "only supported for type=image", + }, + } + for _, tt := range tests { + t.Run(tt.rawSpec, func(t *testing.T) { + got, err := ProcessFlagMount(tt.rawSpec, nil) + if tt.err != "" { + assert.ErrorContains(t, err, tt.err) + return + } + assert.NilError(t, err) + assert.Equal(t, got.Type, tt.wants.Type) + assert.Equal(t, got.Mount.Type, tt.wants.Mount.Type) + assert.Equal(t, got.Mount.Source, tt.wants.Mount.Source) + assert.Equal(t, got.Mount.Destination, tt.wants.Mount.Destination) + assert.Equal(t, got.ImageSubpath, tt.wants.ImageSubpath) + }) + } +}