diff --git a/cmd/nerdctl/container/container_run_mount_image_linux_test.go b/cmd/nerdctl/container/container_run_mount_image_linux_test.go new file mode 100644 index 00000000000..0d2fe372e47 --- /dev/null +++ b/cmd/nerdctl/container/container_run_mount_image_linux_test.go @@ -0,0 +1,137 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package container + +import ( + "fmt" + "testing" + + "github.com/containerd/nerdctl/mod/tigron/expect" + "github.com/containerd/nerdctl/mod/tigron/require" + "github.com/containerd/nerdctl/mod/tigron/test" + + "github.com/containerd/nerdctl/v2/pkg/testutil" + "github.com/containerd/nerdctl/v2/pkg/testutil/nerdtest" +) + +// TestRunMountTypeImage verifies that `--mount type=image` mounts the source +// image's filesystem into the container so its files are readable at the target. 
+func TestRunMountTypeImage(t *testing.T) {
+	tc := nerdtest.Setup()
+
+	tc.Command = func(_ test.Data, helpers test.Helpers) test.TestableCommand {
+		mountSpec := fmt.Sprintf("type=image,source=%s,destination=/mnt/img", testutil.CommonImage)
+		return helpers.Command("run", "--rm", "--mount", mountSpec,
+			testutil.CommonImage, "cat", "/mnt/img/etc/os-release")
+	}
+
+	tc.Expected = func(_ test.Data, _ test.Helpers) *test.Expected {
+		var want test.Expected
+		want.ExitCode = expect.ExitCodeSuccess
+		want.Output = expect.Contains("Alpine")
+		return &want
+	}
+
+	tc.Run(t)
+}
+
+// TestRunMountTypeImageMultipleDestinations checks that a single image can
+// back two image mounts, at different destinations, of the same container.
+func TestRunMountTypeImageMultipleDestinations(t *testing.T) {
+	tc := nerdtest.Setup()
+	// Not run against Docker, which keys an image mount by its source image and
+	// so rejects a second mount of it ("mount already exists with name").
+	tc.Require = require.Not(nerdtest.Docker)
+
+	tc.Command = func(_ test.Data, helpers test.Helpers) test.TestableCommand {
+		specA := fmt.Sprintf("type=image,source=%s,destination=/mnt/a", testutil.CommonImage)
+		specB := fmt.Sprintf("type=image,source=%s,destination=/mnt/b", testutil.CommonImage)
+		return helpers.Command("run", "--rm", "--mount", specA, "--mount", specB,
+			testutil.CommonImage, "cat", "/mnt/a/etc/os-release", "/mnt/b/etc/os-release")
+	}
+
+	tc.Expected = func(_ test.Data, _ test.Helpers) *test.Expected {
+		var want test.Expected
+		want.ExitCode = expect.ExitCodeSuccess
+		want.Output = expect.Contains("Alpine")
+		return &want
+	}
+
+	tc.Run(t)
+}
+
+// TestRunMountTypeImageReadOnly verifies an image mount is read-only (writing
+// fails). nerdctl-only: Docker mounts images read-write by default.
+func TestRunMountTypeImageReadOnly(t *testing.T) {
+	tc := nerdtest.Setup()
+	tc.Require = require.Not(nerdtest.Docker)
+
+	tc.Command = func(_ test.Data, helpers test.Helpers) test.TestableCommand {
+		mountSpec := fmt.Sprintf("type=image,source=%s,destination=/mnt/img", testutil.CommonImage)
+		return helpers.Command("run", "--rm", "--mount", mountSpec,
+			testutil.CommonImage, "touch", "/mnt/img/should-fail")
+	}
+
+	tc.Expected = func(_ test.Data, _ test.Helpers) *test.Expected {
+		var want test.Expected
+		want.ExitCode = expect.ExitCodeGenericFail
+		want.Errors = []error{fmt.Errorf("Read-only file system")}
+		return &want
+	}
+
+	tc.Run(t)
+}
+
+// TestRunMountTypeImageErrors verifies that an image mount missing its source,
+// or using the not-yet-supported subpath option, is rejected. subpath is
+// nerdctl-specific behaviour here, so the test is not run against Docker.
+func TestRunMountTypeImageErrors(t *testing.T) {
+	testCase := nerdtest.Setup()
+	testCase.Require = require.Not(nerdtest.Docker)
+
+	testCase.SubTests = []*test.Case{
+		{
+			Description: "missing source",
+			Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
+				return helpers.Command("run", "--rm", "--mount", "type=image,destination=/mnt/img",
+					testutil.CommonImage, "true")
+			},
+			Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
+				return &test.Expected{
+					ExitCode: expect.ExitCodeGenericFail,
+					Errors:   []error{fmt.Errorf("source")},
+				}
+			},
+		},
+		{
+			Description: "subpath not supported",
+			Command: func(data test.Data, helpers test.Helpers) test.TestableCommand {
+				return helpers.Command("run", "--rm",
+					"--mount", fmt.Sprintf("type=image,source=%s,destination=/mnt/img,subpath=etc", testutil.CommonImage),
+					testutil.CommonImage, "true")
+			},
+			Expected: func(data test.Data, helpers test.Helpers) *test.Expected {
+				return &test.Expected{
+					ExitCode: expect.ExitCodeGenericFail,
+					Errors:   []error{fmt.Errorf("subpath")},
+				}
+			},
+		},
+	}
+
+	
testCase.Run(t) +} diff --git a/docs/command-reference.md b/docs/command-reference.md index 2195e6ee7bb..643f4a66483 100644 --- a/docs/command-reference.md +++ b/docs/command-reference.md @@ -295,10 +295,9 @@ Volume flags: Consists of multiple key-value pairs, separated by commas and each consisting of a `<key>=<value>` tuple. e.g., `-- mount type=bind,source=/src,target=/app,bind-propagation=shared`. - - :whale: `type`: Current supported mount types are `bind`, `volume`, `tmpfs`. + - :whale: `type`: Current supported mount types are `bind`, `volume`, `tmpfs`, `image`. The default type will be set to `volume` if not specified. i.e., `--mount src=vol-1,dst=/app,readonly` equals `--mount type=volume,src=vol-1,dst=/app,readonly` - - unimplemented type: `image` - Common Options: - :whale: `src`, `source`: Mount source spec for bind and volume. Mandatory for bind. - :whale: `dst`, `destination`, `target`: Mount destination spec. @@ -313,6 +312,9 @@ Volume flags: Defaults to `1777` or world-writable. - Options specific to `volume`: - unimplemented options: `volume-nocopy`, `volume-label`, `volume-driver`, `volume-opt` + - Options specific to `image`: + - :whale: `src`, `source`: image reference (mandatory). The image filesystem is mounted read-only. + - unimplemented options: `subpath` - :whale: `--volumes-from`: Mount volumes from the specified container(s), e.g. "--volumes-from my-container". Rootfs flags: diff --git a/pkg/cmd/container/create.go b/pkg/cmd/container/create.go index eb1de8df27d..97bf5ea7e27 100644 --- a/pkg/cmd/container/create.go +++ b/pkg/cmd/container/create.go @@ -70,7 +70,7 @@ import ( ) // Create will create a container. 
-func Create(ctx context.Context, client *containerd.Client, args []string, netManager containerutil.NetworkOptionsManager, options types.ContainerCreateOptions) (containerd.Container, func(), error) { +func Create(ctx context.Context, client *containerd.Client, args []string, netManager containerutil.NetworkOptionsManager, options types.ContainerCreateOptions) (_ containerd.Container, _ func(), retErr error) { // Acquire an exclusive lock on the volume store until we are done to avoid being raced by any other // volume operations (or any other operation involving volume manipulation) volStore, err := volume.Store(options.GOptions.Namespace, options.GOptions.DataRoot, options.GOptions.Address) @@ -94,6 +94,23 @@ func Create(ctx context.Context, client *containerd.Client, args []string, netMa internalLabels.platform = options.Platform internalLabels.namespace = options.GOptions.Namespace + // If creation fails after image-mount views are created, remove them so the + // snapshots do not leak (the cleanup label is only persisted on success). + defer func() { + if retErr == nil { + return + } + var keys []string + for _, mp := range internalLabels.mountPoints { + if mp.ImageMountSnapshot != "" { + keys = append(keys, mp.ImageMountSnapshot) + } + } + if len(keys) > 0 { + removeImageMountViews(ctx, client.SnapshotService(options.GOptions.Snapshotter), keys) + } + }() + var ( id = idgen.GenerateID() opts []oci.SpecOpts @@ -806,6 +823,22 @@ func withInternalLabels(internalLabels internalLabels) (containerd.NewContainerO m[labels.AnonymousVolumes] = string(anonVolumeJSON) } + // Record the snapshot keys of any type=image mount views so they can be + // removed when the container is deleted. 
+ var imageMountSnapshots []string + for _, mp := range internalLabels.mountPoints { + if mp.ImageMountSnapshot != "" { + imageMountSnapshots = append(imageMountSnapshots, mp.ImageMountSnapshot) + } + } + if len(imageMountSnapshots) > 0 { + b, err := json.Marshal(imageMountSnapshots) + if err != nil { + return nil, err + } + m[labels.ImageMountSnapshots] = string(b) + } + if internalLabels.pidFile != "" { m[labels.PIDFile] = internalLabels.pidFile } diff --git a/pkg/cmd/container/remove.go b/pkg/cmd/container/remove.go index b9df2b2acaf..744d20aabb3 100644 --- a/pkg/cmd/container/remove.go +++ b/pkg/cmd/container/remove.go @@ -109,6 +109,13 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions return err } + // Capture the container's snapshotter before deletion: image-mount views were + // created against it, which may differ from the current --snapshotter flag. + imageMountSnapshotter := globalOptions.Snapshotter + if info, err := c.Info(ctx); err == nil && info.Snapshotter != "" { + imageMountSnapshotter = info.Snapshotter + } + // Get datastore dataStore, err := clientutil.DataStore(globalOptions.DataRoot, globalOptions.Address) if err != nil { @@ -275,6 +282,16 @@ func RemoveContainer(ctx context.Context, c containerd.Container, globalOptions } } } + + // Remove the read-only views backing type=image mounts - soft failure. + if snapshotsJSON, ok := containerLabels[labels.ImageMountSnapshots]; ok { + var keys []string + if err = json.Unmarshal([]byte(snapshotsJSON), &keys); err != nil { + log.G(ctx).WithError(err).Warnf("failed to unmarshal image-mount snapshots for container %q", id) + } else { + removeImageMountViews(ctx, client.SnapshotService(imageMountSnapshotter), keys) + } + } }() // Get the task. 
diff --git a/pkg/cmd/container/run_mount.go b/pkg/cmd/container/run_mount.go
index 5850cad92f0..8c3cf111009 100644
--- a/pkg/cmd/container/run_mount.go
+++ b/pkg/cmd/container/run_mount.go
@@ -37,6 +37,7 @@ import (
 	"github.com/containerd/containerd/v2/core/containers"
 	"github.com/containerd/containerd/v2/core/leases"
 	"github.com/containerd/containerd/v2/core/mount"
+	"github.com/containerd/containerd/v2/core/snapshots"
 	"github.com/containerd/containerd/v2/pkg/oci"
 	"github.com/containerd/continuity/fs"
 	"github.com/containerd/errdefs"
@@ -122,17 +123,83 @@ func parseMountFlags(volStore volumestore.VolumeStore, options types.ContainerCr
 	return parsed, nil
 }
 
+// gcRootLabel is the label that marks a snapshot as a GC root, so containerd does not reclaim it.
+const gcRootLabel = "containerd.io/gc.root"
+
+// setupImageMount prepares the mount for one `--mount type=image` entry: it
+// ensures ref, unpacks it for the configured snapshotter, and creates a
+// GC-rooted read-only view on top of the image's rootfs chain. It returns the
+// OCI mount targeting destination together with the view's snapshot key.
+func setupImageMount(ctx context.Context, client *containerd.Client, options types.ContainerCreateOptions, ref, destination string) (specs.Mount, string, error) {
+	snapshotterName := options.GOptions.Snapshotter
+	img, err := imgutil.EnsureImage(ctx, client, ref, options.ImagePullOpt)
+	if err != nil {
+		return specs.Mount{}, "", fmt.Errorf("failed to ensure image %q for image mount: %w", ref, err)
+	}
+	if err = img.Image.Unpack(ctx, snapshotterName); err != nil {
+		return specs.Mount{}, "", fmt.Errorf("failed to unpack image %q for image mount: %w", ref, err)
+	}
+	layers, err := img.Image.RootFS(ctx)
+	if err != nil {
+		return specs.Mount{}, "", fmt.Errorf("failed to get rootfs of image %q for image mount: %w", ref, err)
+	}
+
+	viewKey := idgen.GenerateID() + "-image-mount"
+	rootLabels := map[string]string{gcRootLabel: time.Now().UTC().Format(time.RFC3339)}
+	sn := client.SnapshotService(snapshotterName)
+	viewMounts, err := sn.View(ctx, viewKey, identity.ChainID(layers).String(), snapshots.WithLabels(rootLabels))
+	if err != nil {
+		return specs.Mount{}, "", fmt.Errorf("failed to create read-only view of image %q: %w", ref, err)
+	}
+	// overlayfs and native snapshotters each yield a single mount for a view;
+	// any other shape is rejected and the freshly created view is removed again.
+	if len(viewMounts) != 1 {
+		removeImageMountViews(ctx, sn, []string{viewKey})
+		return specs.Mount{}, "", fmt.Errorf("image mount expects exactly one mount from the snapshotter, got %d", len(viewMounts))
+	}
+
+	// A view without an upper dir is already read-only; "ro" is still added
+	// explicitly for bind-backed snapshotters.
+	ociMount := specs.Mount{
+		Type:        viewMounts[0].Type,
+		Source:      viewMounts[0].Source,
+		Destination: destination,
+		Options:     viewMounts[0].Options,
+	}
+	if !strutil.InStringSlice(ociMount.Options, "ro") {
+		ociMount.Options = append(ociMount.Options, "ro")
+	}
+	return ociMount, viewKey, nil
+}
+
+// removeImageMountViews removes from s every snapshot listed in keys. A NotFound
+// error is ignored; any other failure is logged as a warning and the loop goes on.
+func removeImageMountViews(ctx context.Context, s snapshots.Snapshotter, keys []string) {
+	for i := range keys {
+		err := s.Remove(ctx, keys[i])
+		if err == nil || errdefs.IsNotFound(err) {
+			continue
+		}
+		log.G(ctx).WithError(err).Warnf("failed to remove image-mount snapshot %q", keys[i])
+	}
+}
+
 // generateMountOpts generates volume-related mount opts.
 // Other mounts such as procfs mount are not handled here.
func generateMountOpts(ctx context.Context, client *containerd.Client, ensuredImage *imgutil.EnsuredImage, - volStore volumestore.VolumeStore, options types.ContainerCreateOptions) ([]oci.SpecOpts, []string, []*mountutil.Processed, error) { + volStore volumestore.VolumeStore, options types.ContainerCreateOptions) (opts []oci.SpecOpts, anonVolumes []string, mountPoints []*mountutil.Processed, retErr error) { //nolint:prealloc var ( - opts []oci.SpecOpts - anonVolumes []string - userMounts []specs.Mount - mountPoints []*mountutil.Processed + userMounts []specs.Mount + imageMountViews []string ) + // Remove any image-mount views created here if this function fails, so a + // partial setup does not leak snapshots. + defer func() { + if retErr != nil && len(imageMountViews) > 0 { + removeImageMountViews(ctx, client.SnapshotService(options.GOptions.Snapshotter), imageMountViews) + } + }() mounted := make(map[string]struct{}) var imageVolumes map[string]struct{} var tempDir string @@ -229,6 +296,20 @@ func generateMountOpts(ctx context.Context, client *containerd.Client, ensuredIm } else if len(parsed) > 0 { ociMounts := make([]specs.Mount, len(parsed)) for i, x := range parsed { + // type=image: build the read-only view now and record its snapshot + // key for cleanup on container removal. 
+ if x.Type == mountutil.Image { + m, snapshotKey, err := setupImageMount(ctx, client, options, x.Mount.Source, x.Mount.Destination) + if err != nil { + return nil, nil, nil, err + } + imageMountViews = append(imageMountViews, snapshotKey) + ociMounts[i] = m + x.ImageMountSnapshot = snapshotKey + mounted[filepath.Clean(x.Mount.Destination)] = struct{}{} + continue + } + ociMounts[i] = x.Mount mounted[filepath.Clean(x.Mount.Destination)] = struct{}{} diff --git a/pkg/labels/labels.go b/pkg/labels/labels.go index eaec0720efb..c4d38f54803 100644 --- a/pkg/labels/labels.go +++ b/pkg/labels/labels.go @@ -80,6 +80,10 @@ const ( // AnonymousVolumes is a JSON-marshalled string of []string AnonymousVolumes = Prefix + "anonymous-volumes" + // ImageMountSnapshots is a JSON-marshalled []string of snapshotter keys for + // the read-only views backing `--mount type=image`, removed on container deletion. + ImageMountSnapshots = Prefix + "image-mount-snapshots" + // Platform is the normalized platform string like "linux/ppc64le". Platform = Prefix + "platform" diff --git a/pkg/mountutil/mountutil.go b/pkg/mountutil/mountutil.go index d55a2cb6646..f7e8bc0a454 100644 --- a/pkg/mountutil/mountutil.go +++ b/pkg/mountutil/mountutil.go @@ -39,6 +39,7 @@ const ( Bind = "bind" Volume = "volume" Tmpfs = "tmpfs" + Image = "image" Npipe = "npipe" pathSeparator = string(os.PathSeparator) ) @@ -50,6 +51,9 @@ type Processed struct { AnonymousVolume string // anonymous volume name Mode string Opts []oci.SpecOpts + // ImageMountSnapshot is the snapshotter key of the read-only view for a + // type=image mount; empty for other mount types. 
+ ImageMountSnapshot string } type volumeSpec struct { diff --git a/pkg/mountutil/mountutil_linux.go b/pkg/mountutil/mountutil_linux.go index d1fde8b1c2a..a552f2e2edc 100644 --- a/pkg/mountutil/mountutil_linux.go +++ b/pkg/mountutil/mountutil_linux.go @@ -304,15 +304,16 @@ func ProcessFlagTmpfs(s string) (*Processed, error) { func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, error) { fields := strings.Split(s, ",") var ( - mountType string - src string - dst string - bindPropagation string - bindNonRecursive bool - rwOption string - tmpfsSize int64 - tmpfsMode os.FileMode - err error + mountType string + src string + dst string + bindPropagation string + bindNonRecursive bool + rwOption string + writableRequested bool + tmpfsSize int64 + tmpfsMode os.FileMode + err error ) // set default values @@ -334,6 +335,9 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e switch key { case "readonly", "ro", "rw", "rro": rwOption = key + if key == "rw" { + writableRequested = true + } continue case "bind-nonrecursive": bindNonRecursive = true @@ -353,9 +357,11 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e mountType = Tmpfs case "bind": mountType = Bind + case "image": + mountType = Image case "volume": default: - return nil, fmt.Errorf("invalid mount type '%s' must be a volume/bind/tmpfs", value) + return nil, fmt.Errorf("invalid mount type '%s' must be a volume/bind/tmpfs/image", value) } case "source", "src": src = value @@ -369,6 +375,13 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e if trueValue { rwOption = key } + // Write requested: rw=true, or a read-only flag (ro/readonly/rro) set to false. + if trueValue == (key == "rw") { + writableRequested = true + } + case "subpath": + // subpath is not implemented for any mount type yet. 
+			return nil, fmt.Errorf("mount option %q is not yet supported", key)
 		case "bind-propagation":
 			// here don't validate the propagation value
 			// parseVolumeOptions will do that.
@@ -394,6 +407,30 @@ func ProcessFlagMount(s string, volStore volumestore.VolumeStore) (*Processed, e
 		}
 	}
 
+	// type=image's source is an image reference resolved later with a containerd
+	// client; validate the intent here. Image mounts are read-only.
+	if mountType == Image {
+		if src == "" {
+			return nil, fmt.Errorf("type=image requires a source (the image reference)")
+		}
+		if dst == "" {
+			return nil, fmt.Errorf("type=image requires a destination")
+		}
+		if writableRequested {
+			return nil, fmt.Errorf("type=image mounts are read-only")
+		}
+		return &Processed{
+			Type: Image,
+			// Mode "ro" so inspect/label metadata reports the mount read-only.
+			Mode: "ro",
+			Mount: specs.Mount{
+				Type:        Image,
+				Source:      src,
+				Destination: cleanMount(dst),
+			},
+		}, nil
+	}
+
 	// compose new fileds and join into a string
 	// to call legacy ProcessFlagTmpfs or ProcessFlagV function
 	fields = []string{}
diff --git a/pkg/mountutil/mountutil_linux_test.go b/pkg/mountutil/mountutil_linux_test.go
index 74484cbbbfe..efd5a5b69f2 100644
--- a/pkg/mountutil/mountutil_linux_test.go
+++ b/pkg/mountutil/mountutil_linux_test.go
@@ -352,3 +352,89 @@ func TestProcessFlagVAnonymousVolumes(t *testing.T) {
 		})
 	}
 }
+
+// TestProcessFlagMountImage tests parsing and validation of `--mount type=image`:
+// source and destination (or their aliases) are required, read-only flags are
+// accepted, and write requests as well as `subpath` are rejected.
+func TestProcessFlagMountImage(t *testing.T) {
+	tests := []struct {
+		rawSpec string
+		wants   *Processed
+		err     string
+	}{
+		{
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img",
+			wants: &Processed{
+				Type: Image,
+				Mount: specs.Mount{
+					Type:        Image,
+					Source:      "alpine:latest",
+					Destination: "/mnt/img",
+				},
+			},
+		},
+		{
+			// target and src aliases must work too.
+			rawSpec: "type=image,src=alpine:latest,target=/mnt/img",
+			wants: &Processed{
+				Type: Image,
+				Mount: specs.Mount{
+					Type:        Image,
+					Source:      "alpine:latest",
+					Destination: "/mnt/img",
+				},
+			},
+		},
+		{
+			rawSpec: "type=image,destination=/mnt/img",
+			err:     "requires a source",
+		},
+		{
+			rawSpec: "type=image,source=alpine:latest",
+			err:     "requires a destination",
+		},
+		{
+			// ro and rro are read-only flags, accepted for type=image.
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,ro",
+			wants: &Processed{
+				Type:  Image,
+				Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"},
+			},
+		},
+		{
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,rro",
+			wants: &Processed{
+				Type:  Image,
+				Mount: specs.Mount{Type: Image, Source: "alpine:latest", Destination: "/mnt/img"},
+			},
+		},
+		{
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,rw",
+			err:     "read-only",
+		},
+		{
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,readonly=false",
+			err:     "read-only",
+		},
+		{
+			rawSpec: "type=image,source=alpine:latest,destination=/mnt/img,subpath=etc",
+			err:     "subpath",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.rawSpec, func(t *testing.T) {
+			got, err := ProcessFlagMount(tt.rawSpec, nil)
+			if tt.err != "" {
+				assert.ErrorContains(t, err, tt.err)
+				return
+			}
+			assert.NilError(t, err)
+			assert.Equal(t, got.Type, tt.wants.Type)
+			assert.Equal(t, got.Mount.Type, tt.wants.Mount.Type)
+			assert.Equal(t, got.Mount.Source, tt.wants.Mount.Source)
+			assert.Equal(t, got.Mount.Destination, tt.wants.Mount.Destination)
+			// Every accepted image mount must be reported as read-only.
+			assert.Equal(t, got.Mode, "ro")
+		})
+	}
+}