Skip to content

Commit ccfb76d

Browse files
committed
shim/manager: Retry shim start without userns on clone failure
cloneMntNs sets CLONE_NEWUSER|CLONE_NEWNS on the child, but clone can fail for reasons the proactive AppArmor sysctl check cannot detect — seccomp filters, other LSM policies, or EACCES when inherited socket fds cross the user namespace boundary after exec triggers capability recomputation.

Return whether namespace flags were set so the caller can distinguish a namespace-related Start failure from an unrelated one. On failure, rebuild the command without clone flags and retry, degrading gracefully to no mount isolation rather than failing the container start entirely.

Signed-off-by: Derek McGowan <derek@mcg.dev>
1 parent a1a5496 commit ccfb76d

File tree

3 files changed

+29
-6
lines changed

3 files changed

+29
-6
lines changed

internal/shim/manager/manager_unix.go

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ import (
3434
"github.com/containerd/containerd/v2/pkg/namespaces"
3535
"github.com/containerd/containerd/v2/pkg/shim"
3636
"github.com/containerd/errdefs"
37+
"github.com/containerd/log"
3738
"golang.org/x/sys/unix"
3839
)
3940

@@ -183,10 +184,30 @@ func (manager) Start(ctx context.Context, id string, opts shim.StartOpts) (_ shi
183184
cmd.ExtraFiles = append(cmd.ExtraFiles, s.f)
184185
}
185186

186-
cloneMntNs(ctx, cmd)
187+
userns := cloneMntNs(ctx, cmd)
187188

188189
if err := cmd.Start(); err != nil {
189-
return params, err
190+
if !userns {
191+
return params, err
192+
}
193+
// clone(CLONE_NEWUSER) can fail for reasons not covered by the
194+
// proactive AppArmor check — e.g. seccomp filters, LSM policies,
195+
// or EACCES from the child's capability recomputation when
196+
// inherited socket fds cross the user namespace boundary after
197+
// exec. Retry without namespace isolation rather than failing
198+
// the container start.
199+
log.G(ctx).WithError(err).Warn("shim start with user namespace failed, retrying without namespace isolation")
200+
cmd, err = newCommand(ctx, id, opts.Address, opts.TTRPCAddress, opts.Debug)
201+
if err != nil {
202+
return params, err
203+
}
204+
cmd.ExtraFiles = append(cmd.ExtraFiles, sockets[0].f)
205+
if opts.Debug && len(sockets) > 1 {
206+
cmd.ExtraFiles = append(cmd.ExtraFiles, sockets[1].f)
207+
}
208+
if err := cmd.Start(); err != nil {
209+
return params, err
210+
}
190211
}
191212

192213
defer func() {

internal/shim/manager/mount_linux.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -55,13 +55,14 @@ import (
5555
// If namespace creation is not possible (e.g. AppArmor restricts
5656
// unprivileged user namespaces), the function logs a warning and the shim
5757
// will run without mount isolation.
58-
func cloneMntNs(ctx context.Context, cmd *exec.Cmd) {
58+
// cloneMntNs returns true if user namespace clone flags were set.
59+
func cloneMntNs(ctx context.Context, cmd *exec.Cmd) bool {
5960
if restricted, err := apparmorRestrictsUserns(); err != nil {
6061
log.G(ctx).WithError(err).Warn("failed to check apparmor userns restriction, skipping mount namespace isolation")
61-
return
62+
return false
6263
} else if restricted {
6364
log.G(ctx).Warn("apparmor_restrict_unprivileged_userns=1 prevents user namespace creation; shim will run without mount namespace isolation")
64-
return
65+
return false
6566
}
6667

6768
uid := os.Getuid()
@@ -73,6 +74,7 @@ func cloneMntNs(ctx context.Context, cmd *exec.Cmd) {
7374
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
7475
{ContainerID: gid, HostID: gid, Size: 1},
7576
}
77+
return true
7678
}
7779

7880
// apparmorRestrictsUserns checks if the kernel sysctl

internal/shim/manager/mount_other.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,4 +23,4 @@ import (
2323
"os/exec"
2424
)
2525

26-
func cloneMntNs(_ context.Context, _ *exec.Cmd) {}
26+
func cloneMntNs(_ context.Context, _ *exec.Cmd) bool { return false }

0 commit comments

Comments
 (0)