@@ -26,6 +26,7 @@ import (
2626 "github.com/dstackai/dstack/runner/consts"
2727 "github.com/dstackai/dstack/runner/internal/common"
2828 "github.com/dstackai/dstack/runner/internal/connections"
29+ cap "github.com/dstackai/dstack/runner/internal/linux/capabilities"
2930 linuxuser "github.com/dstackai/dstack/runner/internal/linux/user"
3031 "github.com/dstackai/dstack/runner/internal/log"
3132 "github.com/dstackai/dstack/runner/internal/schemas"
@@ -467,10 +468,19 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
467468 }
468469 cmd .Dir = ex .jobWorkingDir
469470
470- // Strictly speaking, we need CAP_SETUID and CAP_GUID (for Cmd.Start()->
471- // Cmd.SysProcAttr.Credential) and CAP_CHOWN (for startCommand()->os.Chown()),
472- // but for the sake of simplicity we instead check if we are root or not
473- if ex .currentUser .IsRoot () {
471+ // CAP_SET{UID,GID} for startCommand() -> Cmd.Start() -> set{uid,gid,groups} syscalls during fork-exec
472+ // CAP_CHOWN for startCommand() -> os.Chown(pts.Name())
473+ if missing , err := cap .Check (cap .SETUID , cap .SETGID , cap .CHOWN ); err != nil {
474+ log .Error (
475+ ctx , "Failed to check capabilities, won't try to set process credentials" ,
476+ "err" , err , "user" , ex .currentUser ,
477+ )
478+ } else if len (missing ) > 0 {
479+ log .Info (
480+ ctx , "Required capabilities are missing, cannot set process credentials" ,
481+ "missing" , missing , "user" , ex .currentUser ,
482+ )
483+ } else {
474484 log .Trace (ctx , "Using credentials" , "user" , ex .jobUser )
475485 if cmd .SysProcAttr == nil {
476486 cmd .SysProcAttr = & syscall.SysProcAttr {}
@@ -480,8 +490,6 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
480490 return fmt .Errorf ("prepare process credentials: %w" , err )
481491 }
482492 cmd .SysProcAttr .Credential = creds
483- } else {
484- log .Info (ctx , "Current user is not root, cannot set process credentials" , "user" , ex .currentUser )
485493 }
486494
487495 envMap := NewEnvMap (ParseEnvList (os .Environ ()), jobEnvs , ex .secrets )
@@ -509,11 +517,15 @@ func (ex *RunExecutor) execJob(ctx context.Context, jobLogFile io.Writer) error
509517 // Note: we already set RLIMIT_MEMLOCK to unlimited in the shim if we've detected IB devices
510518 // (see configureHpcNetworkingIfAvailable() function), but, as it's on the shim side, it only works
511519 // with VM-based backends.
512- rlimitMemlock := unix.Rlimit {Cur : unix .RLIM_INFINITY , Max : unix .RLIM_INFINITY }
513- // TODO: Check if we have CAP_SYS_RESOURCE. In container environments, even root usually doesn't have
514- // this capability.
515- if err := unix .Setrlimit (unix .RLIMIT_MEMLOCK , & rlimitMemlock ); err != nil {
516- log .Error (ctx , "Failed to set resource limits" , "err" , err )
520+ if ok , err := cap .Has (cap .SYS_RESOURCE ); err != nil {
521+ log .Error (ctx , "Failed to check capabilities, won't try to set resource limits" , "err" , err )
522+ } else if ! ok {
523+ log .Info (ctx , "Required capability is missing, cannot set resource limits" , "missing" , cap .SYS_RESOURCE )
524+ } else {
525+ rlimitMemlock := unix.Rlimit {Cur : unix .RLIM_INFINITY , Max : unix .RLIM_INFINITY }
526+ if err := unix .Setrlimit (unix .RLIMIT_MEMLOCK , & rlimitMemlock ); err != nil {
527+ log .Error (ctx , "Failed to set resource limits" , "err" , err )
528+ }
517529 }
518530
519531 // HOME must be added after writeDstackProfile to avoid overriding the correct per-user value set by sshd
0 commit comments