Skip to content

Commit 2e9a5e9

Browse files
JAORMX and claude
committed
Enable override_stat xattr on Linux
The Linux kernel restricts user.* xattrs to regular files and directories, so symlinks and special files are silently skipped. Once libkrun's Linux virtiofs passthrough reads these xattrs, guest file ownership will be correct without CAP_CHOWN. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 04d515a commit 2e9a5e9

File tree

9 files changed

+319
-80
lines changed

9 files changed

+319
-80
lines changed

docs/SECURITY.md

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -397,10 +397,19 @@ sudo setcap cap_chown+ep /path/to/your-binary
397397
capsh --addamb=cap_chown -- -c '/path/to/your-binary'
398398
```
399399

400-
On macOS, this problem is solved differently: propolis sets the
401-
`user.containers.override_stat` extended attribute on extracted files so that
402-
libkrun's virtiofs FUSE server reports correct ownership to the guest. See
403-
the `internal/xattr` package for details.
400+
### override_stat xattr (macOS and Linux)
401+
402+
propolis also sets the `user.containers.override_stat` extended attribute on
403+
extracted files so that libkrun's virtiofs server reports correct ownership to
404+
the guest. This is the same mechanism that podman uses on macOS.
405+
406+
On Linux, the xattr is set on regular files and directories. The kernel
407+
restricts `user.*` xattrs on symlinks and special files, so those are silently
408+
skipped. Once libkrun's Linux virtiofs passthrough adds support for reading
409+
these xattrs (the same support already exists on macOS), file ownership in the
410+
guest will be correct without requiring `CAP_CHOWN`.
411+
412+
See the `internal/xattr` package for details.
404413

405414
## File Permissions
406415

internal/xattr/doc.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,24 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
// Package xattr sets libkrun's user.containers.override_stat extended
5-
// attribute on macOS so that the virtiofs FUSE server reports correct
5+
// attribute on macOS and Linux so that the virtiofs server reports correct
66
// guest-visible ownership and permissions for rootfs files.
77
//
8-
// When propolis extracts an OCI image on macOS, the host user (typically
9-
// uid 501) ends up owning all files because non-root cannot chown.
10-
// libkrun's FUSE server performs access checks against these host-side
11-
// attributes, causing permission denied errors for guest processes
12-
// running as UIDs that don't exist on the host (e.g. uid 1000).
8+
// When propolis extracts an OCI image, the host user ends up owning all
9+
// files because non-root cannot chown. libkrun's virtiofs server
10+
// performs access checks against these host-side attributes, causing
11+
// permission denied errors for guest processes running as UIDs that
12+
// don't exist on the host (e.g. uid 1000).
1313
//
14-
// Setting this xattr on each file tells the FUSE server to override
14+
// Setting this xattr on each file tells the virtiofs server to override
1515
// the reported stat values, making the guest see the correct ownership
1616
// from the original OCI image. This is the same mechanism that podman
1717
// uses on macOS.
1818
//
19-
// On non-darwin platforms these functions are no-ops.
19+
// On Linux, the kernel restricts user.* xattrs to regular files and
20+
// directories — symlinks and special files are silently skipped. For
21+
// symlinks this is acceptable because applications check the target's
22+
// ownership, not the symlink itself.
23+
//
24+
// On platforms other than macOS and Linux these functions are no-ops.
2025
package xattr

internal/xattr/mode.go

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package xattr
5+
6+
import "os"
7+
8+
const overrideKey = "user.containers.override_stat"
9+
10+
// goFileModeToPosix converts a Go os.FileMode to a POSIX st_mode value,
11+
// combining the permission, setuid/setgid/sticky, and file type bits.
12+
func goFileModeToPosix(m os.FileMode) uint32 {
13+
mode := uint32(m.Perm()) // start from the permission bits only
14+
15+
if m&os.ModeSetuid != 0 {
16+
mode |= 0o4000 // S_ISUID
17+
}
18+
if m&os.ModeSetgid != 0 {
19+
mode |= 0o2000 // S_ISGID
20+
}
21+
if m&os.ModeSticky != 0 {
22+
mode |= 0o1000 // S_ISVTX
23+
}
24+
25+
switch { // exactly one file type is chosen; the first matching case wins
26+
case m.IsDir():
27+
mode |= 0o040000 // S_IFDIR
28+
case m&os.ModeSymlink != 0:
29+
mode |= 0o120000 // S_IFLNK
30+
case m&os.ModeNamedPipe != 0:
31+
mode |= 0o010000 // S_IFIFO
32+
case m&os.ModeSocket != 0:
33+
mode |= 0o140000 // S_IFSOCK
34+
case m&os.ModeDevice != 0:
35+
if m&os.ModeCharDevice != 0 {
36+
mode |= 0o020000 // S_IFCHR
37+
} else {
38+
mode |= 0o060000 // S_IFBLK
39+
}
40+
default: // no type bit matched above: report a regular file
41+
mode |= 0o100000 // S_IFREG
42+
}
43+
44+
return mode
45+
}

internal/xattr/mode_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
package xattr
5+
6+
import (
7+
"os"
8+
"testing"
9+
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestGoFileModeToPosix(t *testing.T) {
14+
t.Parallel()
15+
16+
tests := []struct { // table of Go file modes and the st_mode each must map to
17+
name string // subtest name
18+
mode os.FileMode // input passed to goFileModeToPosix
19+
want uint32 // expected POSIX st_mode, written in octal
20+
}{
21+
{"regular 644", 0o644, 0o100644},
22+
{"regular 755", 0o755, 0o100755},
23+
{"regular 600", 0o600, 0o100600},
24+
{"dir 755", os.ModeDir | 0o755, 0o040755},
25+
{"dir 700", os.ModeDir | 0o700, 0o040700},
26+
{"symlink", os.ModeSymlink | 0o777, 0o120777},
27+
{"setuid", os.ModeSetuid | 0o755, 0o104755},
28+
{"setgid", os.ModeSetgid | 0o755, 0o102755},
29+
{"sticky", os.ModeSticky | 0o755, 0o101755},
30+
} // NOTE(review): no cases for named pipes, sockets, or device nodes
31+
32+
for _, tt := range tests { // NOTE(review): parallel subtests capture tt; safe only with Go 1.22+ loop variables — confirm go.mod
33+
t.Run(tt.name, func(t *testing.T) {
34+
t.Parallel()
35+
got := goFileModeToPosix(tt.mode)
36+
assert.Equal(t, tt.want, got)
37+
})
38+
}
39+
}

internal/xattr/override_darwin.go

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@ import (
1313
"golang.org/x/sys/unix"
1414
)
1515

16-
const overrideKey = "user.containers.override_stat"
17-
1816
// SetOverrideStat sets the user.containers.override_stat xattr on path
1917
// so that libkrun's virtiofs FUSE server reports the given uid, gid,
2018
// and mode to the guest instead of the real APFS values.
@@ -51,40 +49,3 @@ func CopyOverrideStat(src, dst string) {
5149
slog.Debug("copy override_stat xattr failed", "dst", dst, "err", err)
5250
}
5351
}
54-
55-
// goFileModeToPosix converts a Go os.FileMode to a POSIX st_mode value
56-
// including file type bits.
57-
func goFileModeToPosix(m os.FileMode) uint32 {
58-
mode := uint32(m.Perm())
59-
60-
if m&os.ModeSetuid != 0 {
61-
mode |= 0o4000
62-
}
63-
if m&os.ModeSetgid != 0 {
64-
mode |= 0o2000
65-
}
66-
if m&os.ModeSticky != 0 {
67-
mode |= 0o1000
68-
}
69-
70-
switch {
71-
case m.IsDir():
72-
mode |= 0o040000 // S_IFDIR
73-
case m&os.ModeSymlink != 0:
74-
mode |= 0o120000 // S_IFLNK
75-
case m&os.ModeNamedPipe != 0:
76-
mode |= 0o010000 // S_IFIFO
77-
case m&os.ModeSocket != 0:
78-
mode |= 0o140000 // S_IFSOCK
79-
case m&os.ModeDevice != 0:
80-
if m&os.ModeCharDevice != 0 {
81-
mode |= 0o020000 // S_IFCHR
82-
} else {
83-
mode |= 0o060000 // S_IFBLK
84-
}
85-
default:
86-
mode |= 0o100000 // S_IFREG
87-
}
88-
89-
return mode
90-
}

internal/xattr/override_darwin_test.go

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -117,34 +117,6 @@ func TestCopyOverrideStat_NoXattr(t *testing.T) {
117117
assert.Error(t, err)
118118
}
119119

120-
func TestGoFileModeToPosix(t *testing.T) {
121-
t.Parallel()
122-
123-
tests := []struct {
124-
name string
125-
mode os.FileMode
126-
want uint32
127-
}{
128-
{"regular 644", 0o644, 0o100644},
129-
{"regular 755", 0o755, 0o100755},
130-
{"regular 600", 0o600, 0o100600},
131-
{"dir 755", os.ModeDir | 0o755, 0o040755},
132-
{"dir 700", os.ModeDir | 0o700, 0o040700},
133-
{"symlink", os.ModeSymlink | 0o777, 0o120777},
134-
{"setuid", os.ModeSetuid | 0o755, 0o104755},
135-
{"setgid", os.ModeSetgid | 0o755, 0o102755},
136-
{"sticky", os.ModeSticky | 0o755, 0o101755},
137-
}
138-
139-
for _, tt := range tests {
140-
t.Run(tt.name, func(t *testing.T) {
141-
t.Parallel()
142-
got := goFileModeToPosix(tt.mode)
143-
assert.Equal(t, tt.want, got)
144-
})
145-
}
146-
}
147-
148120
func readXattr(t *testing.T, path string) string {
149121
t.Helper()
150122
buf := make([]byte, 256)

internal/xattr/override_linux.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//go:build linux
5+
6+
package xattr
7+
8+
import (
9+
"fmt"
10+
"log/slog"
11+
"os"
12+
13+
"golang.org/x/sys/unix"
14+
)
15+
16+
// SetOverrideStat sets the user.containers.override_stat xattr on path
17+
// so that libkrun's virtiofs server reports the given uid, gid,
18+
// and mode to the guest instead of the real host values.
19+
// A failed setxattr is logged at debug level and otherwise ignored.
20+
//
21+
// On Linux the kernel restricts user.* xattrs to regular files and
22+
// directories (fs/xattr.c:xattr_permission). Symlinks, named pipes,
23+
// sockets, and device nodes are skipped without logging.
24+
func SetOverrideStat(path string, uid, gid int, mode os.FileMode) {
25+
// The Linux kernel refuses user.* xattrs on anything other than
26+
// regular files and directories. Skip early to avoid EPERM.
27+
if mode&(os.ModeSymlink|os.ModeNamedPipe|os.ModeSocket|os.ModeDevice) != 0 {
28+
return
29+
}
30+
31+
unixMode := goFileModeToPosix(mode) // POSIX st_mode including file type bits
32+
val := fmt.Sprintf("%d:%d:0%o", uid, gid, unixMode) // encoded as uid:gid:mode, mode in octal with a leading 0
33+
if err := unix.Lsetxattr(path, overrideKey, []byte(val), 0); err != nil {
34+
slog.Debug("setxattr override_stat failed", "path", path, "err", err)
35+
}
36+
}
37+
38+
// SetOverrideStatFromPath sets the override_stat xattr by reading the
39+
// file's current mode via Lstat. Useful when you know the intended
40+
// uid/gid but the mode comes from the existing file on disk.
41+
func SetOverrideStatFromPath(path string, uid, gid int) {
42+
info, err := os.Lstat(path) // Lstat describes a symlink itself rather than its target
43+
if err != nil {
44+
slog.Debug("lstat for override_stat failed", "path", path, "err", err)
45+
return // without a mode there is nothing to encode; no xattr is written
46+
}
47+
SetOverrideStat(path, uid, gid, info.Mode())
48+
}
49+
50+
// CopyOverrideStat copies the user.containers.override_stat xattr from
51+
// src to dst. No-op if the xattr cannot be read from src or is empty; a failed write to dst is logged at debug level.
52+
func CopyOverrideStat(src, dst string) {
53+
buf := make([]byte, 256) // NOTE(review): a value longer than 256 bytes presumably makes Lgetxattr fail (ERANGE) and is skipped — confirm
54+
n, err := unix.Lgetxattr(src, overrideKey, buf)
55+
if err != nil || n == 0 {
56+
return // read failure or empty value: nothing to copy, and nothing is logged
57+
}
58+
if err := unix.Lsetxattr(dst, overrideKey, buf[:n], 0); err != nil {
59+
slog.Debug("copy override_stat xattr failed", "dst", dst, "err", err)
60+
}
61+
}

0 commit comments

Comments
 (0)