Skip to content

Commit 9f1d50f

Browse files
committed
exeseal: refactor to string-based mode and strategy callbacks
- Drop Mode int enum in favor of plain strings; state.json now stores the annotation value.
- Refactor CloneSelfExe to dispatch via a per-mode list of strategy callbacks, eliminating duplication between explicit modes and the unset fallback path.
- Skip the IsSelfExeCloned shortcut when an explicit mode is set.

Signed-off-by: Mohammed Aminu Futa <mohammedfuta2000@gmail.com>
1 parent b75d55e commit 9f1d50f

8 files changed

Lines changed: 129 additions & 172 deletions

File tree

libcontainer/configs/config.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818

1919
"github.com/opencontainers/cgroups"
2020
devices "github.com/opencontainers/cgroups/devices/config"
21-
"github.com/opencontainers/runc/libcontainer/exeseal"
2221
"github.com/opencontainers/runtime-spec/specs-go"
2322
)
2423

@@ -208,7 +207,7 @@ type Config struct {
208207
Labels []string `json:"labels"`
209208

210209
// CloneSelfExe selects how runc protects the runc binary against tampering.
211-
CloneSelfExe exeseal.Mode `json:"clone_self_exe,omitempty"`
210+
CloneSelfExe string `json:"clone_self_exe,omitempty"`
212211

213212
// NoNewKeyring will not allocate a new session keyring for the container. It will use the
214213
// caller's keyring in this case.

libcontainer/container_linux.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,7 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
529529
exePath string
530530
safeExe *os.File
531531
)
532-
if exeseal.IsSelfExeCloned() {
532+
if c.config.CloneSelfExe == exeseal.ModeUnset && exeseal.IsSelfExeCloned() {
533533
// /proc/self/exe is already a cloned binary -- no need to do anything
534534
logrus.Debug("skipping binary cloning -- /proc/self/exe is already cloned!")
535535
// We don't need to use /proc/thread-self here because the exe mm of a

libcontainer/exeseal/cloned_binary_linux.go

Lines changed: 14 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -215,58 +215,24 @@ func IsCloned(exe *os.File) bool {
215215
// /proc/self/exe). This binary can then be used for "runc init" in order to
216216
// make sure the container process can never resolve the original runc binary.
217217
// For more details on why this is necessary, see CVE-2019-5736.
218-
func CloneSelfExe(tmpDir string, mode Mode) (*os.File, error) {
219-
switch mode {
220-
case ModeROSharedPage:
221-
overlayFile, err := sealedOverlayfs("/proc/self/exe", tmpDir)
222-
if err != nil {
223-
return nil, fmt.Errorf("%s=ro-shared-page requested but overlayfs unavailable: %w",
224-
AnnotationKey, err)
225-
}
226-
logrus.Debug("runc exeseal: using overlayfs for sealed /proc/self/exe") // used for tests
227-
return overlayFile, nil
228-
229-
case ModeIndependentDataCopy:
230-
return cloneSelfExeViaCloneBinary(tmpDir)
218+
func CloneSelfExe(tmpDir string, mode string) (*os.File, error) {
219+
strategies := strategiesFor(mode)
220+
if len(strategies) == 0 {
221+
return nil, fmt.Errorf("internal: no strategies for clone-self-exe mode %q", mode)
222+
}
231223

232-
case ModeUnset:
233-
// Try to create a temporary overlayfs to produce a readonly version of
234-
// /proc/self/exe that cannot be "unwrapped" by the container. In contrast
235-
// to CloneBinary, this technique does not require any extra memory usage
236-
// and does not have the (fairly noticeable) performance impact of copying
237-
// a large binary file into a memfd.
238-
//
239-
// Based on some basic performance testing, the overlayfs approach has
240-
// effectively no performance overhead (it is on par with both
241-
// MS_BIND+MS_RDONLY and no binary cloning at all) while memfd copying adds
242-
// around ~60% overhead during container startup.
243-
overlayFile, err := sealedOverlayfs("/proc/self/exe", tmpDir)
224+
var lastErr error
225+
for i, strategy := range strategies {
226+
f, err := strategy(tmpDir)
244227
if err == nil {
245-
logrus.Debug("runc exeseal: using overlayfs for sealed /proc/self/exe") // used for tests
246-
return overlayFile, nil
228+
return f, nil
229+
}
230+
lastErr = err
231+
if i < len(strategies)-1 {
232+
logrus.WithError(err).Debugf("clone-self-exe strategy %d/%d failed, trying next", i+1, len(strategies))
247233
}
248-
logrus.WithError(err).Debugf("could not use overlayfs for /proc/self/exe sealing -- falling back to making a temporary copy")
249-
return cloneSelfExeViaCloneBinary(tmpDir)
250-
251-
default:
252-
return nil, fmt.Errorf("internal error: unhandled CloneSelfExe mode %v", mode)
253-
}
254-
}
255-
256-
func cloneSelfExeViaCloneBinary(tmpDir string) (*os.File, error) {
257-
selfExe, err := os.Open("/proc/self/exe")
258-
if err != nil {
259-
return nil, fmt.Errorf("opening current binary: %w", err)
260-
}
261-
defer selfExe.Close()
262-
263-
stat, err := selfExe.Stat()
264-
if err != nil {
265-
return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
266234
}
267-
size := stat.Size()
268-
logrus.Debug("runc exeseal: using clone-binary path") // used for tests
269-
return CloneBinary(selfExe, size, "/proc/self/exe", tmpDir)
235+
return nil, fmt.Errorf("clone-self-exe failed (mode=%q): %w", mode, lastErr)
270236
}
271237

272238
// IsSelfExeCloned returns whether /proc/self/exe is a cloned binary that can

libcontainer/exeseal/mode.go

Lines changed: 0 additions & 65 deletions
This file was deleted.

libcontainer/exeseal/mode_linux.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package exeseal
2+
3+
import (
4+
"fmt"
5+
"os"
6+
7+
"github.com/sirupsen/logrus"
8+
)
9+
10+
// AnnotationKey is the OCI annotation that selects how runc protects
11+
// the host runc binary against tampering. See ValidateMode for the
12+
// recognized values.
13+
const AnnotationKey = "org.opencontainers.runc.clone-self-exe"
14+
15+
// Recognized values of the AnnotationKey annotation.
const (
16+
// ModeUnset is the value when the annotation is absent; the default
// fallback chain (overlayfs first, then clone-binary) is used.
ModeUnset = ""
17+
// ModeIndependentDataCopy selects the clone-binary path only; sealed
// overlayfs is not attempted.
ModeIndependentDataCopy = "independent-data-copy"
18+
// ModeROSharedPage selects sealed overlayfs only, with no fallback.
ModeROSharedPage = "ro-shared-page"
19+
)
20+
21+
// ValidateMode reports whether value is a recognized annotation value.
22+
//
23+
// Recognized values:
24+
// - "": annotation absent; use the default fallback chain.
25+
// - "independent-data-copy": use the clone-binary path (memfd, with
26+
// an internal fallback to a classic unlinked tmpfile on older
27+
// kernels). Sealed overlayfs is not attempted.
28+
// - "ro-shared-page": use sealed overlayfs only; fail
29+
// container creation if it is not available.
30+
func ValidateMode(value string) error {
31+
// The comparison is an exact string match: no case folding and no
// whitespace trimming is applied to value.
switch value {
32+
case ModeUnset, ModeIndependentDataCopy, ModeROSharedPage:
33+
return nil
34+
default:
35+
// The message lists only the two explicit modes; the empty string
// (ModeUnset) is accepted above but is not advertised as a wanted value.
return fmt.Errorf("invalid %s value %q (want %q or %q)",
36+
AnnotationKey, value,
37+
ModeIndependentDataCopy, ModeROSharedPage)
38+
}
39+
}
40+
41+
// strategy produces a sealed /proc/self/exe handle by one specific
42+
// mechanism. It returns the file on success, or an error on failure.
43+
type strategy func(tmpDir string) (*os.File, error)
44+
45+
// overlayfsStrategy is the strategy that seals /proc/self/exe via
// sealedOverlayfs, forwarding tmpDir to it. On failure the error from
// sealedOverlayfs is returned unwrapped; on success a debug line is logged
// and the resulting file is returned.
func overlayfsStrategy(tmpDir string) (*os.File, error) {
46+
f, err := sealedOverlayfs("/proc/self/exe", tmpDir)
47+
if err != nil {
48+
return nil, err
49+
}
50+
// Only logged on success; per the trailing note, tests rely on this message.
logrus.Debug("runc exeseal: using overlayfs for sealed /proc/self/exe") // used for tests
51+
return f, nil
52+
}
53+
54+
// cloneBinaryStrategy is the strategy that copies the running binary via
// CloneBinary. It opens /proc/self/exe, stats it to obtain the size, and
// passes the open file, its size, the "/proc/self/exe" name and tmpDir to
// CloneBinary, returning whatever CloneBinary returns. Errors from opening
// or stat-ing the binary are wrapped with context.
func cloneBinaryStrategy(tmpDir string) (*os.File, error) {
55+
selfExe, err := os.Open("/proc/self/exe")
56+
if err != nil {
57+
return nil, fmt.Errorf("opening current binary: %w", err)
58+
}
59+
// The source handle is closed when this function returns.
defer selfExe.Close()
60+
61+
stat, err := selfExe.Stat()
62+
if err != nil {
63+
return nil, fmt.Errorf("checking /proc/self/exe size: %w", err)
64+
}
65+
// Logged before CloneBinary runs, so the message appears even if the
// clone itself subsequently fails.
logrus.Debug("runc exeseal: using clone-binary path") // used for tests
66+
return CloneBinary(selfExe, stat.Size(), "/proc/self/exe", tmpDir)
67+
}
68+
69+
// strategiesFor returns the ordered list of strategies to try for a
70+
// given annotation value. The first successful strategy wins; if all
71+
// fail, the last error is returned to the caller.
72+
func strategiesFor(mode string) []strategy {
73+
switch mode {
74+
case ModeROSharedPage:
75+
// Explicit mode: overlayfs only, no fallback.
return []strategy{overlayfsStrategy}
76+
case ModeIndependentDataCopy:
77+
// Explicit mode: clone-binary only; overlayfs is never attempted.
return []strategy{cloneBinaryStrategy}
78+
case ModeUnset:
79+
// Historical default: overlayfs first, clone-binary fallback.
80+
// The order may be reversed in a future release.
81+
return []strategy{overlayfsStrategy, cloneBinaryStrategy}
82+
default:
83+
// Unrecognized mode: return no strategies. CloneSelfExe treats an
// empty list as an internal error.
return nil
84+
}
85+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package exeseal
2+
3+
import "testing"
4+
5+
// TestValidateMode is a table-driven test of ValidateMode. It checks that the
// empty string and the two explicit mode strings are accepted, and that an
// unknown value, an upper-cased value, and a value padded with spaces are
// all rejected.
func TestValidateMode(t *testing.T) {
6+
cases := []struct {
7+
desc string
8+
in string
9+
wantErr bool
10+
}{
11+
{"absent (empty string) is valid", "", false},
12+
{"recognized: independent-data-copy", "independent-data-copy", false},
13+
{"recognized: ro-shared-page", "ro-shared-page", false},
14+
{"unrecognized errors", "not-a-real-mode", true},
15+
{"case-sensitive", "INDEPENDENT-DATA-COPY", true},
16+
{"no whitespace trimming", " independent-data-copy ", true},
17+
}
18+
for _, tc := range cases {
19+
// Each case runs as a subtest named after its description.
t.Run(tc.desc, func(t *testing.T) {
20+
err := ValidateMode(tc.in)
21+
// Only the presence or absence of an error is checked, not its text.
if (err != nil) != tc.wantErr {
22+
t.Errorf("ValidateMode(%q) err=%v, wantErr=%v", tc.in, err, tc.wantErr)
23+
}
24+
})
25+
}
26+
}

libcontainer/exeseal/mode_test.go

Lines changed: 0 additions & 53 deletions
This file was deleted.

libcontainer/specconv/spec_linux.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,11 +434,10 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
434434

435435
config.Cgroups = c
436436

437-
cloneSelfExe, err := initCloneSelfExeMode(spec)
438-
if err != nil {
437+
config.CloneSelfExe = spec.Annotations[exeseal.AnnotationKey]
438+
if err := exeseal.ValidateMode(config.CloneSelfExe); err != nil {
439439
return nil, err
440440
}
441-
config.CloneSelfExe = cloneSelfExe
442441

443442
// set linux-specific config
444443
if spec.Linux != nil {

0 commit comments

Comments
 (0)