Skip to content

Commit 62e6336

Browse files
authored
feat(docker,podman): bump RLIMIT_NOFILE to kernel max for every container (#225)
## Summary

EL clients keep many file descriptors open while indexing chain DBs, and typical container defaults (1024 soft / 4096 hard) trip `too many open files` errors on long benchmark runs. Lift `RLIMIT_NOFILE` to the highest value the host kernel allows, applied uniformly to every container the runner creates — both Docker and Podman runtimes.

### Files

**New**

- `pkg/docker/ulimit.go` — `HostMaxNofile()` reads `/proc/sys/fs/nr_open` (the kernel-wide ceiling above which `RLIMIT_NOFILE` cannot be raised). Falls back to `DefaultMaxNofile = 1048576` when the file is missing/empty/unparseable (non-Linux hosts, restricted procfs, etc.).
- `pkg/docker/ulimit_test.go` — non-zero check + Linux `/proc/sys/fs/nr_open` sanity check.

**Updated**

- `pkg/docker/docker.go` — `hostCfg.Ulimits = []*container.Ulimit{{Name: "nofile", Hard: nofile, Soft: nofile}}` with `nofile = int64(HostMaxNofile())`. Set after the literal because `Ulimits` lives on the embedded `Resources` struct.
- `pkg/podman/podman.go` — `s.Rlimits = append(s.Rlimits, specs.POSIXRlimit{Type: "RLIMIT_NOFILE", Hard: nofile, Soft: nofile})` with `nofile = docker.HostMaxNofile()`. Podman's specgen takes `uint64` directly.

No config knob — always-on by design. Both `CreateContainer` paths (regular run + init container) get the bump.

### Tests

- [x] `go build ./...` clean
- [x] `go test ./pkg/docker/...` passes
- [x] `golangci-lint run --new-from-rev=origin/master` clean

## Test plan

- [x] `docker inspect <bench-container>` should show `HostConfig.Ulimits = [{"Name":"nofile","Soft":1048576,"Hard":1048576}]` (or whatever your host's `cat /proc/sys/fs/nr_open` is).
- [x] Same for `podman inspect <bench-container>` under `.HostConfig.Ulimits`.
- [x] Run a long benchmark (e.g. EEST stateful suite) on a client previously hitting EMFILE; confirm the failure no longer reproduces.
1 parent 04b0a36 commit 62e6336

4 files changed

Lines changed: 91 additions & 0 deletions

File tree

pkg/docker/docker.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,9 @@ func (m *manager) Start(ctx context.Context) error {
168168
}
169169

170170
m.log.Debug("Connected to Docker daemon")
171+
m.log.WithField("nofile", HostMaxNofile()).Info(
172+
"Container RLIMIT_NOFILE bumped to host kernel max",
173+
)
171174

172175
return nil
173176
}
@@ -253,12 +256,21 @@ func (m *manager) CreateContainer(ctx context.Context, spec *ContainerSpec) (str
253256
Cmd: spec.Command,
254257
}
255258

259+
// Bump RLIMIT_NOFILE to the kernel's hard ceiling so EL clients
260+
// don't trip "too many open files" errors during long benchmark
261+
// runs. Applied to every container we create. Ulimits lives on the
262+
// embedded Resources struct, so it has to be set after the literal.
263+
nofile := int64(HostMaxNofile()) //nolint:gosec // bounded by kernel nr_open, fits in int64.
264+
256265
hostCfg := &container.HostConfig{
257266
Mounts: mounts,
258267
NetworkMode: container.NetworkMode(spec.NetworkName),
259268
CapAdd: spec.CapAdd,
260269
SecurityOpt: spec.SecurityOpt,
261270
}
271+
hostCfg.Ulimits = []*container.Ulimit{
272+
{Name: "nofile", Hard: nofile, Soft: nofile},
273+
}
262274

263275
// Apply resource limits if configured.
264276
if spec.ResourceLimits != nil {

pkg/docker/ulimit.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package docker
2+
3+
import (
4+
"os"
5+
"strconv"
6+
"strings"
7+
)
8+
9+
// DefaultMaxNofile is the nofile (open-file-descriptor) limit used as a
// fallback for containers whenever /proc/sys/fs/nr_open cannot be read,
// for example on non-Linux hosts. Its value, 1048576 (1 << 20), matches
// the common Linux kernel default cap.
const DefaultMaxNofile uint64 = 1 << 20
13+
14+
// HostMaxNofile returns the highest RLIMIT_NOFILE value that containers
15+
// on this host are allowed to set. On Linux it reads
16+
// /proc/sys/fs/nr_open, the kernel-wide cap above which RLIMIT_NOFILE
17+
// cannot be raised. On any other platform — or if the file is missing,
18+
// empty, or unparsable — it returns DefaultMaxNofile. EL clients tend
19+
// to keep many file descriptors open; bumping nofile to the kernel's
20+
// hard ceiling avoids spurious "too many open files" failures during
21+
// long benchmark runs.
22+
func HostMaxNofile() uint64 {
23+
data, err := os.ReadFile("/proc/sys/fs/nr_open")
24+
if err != nil {
25+
return DefaultMaxNofile
26+
}
27+
28+
v, err := strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
29+
if err != nil || v == 0 {
30+
return DefaultMaxNofile
31+
}
32+
33+
return v
34+
}

pkg/docker/ulimit_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package docker
2+
3+
import (
4+
"os"
5+
"runtime"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
)
11+
12+
func TestHostMaxNofile_NonZero(t *testing.T) {
13+
got := HostMaxNofile()
14+
assert.NotZero(t, got, "HostMaxNofile must always return a non-zero value")
15+
}
16+
17+
func TestHostMaxNofile_LinuxReadsProc(t *testing.T) {
18+
if runtime.GOOS != "linux" {
19+
t.Skip("Linux-only test: /proc/sys/fs/nr_open")
20+
}
21+
22+
data, err := os.ReadFile("/proc/sys/fs/nr_open")
23+
if err != nil {
24+
t.Skipf("could not read /proc/sys/fs/nr_open: %v", err)
25+
}
26+
27+
require.NotEmpty(t, data)
28+
29+
got := HostMaxNofile()
30+
assert.GreaterOrEqual(t, got, uint64(1024), "kernel nr_open should be at least 1024")
31+
}

pkg/podman/podman.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,10 @@ func (m *manager) Start(ctx context.Context) error {
115115
"runtime": info.Host.OCIRuntime.Name,
116116
}).Debug("Connected to Podman daemon")
117117

118+
m.log.WithField("nofile", docker.HostMaxNofile()).Info(
119+
"Container RLIMIT_NOFILE bumped to host kernel max",
120+
)
121+
118122
return nil
119123
}
120124

@@ -248,6 +252,16 @@ func (m *manager) CreateContainer(
248252
}
249253
}
250254

255+
// Bump RLIMIT_NOFILE to the kernel's hard ceiling so EL clients
256+
// don't trip "too many open files" errors during long benchmark
257+
// runs. Applied to every container we create.
258+
nofile := docker.HostMaxNofile()
259+
s.Rlimits = append(s.Rlimits, specs.POSIXRlimit{
260+
Type: "RLIMIT_NOFILE",
261+
Hard: nofile,
262+
Soft: nofile,
263+
})
264+
251265
// Apply resource limits.
252266
if spec.ResourceLimits != nil {
253267
s.ResourceLimits = &specs.LinuxResources{}

0 commit comments

Comments
 (0)