Skip to content

Commit 41a38a4

Browse files
committed
mantle/system/nproc: account for page cache in cgroup available memory
The cgroup available memory calculation used memory.current (total cgroup usage) directly, which includes page cache (file-backed memory). Since page cache is reclaimable by the kernel under memory pressure, it should not count as unavailable. This caused GetCurrentMemAvailableMiB() to significantly underestimate available memory, making QEMU instance scheduling overly conservative. Read the "file" field from /sys/fs/cgroup/memory.stat, which reports the page cache size in bytes, and subtract it from current usage before computing available memory. The effective formula becomes: `available = limit - (current - page_cache)`. This mirrors how /proc/meminfo computes MemAvailable by considering reclaimable caches. A new helper getCgroupMemoryStatField() is added for parsing individual fields from memory.stat, returning 0 gracefully if the file or field is absent. Written-by: <anthropic/claude-opus-4.6>
1 parent 9467ee2 commit 41a38a4

1 file changed

Lines changed: 57 additions & 3 deletions

File tree

mantle/system/nproc.go

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,12 @@ func getCgroupMemoryLimitMiB() (uint, error) {
176176
}
177177

178178
// getCgroupMemoryAvailableMiB returns the available memory within the
179-
// cgroup v2 in MiB (limit - current usage), or math.MaxUint if no limit.
179+
// cgroup v2 in MiB, or math.MaxUint if no limit is set. It computes
180+
// available memory as: limit - (current - page_cache), where page_cache
181+
// is the "file" field from memory.stat. Page cache (file-backed memory)
182+
// is reclaimable by the kernel under memory pressure, so it should not
183+
// be counted as unavailable. This mirrors how /proc/meminfo computes
184+
// MemAvailable by considering reclaimable caches.
180185
func getCgroupMemoryAvailableMiB() (uint, error) {
181186
maxBuf, err := os.ReadFile("/sys/fs/cgroup/memory.max")
182187
if os.IsNotExist(err) {
@@ -200,8 +205,57 @@ func getCgroupMemoryAvailableMiB() (uint, error) {
200205
if err != nil {
201206
return 0, fmt.Errorf("invalid memory.current value: %w", err)
202207
}
203-
if current >= limit {
208+
209+
// Read page cache size from memory.stat to exclude reclaimable
210+
// file-backed memory from the usage calculation.
211+
pageCache, err := getCgroupMemoryStatField("file")
212+
if err != nil {
213+
return 0, err
214+
}
215+
216+
// Subtract the page cache size from the memory.current. Page
217+
// cache should always be less than the memory.current but
218+
// add a check and do nothing just in case.
219+
usage := current
220+
if pageCache < usage {
221+
usage -= pageCache
222+
}
223+
224+
// This also shouldn't happen, but in case the usage is larger
225+
// than the limit let's just return that there's 0 available memory.
226+
if usage >= limit {
227+
return 0, nil
228+
}
229+
return uint((limit - usage) / (1024 * 1024)), nil
230+
}
231+
232+
// getCgroupMemoryStatField reads a specific field from
233+
// /sys/fs/cgroup/memory.stat and returns its value in bytes.
234+
// The file contains key-value pairs like "file 123456789".
235+
// Returns 0 if the file does not exist or the field is not found.
236+
func getCgroupMemoryStatField(field string) (uint64, error) {
237+
f, err := os.Open("/sys/fs/cgroup/memory.stat")
238+
if os.IsNotExist(err) {
204239
return 0, nil
240+
} else if err != nil {
241+
return 0, fmt.Errorf("reading memory.stat: %w", err)
242+
}
243+
defer f.Close()
244+
245+
scanner := bufio.NewScanner(f)
246+
for scanner.Scan() {
247+
parts := strings.Fields(scanner.Text())
248+
if len(parts) == 2 && parts[0] == field {
249+
val, err := strconv.ParseUint(parts[1], 10, 64)
250+
if err != nil {
251+
return 0, fmt.Errorf("parsing memory.stat field %s: %w", field, err)
252+
}
253+
return val, nil
254+
}
255+
}
256+
if err := scanner.Err(); err != nil {
257+
return 0, fmt.Errorf("scanning memory.stat: %w", err)
205258
}
206-
return uint((limit - current) / (1024 * 1024)), nil
259+
// Field not found; return 0 so callers degrade gracefully.
260+
return 0, nil
207261
}

0 commit comments

Comments
 (0)