Skip to content

Commit 4255f89

Browse files
Get benchmark node memory stats from /proc (#7856)
Co-authored-by: Amaury Chamayou <amchamay@microsoft.com>
1 parent 264c1d6 commit 4255f89

6 files changed

Lines changed: 83 additions & 0 deletions

File tree

tests/e2e_suite.py

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
import infra.e2e_args
55
import infra.network
6+
import infra.proc
67
import suite.test_suite as s
78
import suite.test_requirements as reqs
89
import infra.logging_app as app
@@ -24,6 +25,22 @@ class TestStatus(Enum):
2425
skipped = auto()
2526

2627

28+
def mem_stats(network):
    """Gather per-node memory statistics for the joined nodes of ``network``.

    For every node returned by ``network.get_joined_nodes()`` the pid of the
    node's process is looked up and handed to
    ``infra.proc.get_proc_memory_stats``.

    Returns a dict mapping ``node.local_node_id`` to the stats obtained for
    that node. Collection is best-effort: a node is left out when the lookup
    yields ``None``, or when an ``AttributeError`` / ``OSError`` is raised
    while collecting (the failure is logged at debug level, not propagated).
    """
    collected = {}
    for node in network.get_joined_nodes():
        try:
            node_pid = node.remote.remote.proc.pid
            node_stats = infra.proc.get_proc_memory_stats(node_pid)
            if node_stats is None:
                # Nothing readable for this node; skip it silently.
                continue
            collected[node.local_node_id] = node_stats
        except (AttributeError, OSError) as exc:
            # getattr with a fallback: the node may be the very object that
            # is missing attributes, so do not assume local_node_id exists.
            LOG.debug(
                f"Unable to collect memory stats for node "
                f"{getattr(node, 'local_node_id', '<unknown>')}: {exc}"
            )
    return collected
42+
43+
2744
def run(args):
2845
chosen_suite = []
2946

@@ -126,6 +143,7 @@ def filter_fun(x):
126143
{
127144
"status": status.name,
128145
"elapsed (s)": round(test_elapsed, 2),
146+
"memory": mem_stats(new_network),
129147
}
130148
)
131149

tests/infra/basicperf.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
import ccf.ledger
2121
import plotext as plt
2222
import infra.bencher
23+
import infra.proc
2324

2425

2526
def configure_remote_client(args, client_id, client_host, common_dir):
@@ -439,6 +440,15 @@ def run(args):
439440

440441
perf_label = args.perf_label
441442

443+
if not args.stop_primary_after_s:
444+
primary, _ = network.find_primary()
445+
mem = infra.proc.get_proc_memory_stats(
446+
primary.remote.remote.proc.pid
447+
)
448+
if mem is not None:
449+
bf = infra.bencher.Bencher()
450+
bf.set_memory(perf_label, mem)
451+
442452
network.stop_all_nodes()
443453

444454
agg = []

tests/infra/bencher.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,8 @@
66
import dataclasses
77
from typing import Optional, Union
88

9+
from loguru import logger as LOG
10+
911
BENCHER_FILE = "bencher.json"
1012

1113
# See https://bencher.dev/docs/reference/bencher-metric-format/
@@ -76,6 +78,17 @@ def __init__(self):
7678
with open(BENCHER_FILE, "w+") as bf:
7779
json.dump({}, bf)
7880

81+
def set_memory(self, key: str, proc_stats: dict):
    """Log a process's memory figures and record them as a Memory metric.

    Args:
        key: Name under which the metric is stored via ``self.set``.
        proc_stats: Mapping that must contain ``current_rss``, ``peak_rss``
            and ``virtual_size``; a missing key raises ``KeyError`` before
            anything is logged or stored.

    The stored metric uses ``current_rss`` as its value and ``peak_rss`` as
    its ``high_value``; ``virtual_size`` is only logged.
    """
    rss = proc_stats["current_rss"]
    peak = proc_stats["peak_rss"]
    virtual = proc_stats["virtual_size"]
    LOG.info(f"Memory: RSS={rss}, Peak RSS={peak}, Virtual={virtual}")
    self.set(key, Memory(rss, high_value=peak))
91+
7992
def set(self, key: str, metric: Union[Latency, Throughput, Memory]):
8093
with open(BENCHER_FILE, "r") as bf:
8194
data = json.load(bf)

tests/infra/piccolo_driver.py

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
from piccolo import generator
1616
from piccolo import analyzer
1717
import infra.bencher
18+
import infra.proc
1819

1920

2021
def get_command_args(args, network, get_command):
@@ -215,6 +216,12 @@ def run(get_command, args):
215216
infra.bencher.Throughput(perf_result),
216217
)
217218

219+
primary, _ = network.find_primary()
220+
mem = infra.proc.get_proc_memory_stats(primary.remote.remote.proc.pid)
221+
if mem is not None:
222+
bf = infra.bencher.Bencher()
223+
bf.set_memory(perf_label, mem)
224+
218225
for remote_client in clients:
219226
remote_client.stop()
220227

tests/infra/proc.py

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,39 @@
11
# Copyright (c) Microsoft Corporation. All rights reserved.
22
# Licensed under the Apache 2.0 License.
33
from subprocess import run, Popen, PIPE
4+
from pathlib import Path
5+
from typing import Optional, Dict
46

57
from loguru import logger as LOG
68

79

10+
def get_proc_memory_stats(
    pid: int, proc_root: str = "/proc"
) -> Optional[Dict[str, int]]:
    """Read memory statistics for a process from <proc_root>/<pid>/status.

    Args:
        pid: ID of the process whose status file is read.
        proc_root: Directory holding the per-process entries. Defaults to
            "/proc"; override it to read a procfs mounted elsewhere.

    Returns a dict with keys:
    - current_rss: current resident set size (VmRSS) in bytes
    - peak_rss: peak resident set size (VmHWM) in bytes
    - virtual_size: total virtual memory size (VmSize) in bytes
    Returns None if the status file cannot be read, if any of the three
    fields is absent, or if one of them has an empty or non-integer value.
    """
    status_path = Path(proc_root) / str(pid) / "status"
    try:
        # Decode leniently: only the numeric Vm* lines matter, so undecodable
        # bytes elsewhere in the file (e.g. in the process name) must not
        # turn into a UnicodeDecodeError for the caller.
        text = status_path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Covers a missing process, a missing procfs and permission errors
        # (PermissionError is a subclass of OSError).
        return None

    fields = {"VmRSS": "current_rss", "VmHWM": "peak_rss", "VmSize": "virtual_size"}
    result = {}
    for line in text.splitlines():
        name, sep, raw_value = line.partition(":")
        key = fields.get(name.strip())
        if not sep or key is None:
            continue
        try:
            # Values in /proc/*/status are in kB
            result[key] = int(raw_value.split()[0]) * 1024
        except (IndexError, ValueError):
            # A wanted field with an empty or non-numeric value means the
            # stats cannot be trusted; report "unavailable" instead of raising.
            return None
    return result if len(result) == len(fields) else None
35+
36+
837
def ccall(*args, path=None, log_output=True, env=None):
938
suffix = f" [cwd: {path}]" if path else ""
1039
cmd = " ".join(args)

tests/infra/runner.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,12 @@ def run(get_command, args):
159159
infra.bencher.Throughput(perf_result),
160160
)
161161

162+
primary, _ = network.find_primary()
163+
mem = infra.proc.get_proc_memory_stats(primary.remote.remote.proc.pid)
164+
if mem is not None:
165+
bf = infra.bencher.Bencher()
166+
bf.set_memory(perf_label, mem)
167+
162168
for remote_client in clients:
163169
remote_client.stop()
164170

0 commit comments

Comments
 (0)