Skip to content

Commit feefedd

Browse files
committed
Shorten the "mem" diagnostic output and add the "alloc_once" diagnostic
1 parent 8b5e586 commit feefedd

1 file changed

Lines changed: 27 additions & 51 deletions

File tree

evaluation_function/evaluation.py

Lines changed: 27 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -163,68 +163,47 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
163163
_add_common_timing(items, t_handler0)
164164
return _result(False, items)
165165

166-
# MEM) Environment & memory diagnostics (no torch/ultralytics)
166+
# ----------------------------
167+
# MEM) Short env/memory diagnostics (keep output small)
167168
# ----------------------------
168169
if diag == "mem":
169170
try:
170171
import platform
171172
import resource
172173
import sys
173174

174-
items.append(("MEM", "env/memory diagnostics"))
175175
items.append(("platform", platform.platform()))
176176
items.append(("python_version", platform.python_version()))
177-
items.append(("python_implementation", platform.python_implementation()))
178177
items.append(("pid", str(os.getpid())))
179178

180-
# sys.path (truncate to avoid huge output)
181-
try:
182-
sp = "\n".join(sys.path[:20])
183-
items.append(("sys_path_head", _escape_html(sp).replace("\n", "<br>")))
184-
except Exception as e:
185-
items.append(("sys_path_head_FAIL", f"{type(e).__name__}: {e}"))
186-
187-
# ru_maxrss: max resident set size so far
188-
# On Linux: typically KB; on macOS: bytes. Platform is Linux here.
179+
# ru_maxrss (Linux KB)
189180
try:
190181
rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
191-
items.append(("ru_maxrss_raw", str(rss)))
192-
# Best-effort human-friendly conversion assuming Linux KB.
193-
try:
194-
rss_mb = float(rss) / 1024.0
195-
items.append(("ru_maxrss_mb_est", f"{rss_mb:.2f}"))
196-
except Exception:
197-
pass
182+
items.append(("ru_maxrss_kb", str(rss)))
183+
items.append(("ru_maxrss_mb_est", f"{(float(rss) / 1024.0):.2f}"))
198184
except Exception as e:
199185
items.append(("ru_maxrss_FAIL", f"{type(e).__name__}: {e}"))
200186

201-
# cgroup memory limits / usage
202-
# We ALWAYS emit FOUND/NOT_FOUND for each path.
187+
# Only read the most important cgroup files; don't spam NOT_FOUND
203188
cgroup_files = [
204-
# cgroup v2 common files
205-
"/sys/fs/cgroup/memory.max",
206-
"/sys/fs/cgroup/memory.high",
189+
"/sys/fs/cgroup/memory.max", # cgroup v2
207190
"/sys/fs/cgroup/memory.current",
208-
"/sys/fs/cgroup/memory.swap.max",
209-
"/sys/fs/cgroup/cpu.max",
210-
# cgroup v1 common files
211-
"/sys/fs/cgroup/memory/memory.limit_in_bytes",
212-
"/sys/fs/cgroup/memory/memory.soft_limit_in_bytes",
191+
"/sys/fs/cgroup/memory/memory.limit_in_bytes", # cgroup v1
213192
"/sys/fs/cgroup/memory/memory.usage_in_bytes",
214-
"/sys/fs/cgroup/memory/memory.max_usage_in_bytes",
215193
]
216194

217195
for p in cgroup_files:
218-
key = f"cgroup:{os.path.basename(p)}"
219-
if os.path.exists(p):
220-
try:
221-
with open(p, "r", encoding="utf-8") as f:
222-
val = f.read().strip()
223-
items.append((key, val))
224-
except Exception as e:
225-
items.append((key + "_READ_FAIL", f"{type(e).__name__}: {e}"))
226-
else:
227-
items.append((key, "NOT_FOUND"))
196+
if not os.path.exists(p):
197+
continue
198+
try:
199+
with open(p, "r", encoding="utf-8") as f:
200+
val = f.read().strip()
201+
# truncate just in case
202+
if len(val) > 64:
203+
val = val[:64] + "..."
204+
items.append((os.path.basename(p), val))
205+
except Exception as e:
206+
items.append((os.path.basename(p) + "_READ_FAIL", f"{type(e).__name__}: {e}"))
228207

229208
_add_common_timing(items, t_handler0)
230209
return _result(False, items)
@@ -268,18 +247,15 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
268247
_add_common_timing(items, t_handler0)
269248
return _result(False, items)
270249

271-
if diag == "alloc":
250+
if diag == "alloc_once":
272251
try:
273-
step_mb = int(_pget(params, "alloc_step_mb", 64))
274-
max_mb = int(_pget(params, "alloc_max_mb", 1024))
275-
chunks = []
276-
allocated = 0
277-
while allocated + step_mb <= max_mb:
278-
chunks.append(bytearray(step_mb * 1024 * 1024))
279-
allocated += step_mb
280-
items.append(("alloc_mb", str(allocated)))
281-
_add_common_timing(items, t_handler0)
282-
items.append(("ALLOC_DONE", f"{allocated}MB"))
252+
step_mb = int(_pget(params, "alloc_step_mb", 16))
253+
items.append(("alloc_step_mb", str(step_mb)))
254+
255+
# allocate one chunk only
256+
_buf = bytearray(step_mb * 1024 * 1024)
257+
items.append(("alloc_status", f"allocated {step_mb}MB ✅"))
258+
283259
_add_common_timing(items, t_handler0)
284260
return _result(False, items)
285261
except Exception as e:

0 commit comments

Comments
 (0)