@@ -163,68 +163,47 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
163163 _add_common_timing (items , t_handler0 )
164164 return _result (False , items )
165165
166- # MEM) Environment & memory diagnostics (no torch/ultralytics)
166+ # ----------------------------
167+ # MEM) Short env/memory diagnostics (keep output small)
167168 # ----------------------------
168169 if diag == "mem" :
169170 try :
170171 import platform
171172 import resource
172173 import sys
173174
174- items .append (("MEM" , "env/memory diagnostics" ))
175175 items .append (("platform" , platform .platform ()))
176176 items .append (("python_version" , platform .python_version ()))
177- items .append (("python_implementation" , platform .python_implementation ()))
178177 items .append (("pid" , str (os .getpid ())))
179178
180- # sys.path (truncate to avoid huge output)
181- try :
182- sp = "\n " .join (sys .path [:20 ])
183- items .append (("sys_path_head" , _escape_html (sp ).replace ("\n " , "<br>" )))
184- except Exception as e :
185- items .append (("sys_path_head_FAIL" , f"{ type (e ).__name__ } : { e } " ))
186-
187- # ru_maxrss: max resident set size so far
188- # On Linux: typically KB; on macOS: bytes. Platform is Linux here.
179+ # ru_maxrss (Linux KB)
189180 try :
190181 rss = resource .getrusage (resource .RUSAGE_SELF ).ru_maxrss
191- items .append (("ru_maxrss_raw" , str (rss )))
192- # Best-effort human-friendly conversion assuming Linux KB.
193- try :
194- rss_mb = float (rss ) / 1024.0
195- items .append (("ru_maxrss_mb_est" , f"{ rss_mb :.2f} " ))
196- except Exception :
197- pass
182+ items .append (("ru_maxrss_kb" , str (rss )))
183+ items .append (("ru_maxrss_mb_est" , f"{ (float (rss ) / 1024.0 ):.2f} " ))
198184 except Exception as e :
199185 items .append (("ru_maxrss_FAIL" , f"{ type (e ).__name__ } : { e } " ))
200186
201- # cgroup memory limits / usage
202- # We ALWAYS emit FOUND/NOT_FOUND for each path.
187+ # Only read the most important cgroup files; don't spam NOT_FOUND
203188 cgroup_files = [
204- # cgroup v2 common files
205- "/sys/fs/cgroup/memory.max" ,
206- "/sys/fs/cgroup/memory.high" ,
189+ "/sys/fs/cgroup/memory.max" , # cgroup v2
207190 "/sys/fs/cgroup/memory.current" ,
208- "/sys/fs/cgroup/memory.swap.max" ,
209- "/sys/fs/cgroup/cpu.max" ,
210- # cgroup v1 common files
211- "/sys/fs/cgroup/memory/memory.limit_in_bytes" ,
212- "/sys/fs/cgroup/memory/memory.soft_limit_in_bytes" ,
191+ "/sys/fs/cgroup/memory/memory.limit_in_bytes" , # cgroup v1
213192 "/sys/fs/cgroup/memory/memory.usage_in_bytes" ,
214- "/sys/fs/cgroup/memory/memory.max_usage_in_bytes" ,
215193 ]
216194
217195 for p in cgroup_files :
218- key = f"cgroup:{ os .path .basename (p )} "
219- if os .path .exists (p ):
220- try :
221- with open (p , "r" , encoding = "utf-8" ) as f :
222- val = f .read ().strip ()
223- items .append ((key , val ))
224- except Exception as e :
225- items .append ((key + "_READ_FAIL" , f"{ type (e ).__name__ } : { e } " ))
226- else :
227- items .append ((key , "NOT_FOUND" ))
196+ if not os .path .exists (p ):
197+ continue
198+ try :
199+ with open (p , "r" , encoding = "utf-8" ) as f :
200+ val = f .read ().strip ()
201+ # truncate just in case
202+ if len (val ) > 64 :
203+ val = val [:64 ] + "..."
204+ items .append ((os .path .basename (p ), val ))
205+ except Exception as e :
206+ items .append ((os .path .basename (p ) + "_READ_FAIL" , f"{ type (e ).__name__ } : { e } " ))
228207
229208 _add_common_timing (items , t_handler0 )
230209 return _result (False , items )
@@ -268,18 +247,15 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
268247 _add_common_timing (items , t_handler0 )
269248 return _result (False , items )
270249
271- if diag == "alloc " :
250+ if diag == "alloc_once " :
272251 try :
273- step_mb = int (_pget (params , "alloc_step_mb" , 64 ))
274- max_mb = int (_pget (params , "alloc_max_mb" , 1024 ))
275- chunks = []
276- allocated = 0
277- while allocated + step_mb <= max_mb :
278- chunks .append (bytearray (step_mb * 1024 * 1024 ))
279- allocated += step_mb
280- items .append (("alloc_mb" , str (allocated )))
281- _add_common_timing (items , t_handler0 )
282- items .append (("ALLOC_DONE" , f"{ allocated } MB" ))
252+ step_mb = int (_pget (params , "alloc_step_mb" , 16 ))
253+ items .append (("alloc_step_mb" , str (step_mb )))
254+
255+ # allocate one chunk only
256+ _buf = bytearray (step_mb * 1024 * 1024 )
257+ items .append (("alloc_status" , f"allocated { step_mb } MB ✅" ))
258+
283259 _add_common_timing (items , t_handler0 )
284260 return _result (False , items )
285261 except Exception as e :
0 commit comments