File tree Expand file tree Collapse file tree
src/main/python/systemds/scuro/drsearch Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -150,6 +150,11 @@ def is_finished(self) -> bool:
150150 self ._initialized = True
151151 return False
152152
153+ if self .not_enough_memory ():
154+ self .deadlock = True
155+ self .success = False
156+ return True
157+
153158 if self ._is_deadlock ():
154159 self .deadlock = True
155160 self .success = False
@@ -250,13 +255,26 @@ def _is_success(self) -> bool:
def _is_deadlock(self) -> bool:
    """Report whether all pending nodes are listed as memory-blocked.

    Returns:
        True when ``self._get_pending_nodes()`` yields at least one node id
        and every one of those ids is contained in
        ``self.blocked_memory_nodes_perm``. False otherwise, including when
        there are no pending nodes at all.
    """
    pending_nodes = self._get_pending_nodes()
    # NOTE(review): running/ready nodes are intentionally not consulted here;
    # the result depends only on the pending set. Confirm callers do not rely
    # on the earlier "nothing running and nothing ready" precondition.
    return len(pending_nodes) > 0 and all(
        node_id in self.blocked_memory_nodes_perm for node_id in pending_nodes
    )
267+
def not_enough_memory(self) -> bool:
    """Report whether some pending node cannot be placed in memory right now.

    Each pending node's ``(cpu_mem, gpu_mem)`` entry in ``self.node_resources``
    is checked against the CPU memory that is currently free
    (``memory_budget["cpu"] - memory_stats["cpu_in_use"]``) and, when the node
    requests GPU memory and ``self.n_gpu > 0``, against the result of
    ``self._gpu_with_most_free_memory``.

    Returns:
        True as soon as one pending node does not fit. If every pending node
        fits (or none are pending), True only when the CPU memory in use
        already exceeds the CPU budget.

    Raises:
        KeyError: if a pending node id has no entry in ``self.node_resources``.
    """
    for node_id in self._get_pending_nodes():
        cpu_mem, gpu_mem = self.node_resources[node_id]

        # NOTE(review): this compares against *currently free* CPU memory, not
        # the total budget, so memory held by other work counts against the
        # node. ``is_finished`` treats a True result as a deadlock; confirm
        # that is the intended trigger.
        cpu_free = self.memory_budget["cpu"] - self.memory_stats["cpu_in_use"]
        if cpu_mem > cpu_free:
            return True

        # Only nodes that request GPU memory are checked, and only when GPUs
        # are configured. A ``None`` result is treated as "does not fit"
        # (presumably no GPU has enough free memory; verify against the helper).
        needs_gpu = gpu_mem > 0.0 and self.n_gpu > 0
        if needs_gpu and self._gpu_with_most_free_memory(gpu_mem) is None:
            return True

    # No pending node was rejected: fall back to a global over-budget check.
    return self.memory_stats["cpu_in_use"] > self.memory_budget["cpu"]
260278
261279 def _check_memory_constraints (self , node_id : str ) -> bool :
262280 cpu_mem , gpu_mem = self .node_resources [node_id ]
You can’t perform that action at this time.
0 commit comments