Skip to content

Commit d8ad653

Browse files
fix scheduler
1 parent 7407123 commit d8ad653

1 file changed

Lines changed: 19 additions & 1 deletion

File tree

src/main/python/systemds/scuro/drsearch/node_scheduler.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ def is_finished(self) -> bool:
150150
self._initialized = True
151151
return False
152152

153+
if self.not_enough_memory():
154+
self.deadlock = True
155+
self.success = False
156+
return True
157+
153158
if self._is_deadlock():
154159
self.deadlock = True
155160
self.success = False
@@ -250,13 +255,26 @@ def _is_success(self) -> bool:
250255
def _is_deadlock(self) -> bool:
    """Report whether every pending node is in ``blocked_memory_nodes_perm``.

    The check looks only at the pending nodes returned by
    ``self._get_pending_nodes()``; running and ready nodes are not
    consulted.

    Returns:
        ``True`` when there is at least one pending node and all pending
        nodes are members of ``self.blocked_memory_nodes_perm``;
        ``False`` otherwise (including when nothing is pending).
    """
    pending_nodes = self._get_pending_nodes()

    # Nothing pending means nothing can be stuck.
    if not pending_nodes:
        return False

    # A single pending node outside the blocked set is enough to rule
    # out a deadlock; all() stops at the first such node.
    return all(
        node_id in self.blocked_memory_nodes_perm for node_id in pending_nodes
    )
267+
268+
def not_enough_memory(self) -> bool:
    """Report whether some pending node cannot be placed in free memory.

    For every node id from ``self._get_pending_nodes()`` the node's
    ``(cpu_mem, gpu_mem)`` requirement is read from
    ``self.node_resources`` and compared with what is free right now.

    Returns:
        ``True`` if any pending node needs more CPU memory than
        ``memory_budget["cpu"] - memory_stats["cpu_in_use"]``, or needs
        GPU memory (with ``n_gpu > 0``) and
        ``self._gpu_with_most_free_memory`` returns ``None`` for it.
        If no pending node trips either check, returns whether the CPU
        memory in use already exceeds the CPU budget.
    """
    cpu_budget = self.memory_budget["cpu"]
    cpu_in_use = self.memory_stats["cpu_in_use"]
    # Loop-invariant: CPU memory that is free at this moment.
    cpu_free = cpu_budget - cpu_in_use

    # NOTE(review): the comparison uses memory that is free *now*, so a
    # pending node that would fit once running nodes release their memory
    # is reported as well -- confirm this is the intended policy.
    for node_id in self._get_pending_nodes():
        cpu_mem, gpu_mem = self.node_resources[node_id]
        if cpu_mem > cpu_free:
            return True
        # The GPU requirement is only checked when the node asks for GPU
        # memory and GPUs are configured.
        if gpu_mem > 0.0 and self.n_gpu > 0:
            if self._gpu_with_most_free_memory(gpu_mem) is None:
                return True

    return cpu_in_use > cpu_budget
260278

261279
def _check_memory_constraints(self, node_id: str) -> bool:
262280
cpu_mem, gpu_mem = self.node_resources[node_id]

0 commit comments

Comments
 (0)