Skip to content

Commit 593520a

Browse files
authored
[AINode] Add callback and release resources (#16070)
1 parent f481308 commit 593520a

4 files changed

Lines changed: 11 additions & 12 deletions

File tree

iotdb-core/ainode/ainode/core/ainode.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -159,6 +159,7 @@ def _handle_signal(self, signum, frame):
159159
def stop(self):
160160
if not self._stop_event.is_set():
161161
self._stop_event.set()
162+
self._rpc_handler.stop()
162163
if self._rpc_service:
163164
self._rpc_service.stop()
164165
self._rpc_service.join(1)

iotdb-core/ainode/ainode/core/inference/inference_request_pool.py

Lines changed: 4 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -175,18 +175,10 @@ def run(self):
175175
execute_daemon.start()
176176
for thread in self._threads:
177177
thread.join()
178+
logger.info(
179+
f"[Inference][Device-{self.device}][Pool-{self.pool_id}] InferenceRequestPool exited cleanly."
180+
)
178181

179182
def stop(self):
180183
self._stop_event.set()
181-
logger.info(
182-
f"[Inference][Device-{self.device}][Pool-{self.pool_id}] Stopping and releasing resources."
183-
)
184-
try:
185-
del self.model
186-
if "cuda" in str(self.device):
187-
torch.cuda.empty_cache()
188-
gc.collect()
189-
except Exception as e:
190-
logger.warning(
191-
f"[Inference][Device-{self.device}][Pool-{self.pool_id}] Failed to clean up: {e}"
192-
)
184+
logger.debug(f"[Inference][Pool-{self.pool_id}] stop() called")

iotdb-core/ainode/ainode/core/manager/inference_manager.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -310,6 +310,8 @@ def shutdown(self):
310310
while not requestQueue.empty():
311311
requestQueue.get_nowait()
312312
requestQueue.close()
313+
for requestPool, _ in pools:
314+
requestPool.join(timeout=10)
313315
while not self._result_queue.empty():
314316
self._result_queue.get_nowait()
315317
self._result_queue.close()

iotdb-core/ainode/ainode/core/rpc/handler.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,10 @@ def __init__(self, ainode):
4646
self._model_manager = ModelManager()
4747
self._inference_manager = InferenceManager()
4848

49+
def stop(self) -> None:
50+
logger.info("Stopping the RPC service handler of IoTDB-AINode...")
51+
self._inference_manager.shutdown()
52+
4953
def stopAINode(self) -> TSStatus:
5054
self._ainode.stop()
5155
return get_status(TSStatusCode.SUCCESS_STATUS, "AINode stopped successfully.")

0 commit comments

Comments
 (0)