|
| 1 | +# Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +# or more contributor license agreements. See the NOTICE file |
| 3 | +# distributed with this work for additional information |
| 4 | +# regarding copyright ownership. The ASF licenses this file |
| 5 | +# to you under the Apache License, Version 2.0 (the |
| 6 | +# "License"); you may not use this file except in compliance |
| 7 | +# with the License. You may obtain a copy of the License at |
| 8 | +# |
| 9 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +# |
| 11 | +# Unless required by applicable law or agreed to in writing, |
| 12 | +# software distributed under the License is distributed on an |
| 13 | +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +# KIND, either express or implied. See the License for the |
| 15 | +# specific language governing permissions and limitations |
| 16 | +# under the License. |
| 17 | +# |
| 18 | +import os |
| 19 | +import signal |
| 20 | +import threading |
| 21 | +from datetime import datetime |
| 22 | + |
| 23 | +import psutil |
| 24 | + |
| 25 | +from ainode.core.config import AINodeDescriptor |
| 26 | +from ainode.core.constant import AINODE_SYSTEM_FILE_NAME |
| 27 | +from ainode.core.log import Logger |
| 28 | +from ainode.core.rpc.client import ClientManager |
| 29 | +from ainode.core.rpc.handler import AINodeRPCServiceHandler |
| 30 | +from ainode.core.rpc.service import AINodeRPCService |
| 31 | +from ainode.thrift.common.ttypes import ( |
| 32 | + TAINodeConfiguration, |
| 33 | + TAINodeLocation, |
| 34 | + TEndPoint, |
| 35 | + TNodeResource, |
| 36 | +) |
| 37 | +from ainode.thrift.confignode.ttypes import TNodeVersionInfo |
| 38 | + |
| 39 | +logger = Logger() |
| 40 | + |
| 41 | + |
def _generate_configuration() -> TAINodeConfiguration:
    """Build the configuration this AINode sends when registering or restarting.

    Returns:
        A ``TAINodeConfiguration`` made of the node location (AINode id plus
        the inference RPC endpoint taken from the AINode config) and the
        host resources (CPU count and total memory as reported by psutil).
    """
    config = AINodeDescriptor().get_config()
    location = TAINodeLocation(
        config.get_ainode_id(),
        TEndPoint(
            config.get_ain_inference_rpc_address(),
            config.get_ain_inference_rpc_port(),
        ),
    )

    # psutil.cpu_count() returns None when the count cannot be determined;
    # fall back to os.cpu_count() and finally to 1 so int() never sees None.
    cpu_cores = psutil.cpu_count() or os.cpu_count() or 1
    # `.total` is the first field of the tuple returned by virtual_memory().
    total_memory = psutil.virtual_memory().total
    resource = TNodeResource(int(cpu_cores), int(total_memory))

    return TAINodeConfiguration(location, resource)
| 53 | + |
| 54 | + |
def _generate_version_info() -> TNodeVersionInfo:
    """Wrap the configured version and build info in a ``TNodeVersionInfo``."""
    version = AINodeDescriptor().get_config().get_version_info()
    build = AINodeDescriptor().get_config().get_build_info()
    return TNodeVersionInfo(version, build)
| 60 | + |
| 61 | + |
def _check_path_permission():
    """Make sure the AINode system directory exists, creating it if needed.

    Nothing is done when the path already exists.

    Raises:
        PermissionError: If the directory cannot be created or its mode
            cannot be changed. The error is logged before being re-raised.
    """
    system_path = AINodeDescriptor().get_config().get_ain_system_dir()
    if os.path.exists(system_path):
        return

    try:
        # exist_ok=True: the directory may be created by someone else between
        # the existence check above and this call; that must not be fatal.
        os.makedirs(system_path, exist_ok=True)
        # NOTE(review): 0o777 leaves the directory world-writable; confirm
        # this is really required.
        os.chmod(system_path, 0o777)
    except PermissionError as e:
        logger.error(e)
        raise
| 71 | + |
| 72 | + |
def _generate_system_properties(ainode_id: int):
    """Collect the key/value pairs that describe this AINode.

    Args:
        ainode_id: The id stored under the ``ainode_id`` key.

    Returns:
        A dict holding the given id together with the cluster name, version,
        build info, inference RPC address/port and target ConfigNode list
        read from the AINode config. Keys are inserted in a fixed order.
    """
    properties = {"ainode_id": ainode_id}
    properties["cluster_name"] = AINodeDescriptor().get_config().get_cluster_name()
    properties["iotdb_version"] = AINodeDescriptor().get_config().get_version_info()
    properties["commit_id"] = AINodeDescriptor().get_config().get_build_info()
    properties["ain_rpc_address"] = (
        AINodeDescriptor().get_config().get_ain_inference_rpc_address()
    )
    properties["ain_rpc_port"] = (
        AINodeDescriptor().get_config().get_ain_inference_rpc_port()
    )
    properties["config_node_list"] = (
        AINodeDescriptor().get_config().get_ain_target_config_node_list()
    )
    return properties
| 87 | + |
| 88 | + |
class AINode:
    """Lifecycle manager of an IoTDB AINode process.

    ``start`` registers the node to the IoTDB cluster (or announces a restart
    when the system properties file already exists) and launches the RPC
    service. ``stop`` shuts the RPC service down and is also triggered by
    SIGTERM once the node has started successfully.
    """

    def __init__(self):
        self._rpc_service = None
        self._rpc_handler = None
        # Created eagerly so that stop() is safe to call at any time, e.g.
        # before start(), after a failed start, or from the RPC handler.
        self._stop_event = threading.Event()

    def start(self):
        """Register (or restart) the node and start the RPC service.

        Raises:
            Exception: Whatever the registration / restart request or the
                writing of the system properties file raises; the error is
                logged before being re-raised.
        """
        _check_path_permission()
        config = AINodeDescriptor().get_config()
        system_properties_file = os.path.join(
            config.get_ain_system_dir(),
            AINODE_SYSTEM_FILE_NAME,
        )
        if not os.path.exists(system_properties_file):
            # If the system.properties file does not exist, the AINode will
            # register to IoTDB cluster.
            try:
                logger.info("IoTDB-AINode is registering to IoTDB cluster...")
                ainode_id = (
                    ClientManager()
                    .borrow_config_node_client()
                    .node_register(
                        config.get_cluster_name(),
                        _generate_configuration(),
                        _generate_version_info(),
                    )
                )
                config.set_ainode_id(ainode_id)
                system_properties = _generate_system_properties(ainode_id)
                with open(system_properties_file, "w") as f:
                    # First line is a comment carrying the creation time.
                    f.write(f"#{datetime.now()}\n")
                    for key, value in system_properties.items():
                        f.write(f"{key}={value}\n")
            except Exception as e:
                logger.error(
                    f"IoTDB-AINode failed to register to IoTDB cluster: {e}"
                )
                raise
        else:
            # If the system.properties file does exist, the AINode will just
            # restart.
            try:
                logger.info("IoTDB-AINode is restarting...")
                ClientManager().borrow_config_node_client().node_restart(
                    config.get_cluster_name(),
                    _generate_configuration(),
                    _generate_version_info(),
                )
            except Exception as e:
                logger.error(f"IoTDB-AINode failed to restart: {e}")
                raise

        # Fresh stop event for this run; it must exist before the RPC service
        # is launched because the handler holds a reference to this node.
        self._stop_event = threading.Event()

        # Start the RPC service
        self._rpc_handler = AINodeRPCServiceHandler(aiNode=self)
        self._rpc_service = AINodeRPCService(self._rpc_handler)
        self._rpc_service.start()
        # Wait up to one second, then inspect the exit code of the service.
        self._rpc_service.join(1)
        if self._rpc_service.exit_code != 0:
            logger.info("IoTDB-AINode failed to start, please check previous logs.")
            return

        logger.info("IoTDB-AINode has successfully started.")

        # Register stop hook
        signal.signal(signal.SIGTERM, self._handle_signal)

    def _handle_signal(self, signum, frame):
        """Signal handler: log the received signal and stop the node."""
        signal_name = {signal.SIGTERM: "SIGTERM", signal.SIGINT: "SIGINT"}.get(
            signum, f"SIGNAL {signum}"
        )

        logger.info(f"IoTDB-AINode receives {signal_name}, initiating graceful stop...")
        self.stop()

    def stop(self):
        """Stop the RPC service (if any); repeated calls are no-ops."""
        if self._stop_event.is_set():
            return
        self._stop_event.set()
        if self._rpc_service:
            self._rpc_service.stop()
            self._rpc_service.join(1)
        logger.info("IoTDB-AINode has successfully stopped.")
0 commit comments