11import json
2+ import logging
23import os
4+ import threading
35import traceback
46
57from flask import Flask , jsonify , request , Response
911import constants
1012from exceptions .exceptions import CustomError
1113from services .management_service import ManagementService , Action , BackendStatus
14+ from utils .logger import log
1215from .management_routes import ManagementRoutes
1316from .serverless_api_routes import ServerlessApiRoutes
1417from .cpu_routes import CpuRoutes
@@ -19,16 +22,17 @@ class Routes:
def __init__(self):
    """Build the Flask app, wrap it with ``Sock``, register routes, and quiet Werkzeug logging."""
    flask_app = Flask(__name__)
    self.app = flask_app
    self._sock = Sock(flask_app)

    # Reboot lock: prevents two reboots from running concurrently.
    # It must exist before setup_routes() runs, because the reboot route
    # registered there closes over it.
    self._reboot_lock = threading.Lock()

    self.setup_routes()

    # Set the Werkzeug logger to ERROR so that only errors are shown and
    # individual requests are not logged.
    werkzeug_logger = logging.getLogger('werkzeug')
    werkzeug_logger.setLevel(logging.ERROR)
2630
2731
2832 def setup_routes (self ):
2933 # 管控API
30- management = ManagementRoutes ()
31- management .register (self .app )
34+ self . management = ManagementRoutes ()
35+ self . management .register (self .app )
3236
3337 # ServerlessAPI
3438 if constants .BACKEND_TYPE == constants .TYPE_COMFYUI :
@@ -39,11 +43,118 @@ def setup_routes(self):
3943 cpu_router = CpuRoutes ()
4044 cpu_router .register (self .app )
4145
@self.app.route("/api/manager/reboot", methods=["GET", "POST"])
def manager_reboot():
    """
    Intercept ComfyUI-Manager's reboot request and perform the restart
    through the management service instead.

    The request may also be triggered from the CPU function:
    - X-FunArt-Snapshot-Name header: name of the snapshot to load
    - X-Forwarded-By header: identifies where the request came from

    The reboot is asynchronous: the response is returned immediately and
    the reboot status can be queried through the /management/status API.

    Returns:
        A ``(json, 202)`` response once the background reboot thread has
        been started, or ``(json, 409)`` when another reboot already holds
        the reboot lock.

    Raises:
        Any exception raised while preparing the reboot (before the
        background thread is running) is re-raised after the reboot lock
        has been released.
    """
    # Reject the request if a reboot is already in progress.
    if not self._reboot_lock.acquire(blocking=False):
        log("WARNING", "Reboot request rejected: reboot already in progress")
        return jsonify({
            "status": "failed",
            "message": "Reboot already in progress, please wait"
        }), 409

    # From here on the lock is held. It is normally released by the
    # `finally` in do_reboot(); if we fail before that thread is running,
    # nobody else would release it and every later reboot would get 409,
    # so the except clause below releases it on that path.
    try:
        # Prefer the snapshot name passed in the header (sent by the CPU function).
        snapshot_from_header = request.headers.get(constants.HEADER_SNAPSHOT_NAME)
        forwarded_by = request.headers.get(constants.HEADER_FORWARDED_BY, 'Direct')

        service = ManagementService()
        current_snapshot = service.cur_snapshot_name or 'latest-dev'

        log("INFO", f"Intercepted /api/manager/reboot request from {forwarded_by} , current snapshot: {current_snapshot} ")

        # Reboot logic, executed asynchronously.
        def do_reboot():
            try:
                # When the CPU function supplied a snapshot, use it directly
                # and do not save the workspace again.
                if snapshot_from_header:
                    log("INFO", f"Using snapshot from CPU function: {snapshot_from_header} ")
                    snapshot_to_load = snapshot_from_header
                    skip_save = True
                else:
                    snapshot_to_load = current_snapshot
                    skip_save = False

                # If the latest management action was Start and no snapshot
                # was specified, save the workspace before rebooting.
                # TODO: could this affect the pre-stop logic?
                if not skip_save and service.latest_action and service.latest_action == Action.START:
                    try:
                        from services.workspace.snapshot_manager import SnapshotManager
                        log("INFO", "Saving workspace before reboot...")
                        result_map = service.save(SnapshotManager.TYPE_DEV)
                        log("INFO", f"Save result before reboot: {json.dumps(result_map, indent=2)} ")

                        # Use the name of the freshly saved snapshot.
                        if 'snapshot' in result_map:
                            snapshot_to_load = result_map['snapshot']
                            log("INFO", f"Will restart with newly saved snapshot: {snapshot_to_load} ")
                    except Exception as e:
                        # Best effort: a failed save must not block the reboot.
                        log("WARNING", f"Failed to save workspace before reboot: {str(e)} ")
                else:
                    log("INFO", "Skip saving workspace (latest_action is not START)")

                # Stop the service; a failure here is logged and the
                # restart is still attempted.
                try:
                    service.stop()
                except Exception as e:
                    log("WARNING", f"Error during stop: {e} ")

                # In CPU mode, asynchronously trigger a reboot of the GPU function.
                if constants.COMFYUI_MODE == 'cpu' and constants.GPU_FUNCTION_URL:
                    def trigger_gpu_reboot():
                        # NOTE(review): `requests` is not among the imports
                        # visible in this view -- confirm it is imported at
                        # module level.
                        try:
                            # No space between host and path: the URL must be
                            # "<GPU_FUNCTION_URL>/api/manager/reboot".
                            gpu_reboot_url = f"{constants.GPU_FUNCTION_URL.rstrip('/')}/api/manager/reboot"
                            log("INFO", f"Triggering GPU function reboot: {gpu_reboot_url} ")

                            gpu_headers = {
                                constants.HEADER_SNAPSHOT_NAME: snapshot_to_load,
                                constants.HEADER_FORWARDED_BY: 'CPU-Reboot-Trigger',
                                constants.HEADER_FC_INVOCATION_TYPE: 'Async'
                            }

                            gpu_resp = requests.post(gpu_reboot_url, headers=gpu_headers, timeout=10)
                            log("INFO", f"GPU function reboot triggered: status={gpu_resp.status_code} ")
                        except requests.exceptions.Timeout:
                            log("WARNING", "GPU function reboot request timed out (expected for async call)")
                        except Exception as e:
                            log("WARNING", f"Failed to trigger GPU function reboot: {str(e)} ")

                    # Trigger the GPU reboot without waiting for it.
                    threading.Thread(target=trigger_gpu_reboot, daemon=True).start()

                # Restart the local service with the selected snapshot.
                # Per the original comment, passing SKIP_INSTALL_SENTINEL as
                # nodes_map means dependencies are not installed.
                log("INFO", f"Restarting local service with snapshot: {snapshot_to_load} ")
                service.start(snapshot_to_load, nodes_map=service.SKIP_INSTALL_SENTINEL)
                log("INFO", "Reboot completed successfully")

            except Exception as e:
                error_msg = f"Failed to restart ComfyUI: {str(e)} "
                log("ERROR", f"{error_msg} \n Stacktrace:\n {traceback.format_exc()} ")
            finally:
                # Release the reboot lock.
                self._reboot_lock.release()

        # Run the reboot in a background thread. This is the last statement
        # in the try block, so the except clause below can only fire when
        # the worker is NOT running (no double release is possible).
        threading.Thread(target=do_reboot, daemon=True).start()
    except Exception:
        # The worker never started, so its `finally` will not run.
        self._reboot_lock.release()
        raise

    # Return the response immediately.
    return jsonify({
        "status": "success",
        "message": "Reboot request accepted, restarting in background"
    }), 202
152+
42153 @self .app .route ("/initialize" , methods = ["POST" ])
43154 def initialize ():
44155 # See FC docs for all the HTTP headers: https://www.alibabacloud.com/help/doc-detail/132044.htm#common-headers
45156 request_id = request .headers .get ("x-fc-request-id" , "" )
46- print ( " FC Initialize Start RequestId: " + request_id )
157+ log ( "INFO" , f" FC Initialize Start RequestId: { request_id } " )
47158
48159 # Use the following code to get temporary credentials
49160 # access_key_id = request.headers['x-fc-access-key-id']
@@ -56,7 +167,7 @@ def initialize():
56167
57168 # 使用环境变量指定的snapshot,默认为latest-dev
58169 snapshot_name = os .environ .get ('AUTO_LAUNCH_SNAPSHOT_NAME' , 'latest-dev' )
59- print ( f"Initializing function with ComfyUI mode: { constants .COMFYUI_MODE } , snapshot: { snapshot_name } " )
170+ log ( "INFO" , f"Initializing function with ComfyUI mode: { constants .COMFYUI_MODE } , snapshot: { snapshot_name } " )
60171 service .start (snapshot_name , nodes_map = {})
61172
62173 if (
@@ -65,22 +176,22 @@ def initialize():
65176 and constants .COMFYUI_MODE == "gpu"
66177 ):
67178 try :
68- print ( "prewarm models" )
179+ log ( "INFO" , "prewarm models" )
69180 prompt = json .loads (constants .PREWARM_PROMPT )
70181 api = ServerlessApiService ()
71182 api .run (prompt )
72183 api .api_clear_history ()
73- print ( "prewarm models done" )
184+ log ( "INFO" , "prewarm models done" )
74185 except Exception as e :
75- print ( f"prewarm models got exception:\n { e } " )
186+ log ( "ERROR" , f"prewarm models got exception:\n { e } " )
76187
77- print ( " FC Initialize End RequestId: " + request_id )
188+ log ( "INFO" , f" FC Initialize End RequestId: { request_id } " )
78189 return "Function is initialized, request_id: " + request_id + "\n "
79190
80191 @self .app .route ("/pre-stop" , methods = ["GET" ])
81192 def pre_stop ():
82193 request_id = request .headers .get ("x-fc-request-id" , "" )
83- print ( " FC PreStop Start RequestId: " + request_id )
194+ log ( "INFO" , f" FC PreStop Start RequestId: { request_id } " )
84195
85196 service = ManagementService () # singleton
86197 # 若最近一次管控操作为Start,且实例非预期销毁时,需要在pre-stop中保存工作空间从而兜底;
@@ -89,18 +200,18 @@ def pre_stop():
89200 try :
90201 from services .workspace .snapshot_manager import SnapshotManager
91202 result_map = service .save (SnapshotManager .TYPE_DEV )
92- print ( f"save resp when preStop: { json .dumps (result_map , indent = 2 )} " )
203+ log ( "INFO" , f"save resp when preStop: { json .dumps (result_map , indent = 2 )} " )
93204 except Exception as e :
94- print ( f"error occur when preStop: { str (e )} " )
205+ log ( "ERROR" , f"error occur when preStop: { str (e )} " )
95206 else :
96- print ( "Do nothing in pre-stop" )
97- print ( " FC PreStop End RequestId: " + request_id )
207+ log ( "INFO" , "Do nothing in pre-stop" )
208+ log ( "INFO" , f" FC PreStop End RequestId: { request_id } " )
98209 return "OK"
99210
100211 @self .app .route ("/<path:path>" , methods = ["GET" , "POST" , "PUT" , "DELETE" , "PATCH" , "HEAD" , "OPTIONS" ])
101212 @self .app .route ("/" , methods = ["GET" , "POST" , "PUT" , "DELETE" , "PATCH" , "HEAD" , "OPTIONS" ])
102213 def proxy (path = "" ):
103- backend_status = management .service .status
214+ backend_status = self . management .service .status
104215 if backend_status not in (BackendStatus .RUNNING , BackendStatus .SAVING ):
105216 return jsonify ({
106217 "status" : "failed" ,
@@ -146,7 +257,7 @@ def handle_base_error(error):
146257
147258 def _handle_exception (e ):
148259 err_msg = traceback .format_exc ()
149- print ( f"{ str (e )} \n Stacktrace:\n { err_msg } " )
260+ log ( "ERROR" , f"{ str (e )} \n Stacktrace:\n { err_msg } " )
150261
151262 if isinstance (e , CustomError ):
152263 # 处理自定义异常
0 commit comments