@@ -1278,17 +1278,30 @@ def _control_pause(self, control_request: ControlRequest):
12781278 def _wait_inflight_drained (self ):
12791279 """
12801280 Wait until resource_manager.requests is completely empty.
1281- No timeout — abort pipeline will complete. Aligned with SGLang's poll-until-drained.
1281+ No timeout — abort pipeline will complete.
1282+ Logs a warning every 30 seconds while waiting to help diagnose potential hangs.
12821283 """
1283- start_time = time .time ()
1284- while (
1285- self .resource_manager .requests
1286- or self .scheduler .requests
1287- or self .resource_manager .waiting_abort_req_id_set
1288- or self .resource_manager .to_be_aborted_req_id_set
1289- ):
1284+ start_time = time .monotonic ()
1285+ next_warn_time = start_time + 30
1286+
1287+ while self .resource_manager .requests or self .scheduler .requests :
1288+ now = time .monotonic ()
1289+
1290+ if now >= next_warn_time :
1291+ self .llm_logger .warning (
1292+ "Still waiting for inflight requests to drain, "
1293+ f"elapsed: { now - start_time :.3f} seconds, "
1294+ f"resource_manager.requests: { len (self .resource_manager .requests )} , "
1295+ f"scheduler.requests: { len (self .scheduler .requests )} " ,
1296+ )
1297+ next_warn_time = now + 30
1298+
12901299 time .sleep (0.005 )
1291- self .llm_logger .info (f"All inflight requests drained, take time: { time .time () - start_time :.3f} seconds" )
1300+
1301+ self .llm_logger .info (
1302+ "All inflight requests drained, take time: %.3f seconds" ,
1303+ time .monotonic () - start_time ,
1304+ )
12921305
12931306 def _control_resume (self , control_request : ControlRequest ) -> Optional [dict ]:
12941307 """Control function for resuming request generation.
0 commit comments