Skip to content

Commit 04e4ae8

Browse files
authored
[Cherry-Pick][BugFix] Fix pause drain hang caused by stale abort markers (#7825) (#7826)
1 parent 72beb9e commit 04e4ae8

1 file changed

Lines changed: 22 additions & 9 deletions

File tree

fastdeploy/engine/common_engine.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,17 +1278,30 @@ def _control_pause(self, control_request: ControlRequest):
12781278
def _wait_inflight_drained(self):
12791279
"""
12801280
Wait until resource_manager.requests is completely empty.
1281-
No timeout — abort pipeline will complete. Aligned with SGLang's poll-until-drained.
1281+
No timeout — abort pipeline will complete.
1282+
Logs a warning every 30 seconds while waiting to help diagnose potential hangs.
12821283
"""
1283-
start_time = time.time()
1284-
while (
1285-
self.resource_manager.requests
1286-
or self.scheduler.requests
1287-
or self.resource_manager.waiting_abort_req_id_set
1288-
or self.resource_manager.to_be_aborted_req_id_set
1289-
):
1284+
start_time = time.monotonic()
1285+
next_warn_time = start_time + 30
1286+
1287+
while self.resource_manager.requests or self.scheduler.requests:
1288+
now = time.monotonic()
1289+
1290+
if now >= next_warn_time:
1291+
self.llm_logger.warning(
1292+
"Still waiting for inflight requests to drain, "
1293+
f"elapsed: {now - start_time:.3f} seconds, "
1294+
f"resource_manager.requests: {len(self.resource_manager.requests)}, "
1295+
f"scheduler.requests: {len(self.scheduler.requests)}",
1296+
)
1297+
next_warn_time = now + 30
1298+
12901299
time.sleep(0.005)
1291-
self.llm_logger.info(f"All inflight requests drained, take time: {time.time() - start_time:.3f} seconds")
1300+
1301+
self.llm_logger.info(
1302+
"All inflight requests drained, take time: %.3f seconds",
1303+
time.monotonic() - start_time,
1304+
)
12921305

12931306
def _control_resume(self, control_request: ControlRequest) -> Optional[dict]:
12941307
"""Control function for resuming request generation.

0 commit comments

Comments
 (0)