Skip to content

Commit 8883757

Browse files
[BugFix] Fix bugs in /v1/abort_requests interface from PR(PaddlePaddle#6992) (PaddlePaddle#7176)
* abort api bug fix
* bug fix

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
1 parent d8cdda8 commit 8883757

4 files changed

Lines changed: 23 additions & 7 deletions

File tree

fastdeploy/engine/common_engine.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1614,13 +1614,14 @@ def _control_abort_requests(self, control_req: ControlRequest):
16141614
engine_recv_first_token_time=request.metrics.engine_recv_first_token_time if request.metrics else now,
16151615
request_start_time=request.metrics.arrival_time if request.metrics else now,
16161616
)
1617+
eos_token_ids = getattr(request, "eos_token_ids", [0])
16171618
result = RequestOutput(
16181619
request_id=req_id,
16191620
finished=True,
16201621
outputs=CompletionOutput(
16211622
index=0,
16221623
send_idx=len(partial_token_ids),
1623-
token_ids=[self.data_processor.eos_token_ids[0]],
1624+
token_ids=[eos_token_ids[0]],
16241625
),
16251626
metrics=abort_metrics,
16261627
error_code=200,
@@ -1664,10 +1665,19 @@ def _wait_abort_complete(self, target_req_ids, stall_timeout=1):
16641665
reset progress state if any, then continue monitoring
16651666
"""
16661667
target_set = set(target_req_ids)
1668+
target_set = target_set & (set(self.resource_manager.requests.keys()) | set(self.scheduler.requests.keys()))
16671669
prev_remaining_count = len(target_set)
16681670
last_progress_time = time.time()
16691671
remaining = target_set & self.resource_manager.get_reqs_in_aborting()
16701672
while remaining:
1673+
alive_reqs = set(self.resource_manager.requests.keys()) | set(self.scheduler.requests.keys())
1674+
finished_reqs = target_set - alive_reqs
1675+
if finished_reqs:
1676+
self.llm_logger.info(f"abort targets already finished, skip: {finished_reqs}")
1677+
for req_id in finished_reqs:
1678+
self.resource_manager.waiting_abort_req_id_set.discard(req_id)
1679+
self.resource_manager.to_be_aborted_req_id_set.discard(req_id)
1680+
target_set -= finished_reqs
16711681
remaining = target_set & self.resource_manager.get_reqs_in_aborting()
16721682
if not remaining:
16731683
self.llm_logger.info(f"all {len(target_set)} abort reqs cleaned")

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ def recycle_abort_task(self, request_id):
315315
self.stop_flags[request.idx] = True # 设置停止标志
316316
del self.requests[request_id]
317317
del self.req_dict[request_id]
318-
self.to_be_aborted_req_id_set.remove(request_id)
318+
self.to_be_aborted_req_id_set.discard(request_id)
319319
self.update_metrics()
320320

321321
def _trigger_abort(self, request_id, batch_request):
@@ -327,7 +327,7 @@ def _trigger_abort(self, request_id, batch_request):
327327
abort_request.cached_block_num = 0
328328
batch_request.add_request(self._prepare_abort_task(abort_request))
329329
self.to_be_aborted_req_id_set.add(request_id)
330-
self.waiting_abort_req_id_set.remove(request_id)
330+
self.waiting_abort_req_id_set.discard(request_id)
331331

332332
def _info_each_block(self):
333333
"""
@@ -1622,6 +1622,8 @@ def finish_requests(self, request_ids: Union[str, Iterable[str]]):
16221622
del self.requests[req_id]
16231623
if req_id in self.req_dict:
16241624
del self.req_dict[req_id]
1625+
self.waiting_abort_req_id_set.discard(req_id)
1626+
self.to_be_aborted_req_id_set.discard(req_id)
16251627

16261628
# Do not block the main thread here
16271629
# Write cache to storage if kvcache_storage_backend is enabled

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ class ChatCompletionResponseChoice(BaseModel):
270270
logprobs: Optional[LogProbs] = None
271271
draft_logprobs: Optional[LogProbs] = None
272272
prompt_logprobs: Optional[PromptLogprobs] = None
273-
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]]
273+
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop", "abort"]]
274274
speculate_metrics: Optional[SpeculateMetrics] = None
275275

276276

@@ -335,7 +335,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
335335
logprobs: Optional[LogProbs] = None
336336
draft_logprobs: Optional[LogProbs] = None
337337
prompt_logprobs: Optional[PromptLogprobs] = None
338-
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
338+
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop", "abort"]] = None
339339
arrival_time: Optional[float] = None
340340
speculate_metrics: Optional[SpeculateMetrics] = None
341341

@@ -371,7 +371,7 @@ class CompletionResponseChoice(BaseModel):
371371
draft_logprobs: Optional[CompletionLogprobs] = None
372372
prompt_logprobs: Optional[PromptLogprobs] = None
373373
reasoning_content: Optional[str] = None
374-
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
374+
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop", "abort"]] = None
375375
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
376376
speculate_metrics: Optional[SpeculateMetrics] = None
377377

@@ -417,7 +417,7 @@ class CompletionResponseStreamChoice(BaseModel):
417417
prompt_tokens: Optional[str] = None
418418
completion_tokens: Optional[str] = None
419419
reasoning_content: Optional[str] = None
420-
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]] = None
420+
finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop", "abort"]] = None
421421
tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
422422
speculate_metrics: Optional[SpeculateMetrics] = None
423423

tests/engine/test_common_engine.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3700,6 +3700,8 @@ def test_wait_abort_complete_progress(self):
37003700
"""_wait_abort_complete exits when background thread cleans up."""
37013701
eng = self._make_abort_engine()
37023702
eng.resource_manager.waiting_abort_req_id_set = {"req-1_0"}
3703+
# Add the request to requests dict so it won't be filtered out
3704+
eng.resource_manager.requests = {"req-1_0": self._make_fake_request()}
37033705

37043706
call_count = [0]
37053707

@@ -3718,6 +3720,8 @@ def test_wait_abort_complete_force_cleanup_stuck_in_to_be_aborted(self):
37183720
"""Stall timeout triggers force cleanup for requests in to_be_aborted_req_id_set."""
37193721
eng = self._make_abort_engine()
37203722
eng.resource_manager.to_be_aborted_req_id_set = {"req-1_0"}
3723+
# Add the request to requests dict so it won't be filtered out
3724+
eng.resource_manager.requests = {"req-1_0": self._make_fake_request()}
37213725

37223726
def mock_recycle(req_id):
37233727
eng.resource_manager.to_be_aborted_req_id_set.discard(req_id)

0 commit comments

Comments
 (0)