Skip to content

Commit 09a488b

Browse files
committed
fix(httpserver,router): defensive group_request_id init; reorder is_aborted skip
- httpserver: initialize group_request_id=None so the ValueError except handler does not hit UnboundLocalError when the oversized-prompt guard raises before alloc_req_id.
- router: move the is_aborted skip after on_request_completed so aborted reqs still update completion stats, but do not pollute the router_statics EMA with their truncated output_len.
1 parent 9b86761 commit 09a488b

2 files changed

Lines changed: 8 additions & 4 deletions

File tree

lightllm/server/httpserver/manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,11 @@ async def generate(
298298
# 用于等待 pd_master 下发的交换信息
299299
nixl_pd_event: asyncio.Event = None,
300300
) -> AsyncGenerator[Tuple[int, str, dict, FinishStatus], None]:
301+
group_request_id = None
301302
if isinstance(prompt, str):
303+
# Guard against extremely long string prompts that might stall the tokenizer
304+
# or cause excessive memory usage before tokenization.
305+
# 8 characters per token is a conservative heuristic (avg is ~4).
302306
max_prompt_chars = self.max_req_total_len * 8
303307
if len(prompt) > max_prompt_chars:
304308
raise ValueError(

lightllm/server/router/manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,16 +347,16 @@ def _filter_reqs_from_running_batch(self):
347347
# Settle any output-token tail produced after the last window boundary,
348348
# so windowed TPS does not lose the req's last tokens.
349349
self.status_reporter.discard_req(req)
350-
# Aborted/disconnected requests can leave a partial output_len that
351-
# would bias the EMA toward shorter generations; skip them.
352-
if req.is_aborted:
353-
continue
354350
self.status_reporter.on_request_completed(
355351
input_len=req.input_len,
356352
output_len=req.shm_cur_output_len,
357353
cache_len=req.prompt_cache_len,
358354
mtp_accepted=req.mtp_accepted_token_num,
359355
)
356+
# Aborted/disconnected requests can leave a partial output_len that
357+
# would bias the EMA toward shorter generations; skip them.
358+
if req.is_aborted:
359+
continue
360360
self.router_statics.update(req.candetoken_out_len)
361361
self.running_batch.filter_out_finished_req(self.shm_req_manager)
362362
if self.running_batch.is_clear():

0 commit comments

Comments
 (0)