Skip to content

Commit f184e8f

Browse files
committed
fix
1 parent 3f607ea commit f184e8f

4 files changed

Lines changed: 36 additions & 24 deletions

File tree

lightllm/server/core/objs/req.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,19 +25,20 @@ class FinishStatus(ctypes.Structure):
2525
NO_FINISH = 0
2626
FINISHED_STOP = 1
2727
FINISHED_LENGTH = 2
28+
FINISHED_ERROR = 3
2829

2930
def __init__(self, init_state=NO_FINISH):
3031
self.status = init_state
3132

3233
def set_status(self, new_status):
33-
assert 0 <= new_status <= 2
34+
assert 0 <= new_status <= 3
3435
self.status = new_status
3536

3637
def get_status(self):
3738
return self.status
3839

3940
def is_finished(self):
40-
return self.FINISHED_STOP <= self.status <= self.FINISHED_LENGTH
41+
return self.FINISHED_STOP <= self.status <= self.FINISHED_ERROR
4142

4243
def is_stopped(self):
4344
return self.status == self.FINISHED_STOP
@@ -50,6 +51,8 @@ def get_finish_reason(self):
5051
return "stop"
5152
elif self.status == self.FINISHED_LENGTH:
5253
return "length"
54+
elif self.status == self.FINISHED_ERROR:
55+
return "error"
5356
return None
5457

5558

lightllm/server/detokenization/manager.py

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -47,24 +47,30 @@ def _init_get_token_id_to_token_str(self):
4747
return
4848

4949
def _add_new_group_req_index(self, recv_obj: GroupReqIndexes):
50+
from lightllm.server.core.objs import FinishStatus
51+
5052
for req_index in recv_obj.shm_req_indexes:
5153
req = self.shm_req_manager.get_req_obj_by_index(req_index)
52-
req.link_prompt_ids_shm_array()
53-
req.link_logprobs_shm_array()
54-
55-
logger.debug(
56-
f"detokenization recv req id {req.request_id} " f"cost time {time.time() - recv_obj.time_mark} s"
57-
)
58-
59-
# p d 分离模式,decode节点的解码需要做一些特殊的修复。
60-
decode_req = DecodeReq(req, self.is_pd_decode_mode)
61-
if self.is_pd_decode_mode:
62-
decode_req = decode_mode_fix(decode_req, self.tokenizer, self.eos_id)
63-
# token_healing mode 的特殊初始化
64-
if self.args.token_healing_mode:
65-
decode_req.init_token_healing_prefix_str(self.token_id_to_token, self.tokenizer)
66-
67-
self.req_id_to_out[req.request_id] = decode_req
54+
try:
55+
req.link_prompt_ids_shm_array()
56+
req.link_logprobs_shm_array()
57+
58+
logger.debug(
59+
f"detokenization recv req id {req.request_id} " f"cost time {time.time() - recv_obj.time_mark} s"
60+
)
61+
62+
# p d 分离模式,decode节点的解码需要做一些特殊的修复。
63+
decode_req = DecodeReq(req, self.is_pd_decode_mode)
64+
if self.is_pd_decode_mode:
65+
decode_req = decode_mode_fix(decode_req, self.tokenizer, self.eos_id)
66+
# token_healing mode 的特殊初始化
67+
if self.args.token_healing_mode:
68+
decode_req.init_token_healing_prefix_str(self.token_id_to_token, self.tokenizer)
69+
70+
self.req_id_to_out[req.request_id] = decode_req
71+
except Exception as e:
72+
req.finish_status.set_status(FinishStatus.FINISHED_ERROR)
73+
raise e
6874
return
6975

7076
def handle_loop(self):
@@ -79,9 +85,8 @@ def handle_loop(self):
7985
try:
8086
self._add_new_group_req_index(recv_obj=recv_obj)
8187
except Exception:
82-
# TODO: publish an ERROR finish_status back to httpserver so the
83-
# client gets a 500 instead of hanging until disconnect.
8488
logger.exception("add new group req index has exception")
89+
self.pub_to_httpserver.send_pyobj(None, protocol=pickle.HIGHEST_PROTOCOL)
8590

8691
# 当队列中存在较多的请求时,将一次接受的数量上调
8792
recv_max_count = min(int(recv_max_count * 1.3), 256)

lightllm/server/httpserver/manager.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,11 @@ async def generate(
298298
# 用于等待 pd_master 下发的交换信息
299299
nixl_pd_event: asyncio.Event = None,
300300
) -> AsyncGenerator[Tuple[int, str, dict, FinishStatus], None]:
301+
group_request_id = None
301302
if isinstance(prompt, str):
303+
# Guard against extremely long string prompts that might stall the tokenizer
304+
# or cause excessive memory usage before tokenization.
305+
# 8 characters per token is a conservative heuristic (avg is ~4).
302306
max_prompt_chars = self.max_req_total_len * 8
303307
if len(prompt) > max_prompt_chars:
304308
raise ValueError(

lightllm/server/router/manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -347,16 +347,16 @@ def _filter_reqs_from_running_batch(self):
347347
# Settle any output-token tail produced after the last window boundary,
348348
# so windowed TPS does not lose the req's last tokens.
349349
self.status_reporter.discard_req(req)
350-
# Aborted/disconnected requests can leave a partial output_len that
351-
# would bias the EMA toward shorter generations; skip them.
352-
if req.is_aborted:
353-
continue
354350
self.status_reporter.on_request_completed(
355351
input_len=req.input_len,
356352
output_len=req.shm_cur_output_len,
357353
cache_len=req.prompt_cache_len,
358354
mtp_accepted=req.mtp_accepted_token_num,
359355
)
356+
# Aborted/disconnected requests can leave a partial output_len that
357+
# would bias the EMA toward shorter generations; skip them.
358+
if req.is_aborted:
359+
continue
360360
self.router_statics.update(req.candetoken_out_len)
361361
self.running_batch.filter_out_finished_req(self.shm_req_manager)
362362
if self.running_batch.is_clear():

0 commit comments

Comments
 (0)