1919from lightllm .utils .dist_utils import init_distributed_env
2020from lightllm .utils .envs_utils import get_unique_server_name
2121from lightllm .server .core .objs import ShmReqManager , StartArgs
22- from lightllm .server .core .objs .io_objs import AbortedReqCmd
22+ from lightllm .server .core .objs .io_objs import AbortedReqCmd , StopStrMatchedReqCmd
2323from lightllm .server .router .model_infer .infer_batch import g_infer_context
2424from lightllm .server .router .model_infer .pin_mem_manager import g_pin_mem_manager
2525from lightllm .utils .dist_utils import get_global_rank , get_global_world_size , get_dp_size
2626from lightllm .utils .dist_utils import get_dp_world_size , get_global_dp_rank , get_current_rank_in_dp
2727from lightllm .utils .dist_utils import get_current_device_id , get_current_rank_in_node , get_node_world_size
2828from lightllm .utils .dist_utils import get_dp_rank_in_node , create_new_group_for_current_node
29- from lightllm .utils .envs_utils import get_env_start_args , enable_stop_string_match
29+ from lightllm .utils .envs_utils import get_env_start_args
3030from lightllm .distributed import dist_group_manager
3131from lightllm .server .router .shm_reqs_io_buffer import ShmReqsIOBuffer
3232from lightllm .server .router .model_infer .mode_backend .overlap_events import OverlapEventManager , OverlapEventPack
3333from lightllm .models .deepseek_mtp .model import Deepseek3MTPModel
34- from lightllm .server .tokenizer import get_tokenizer
3534
3635
3736class ModeBackend :
@@ -320,6 +319,12 @@ def _read_reqs_buffer_and_init_reqs(self):
320319 if obj .req_id in g_infer_context .requests_mapping :
321320 req : InferReq = g_infer_context .requests_mapping [obj .req_id ]
322321 req .infer_aborted = True
322+ elif isinstance (cmds [0 ], StopStrMatchedReqCmd ):
323+ for obj in cmds :
324+ obj : StopStrMatchedReqCmd = obj
325+ if obj .req_id in g_infer_context .requests_mapping :
326+ req : InferReq = g_infer_context .requests_mapping [obj .req_id ]
327+ req .infer_aborted = True
323328 else :
324329 self ._init_reqs (reqs = cmds )
325330 return
@@ -505,14 +510,6 @@ def _post_handle(
505510 extra_post_req_handle_func 用于提供在一个请求确定输出的时候,给出额外的后处理操作,主要是用于
506511 约束输出等模式,设置自己请求内部的状态机的状态,并添加额外的停止判定条件等。
507512 """
508- if enable_stop_string_match ():
509- if not hasattr (self , "tokenizer" ):
510- self .tokenizer = get_tokenizer (
511- self .args .model_dir , self .args .tokenizer_mode , trust_remote_code = self .args .trust_remote_code
512- )
513- else :
514- self .tokenizer = None
515-
516513 for req_obj , next_token_id , next_token_logprob , pack in zip (
517514 run_reqs , next_token_ids , next_token_logprobs , run_reqs_update_packs
518515 ):
@@ -524,7 +521,6 @@ def _post_handle(
524521 eos_ids = self .eos_id ,
525522 extra_post_req_handle_func = extra_post_req_handle_func ,
526523 is_master_in_dp = self .is_master_in_dp ,
527- tokenizer = self .tokenizer ,
528524 )
529525
530526 g_infer_context .req_manager .req_sampling_params_manager .update_reqs_token_counter (
0 commit comments