@@ -35,6 +35,9 @@ async def lightllm_generate(request: Request, httpserver_manager: HttpServerManager
     prompt = request_dict.pop("inputs")
     sample_params_dict = request_dict["parameters"]
     return_details = sample_params_dict.pop("return_details", False)
+    return_routed_experts = sample_params_dict.pop(
+        "return_routed_experts", httpserver_manager.args.enable_return_routed_experts
+    )
     sampling_params = SamplingParams()
     sampling_params.init(tokenizer=httpserver_manager.tokenizer, **sample_params_dict)
     sampling_params.verify()
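With this change, the non-streaming endpoint resolves a per-request `return_routed_experts` flag, falling back to the server-wide `enable_return_routed_experts` setting when the request omits it. A minimal client sketch of the opt-in, assuming a server at `http://localhost:8000/generate` (host, port, and prompt are illustrative):

```python
# Hypothetical client call; the payload shape follows the handler above:
# "inputs" for the prompt, "parameters" for sampling options and flags.
import requests

payload = {
    "inputs": "What is expert routing in a MoE model?",
    "parameters": {
        "max_new_tokens": 32,
        "return_routed_experts": True,  # per-request override of the server default
    },
}
ret = requests.post("http://localhost:8000/generate", json=payload).json()
# "routed_experts" appears only if requested and produced by the engine.
print(ret.get("routed_experts"))
```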
@@ -105,7 +108,7 @@ async def lightllm_generate(request: Request, httpserver_manager: HttpServerManager
         ret["prompt_logprobs"] = prompt_logprobs
     if input_usage is not None:
         ret["input_usage"] = input_usage
-    if routed_experts_data is not None:
+    if return_routed_experts and routed_experts_data is not None:
         ret["routed_experts"] = routed_experts_data
 
     return Response(content=json.dumps(ret, ensure_ascii=False).encode("utf-8"))
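Taken together with the first hunk, the response now attaches `routed_experts` only when the resolved flag is set and the engine actually produced routing data; a client can therefore suppress the payload with `"return_routed_experts": false` even on a server started with the feature enabled. A condensed restatement of the gate (names follow the diff; `routed_experts_data` may be `None`):

```python
# Sketch of the gating condition added above, extracted for clarity.
def attach_routed_experts(ret: dict, return_routed_experts: bool, routed_experts_data) -> dict:
    # Both must hold: the request (or server default) asked for the data,
    # and the engine returned it for this generation.
    if return_routed_experts and routed_experts_data is not None:
        ret["routed_experts"] = routed_experts_data
    return ret
```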
@@ -117,6 +120,7 @@ async def lightllm_generate_stream(request: Request, httpserver_manager: HttpServerManager
     prompt = request_dict.pop("inputs")
     sample_params_dict = request_dict["parameters"]
     _ = sample_params_dict.pop("return_details", False)
+    _ = sample_params_dict.pop("return_routed_experts", None)
     sampling_params = SamplingParams()
     sampling_params.init(tokenizer=httpserver_manager.tokenizer, **sample_params_dict)
     sampling_params.verify()
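In the streaming handler the flag is popped and discarded: streaming responses do not carry `routed_experts`, but the key must still be removed before `sample_params_dict` is `**`-expanded into `SamplingParams.init`, which would otherwise receive an unexpected keyword. A small sketch of the failure mode, using a hypothetical stand-in rather than the real `SamplingParams`:

```python
# Why the pop matters: **-expansion forwards every remaining key as a kwarg.
class FakeSamplingParams:  # hypothetical stand-in, not lightllm's class
    def init(self, tokenizer=None, max_new_tokens=16):
        self.max_new_tokens = max_new_tokens

params = {"max_new_tokens": 32, "return_routed_experts": True}
params.pop("return_routed_experts", None)  # without this, init() raises TypeError
FakeSamplingParams().init(**params)  # ok: only known kwargs remain
```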