@@ -300,19 +300,30 @@ def sample(
300300 if len (samples ) >= num_requests :
301301 break
302302 json_data = entry
303- prompt = entry ["messages" ][- 1 ].get ("content" , "" )
304- history_QA = entry .get ("messages" , [])
305303 response_format = entry .get ("response_format" )
306304 new_output_len = int (entry .get ("max_tokens" , output_len if output_len else 12288 ))
307305
308- if enable_multimodal_chat :
309- prompt = self .apply_multimodal_chat_transformation (prompt , None )
306+ # If the sample already carries pre-tokenized input_ids, send them
307+ # directly via prompt_token_ids and skip the server-side
308+ # chat_template + tokenizer step.
309+ input_ids = entry .get ("input_ids" )
310+ if input_ids is not None :
311+ prompt = [int (x ) for x in input_ids ]
312+ history_QA = []
313+ prompt_len = len (prompt )
314+ else :
315+ prompt = entry ["messages" ][- 1 ].get ("content" , "" )
316+ history_QA = entry .get ("messages" , [])
317+ prompt_len = 0
318+ if enable_multimodal_chat :
319+ prompt = self .apply_multimodal_chat_transformation (prompt , None )
320+
310321 samples .append (
311322 SampleRequest (
312323 no = cnt ,
313324 json_data = json_data ,
314325 prompt = prompt ,
315- prompt_len = 0 ,
326+ prompt_len = prompt_len ,
316327 history_QA = history_QA ,
317328 expected_output_len = new_output_len ,
318329 response_format = response_format ,
0 commit comments