[Benchmark] support input_ids for benchmark dataset (#7993)

zhupengyang · web-flow · commit acd5638f1d25 · 2026-06-04T10:56:45.000+08:00
diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
@@ -356,14 +356,16 @@ async def async_request_eb_openai_chat_completions(
     if request_func_input.response_format:
         payload["response_format"] = request_func_input.response_format
 
-    # 随机输入开关
+    # When random_flag is set, pin max_tokens and min_tokens to output_len.
     if request_func_input.random_flag:
         payload["max_tokens"] = request_func_input.output_len
         payload["min_tokens"] = request_func_input.output_len
-        # 随机token_ids场景
-        if isinstance(request_func_input.prompt, list):
-            request_func_input.prompt_token_ids = request_func_input.prompt
-            request_func_input.prompt = ""
+
+    # When the prompt is a list of token ids, route through prompt_token_ids
+    # regardless of random_flag.
+    if isinstance(request_func_input.prompt, list):
+        request_func_input.prompt_token_ids = request_func_input.prompt
+        request_func_input.prompt = ""
 
     # 支持传入prompt_token_ids
     if request_func_input.prompt_token_ids:
diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py
@@ -300,19 +300,30 @@ def sample(
             if len(samples) >= num_requests:
                 break
             json_data = entry
-            prompt = entry["messages"][-1].get("content", "")
-            history_QA = entry.get("messages", [])
             response_format = entry.get("response_format")
             new_output_len = int(entry.get("max_tokens", output_len if output_len else 12288))
 
-            if enable_multimodal_chat:
-                prompt = self.apply_multimodal_chat_transformation(prompt, None)
+            # If the sample already carries pre-tokenized input_ids, send them
+            # directly via prompt_token_ids and skip the server-side
+            # chat_template + tokenizer step.
+            input_ids = entry.get("input_ids")
+            if input_ids is not None:
+                prompt = [int(x) for x in input_ids]
+                history_QA = []
+                prompt_len = len(prompt)
+            else:
+                prompt = entry["messages"][-1].get("content", "")
+                history_QA = entry.get("messages", [])
+                prompt_len = 0
+                if enable_multimodal_chat:
+                    prompt = self.apply_multimodal_chat_transformation(prompt, None)
+
             samples.append(
                 SampleRequest(
                     no=cnt,
                     json_data=json_data,
                     prompt=prompt,
-                    prompt_len=0,
+                    prompt_len=prompt_len,
                     history_QA=history_QA,
                     expected_output_len=new_output_len,
                     response_format=response_format,