Skip to content

Commit 838f614

Browse files
authored
Modify MMLU prompt template to improve accuracy on DeepSeek models; enable running the whole dataset with num_prompts=-1 (#235)
* Modify MMLU prompt template to improve accuracy on DeepSeek models; enable running the whole dataset with num_prompts=-1
* Change the default value of num_prompts to -1
1 parent 0aa437f commit 838f614

1 file changed

Lines changed: 8 additions & 3 deletions

File tree

benchmarks/benchmark_serving.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ def gen_mmlu_qa(data: Any, mmlu_method: str = "") -> str:
342342
f"(D) {row['D']}\n"
343343
)
344344

345-
output += "\nCorrect answer: "
345+
output += "\nCorrect answer:"
346346

347347
if mmlu_method == "HELM":
348348
output += f"({row['answer']})\n\n"
@@ -938,7 +938,7 @@ def parse_args() -> argparse.Namespace:
938938
parser.add_argument(
939939
"--num-prompts",
940940
type=int,
941-
default=1000,
941+
default=-1,
942942
help=(
943943
"Number of prompts to process. (number of sample requests we randomly"
944944
" collect from dataset)"
@@ -1133,11 +1133,16 @@ def main(args: argparse.Namespace):
11331133
# A given args.max_output_length value is the max generation step,
11341134
# when the args.max_output_length is default to None, the sample's golden
11351135
# output length will be used to decide the generation step.
1136+
if args.num_prompts == -1:
1137+
num_requests = len(dataset)
1138+
else:
1139+
num_requests = args.num_prompts
1140+
11361141
input_requests = sample_requests(
11371142
dataset=dataset,
11381143
tokenizer=tokenizer,
11391144
use_chat_template=use_chat_template,
1140-
num_requests=args.num_prompts,
1145+
num_requests=num_requests,
11411146
dataset_type=args.dataset,
11421147
max_output_length=args.max_output_length,
11431148
min_input_length=args.min_input_length,

0 commit comments

Comments
 (0)