updated benchmark docs (#340)

FrankLeeeee · web-flow · commit 3e0cda08b84a · 2025-12-01T18:19:55.000+08:00
* updated benchmark docs

* polish

* polish
diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
@@ -1,2 +1,2 @@
 *.jsonl
-benchmark_results/
+results/
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -30,7 +30,6 @@ python3 bench_eagle3.py \
     --attention-backend fa3 \
     --config-list 1,0,0,0 1,3,1,4 \
     --benchmark-list mtbench gsm8k:5 ceval:5:accountant \
-    --output test.jsonl \
     --dtype bfloat16
 ```
 
diff --git a/benchmarks/bench_eagle3.py b/benchmarks/bench_eagle3.py
@@ -70,9 +70,7 @@ def parse_args():
         "--skip-launch-server", action="store_true", default=False
     )
     benchmark_group.add_argument("--num-prompts", type=int, default=80)
-    benchmark_group.add_argument(
-        "--output-dir", type=str, default="./bernchmark_results"
-    )
+    benchmark_group.add_argument("--output-dir", type=str, default="./results")
     benchmark_group.add_argument(
         "--config-list", type=str, nargs="+", default=["1,0,0,0", "1,3,1,4"]
     )
diff --git a/docs/examples/llama3-eagle3-online.md b/docs/examples/llama3-eagle3-online.md
@@ -43,20 +43,20 @@ The four numbers in the config represent: `batch_size, num_steps, topk, num_veri
 A pre-trained EAGLE model is available at [zhuyksir/EAGLE3-Llama-3.1-8B-Instruct](https://huggingface.co/zhuyksir/EAGLE3-Llama-3.1-8B-Instruct) for reference.
 
 ```shell
+cd benchmarks
+
 config_list=(
     "4,3,1,4"
     "4,7,10,60"
 )
-CUDA_VISIBLE_DEVICES=4,5,6,7 python3 bench_model_speedup.py \
+python3 bench_eagle3.py \
     --model-path meta-llama/Llama-3.1-8B-Instruct \
     --speculative-draft-model-path /YOUR/PATH/Llama-3.1-8B-Instruct/dev_outputs/epoch_0 \
-    --port 20001 \
-    --trust-remote-code \
+    --port 30000 \
     --mem-fraction-static 0.8 \
-    --tp-size 4 \
+    --tp-size 1 \
     --config-list "${config_list[@]}" \
-    --benchmark-list mtbench:80 gsm8k:200 humaneval:200 math500:200 \
-    --output output.jsonl
+    --benchmark-list mtbench gsm8k humaneval math500
 ```
 
 

Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`*.jsonl`
`2`		`-benchmark_results/`
	`2`	`+results/`
Original file line number	Diff line number	Diff line change
`@@ -70,9 +70,7 @@ def parse_args():`
`70`	`70`	`        "--skip-launch-server", action="store_true", default=False`
`71`	`71`	`    )`
`72`	`72`	`    benchmark_group.add_argument("--num-prompts", type=int, default=80)`
`73`		`-    benchmark_group.add_argument(`
`74`		`-        "--output-dir", type=str, default="./bernchmark_results"`
`75`		`-    )`
	`73`	`+    benchmark_group.add_argument("--output-dir", type=str, default="./results")`
`76`	`74`	`    benchmark_group.add_argument(`
`77`	`75`	`        "--config-list", type=str, nargs="+", default=["1,0,0,0", "1,3,1,4"]`
`78`	`76`	`    )`