Skip to content

Commit ea59c87

Browse files
committed
[CE]fix ce yaml
1 parent fbf3f4e commit ea59c87

11 files changed

Lines changed: 19 additions & 12 deletions

benchmarks/yaml/GLM45-air-32k-bf16-mtp-updatemodel.yaml

Lines changed: 0 additions & 10 deletions
This file was deleted.

benchmarks/yaml/GLM45-air-32k-bf16-mtp.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,3 +5,5 @@ graph_optimization_config:
55
use_cudagraph: True
66
draft_model_use_cudagraph: True
77
load_choices: "default_v1"
8+
max_num_batched_tokens: 4096
9+
speculative_config: '{"method":"mtp","num_speculative_tokens":1,"num_model_steps":1,"model":"/root/paddlejob/tmpspace/GLM-4.5-Air"}'
(file name not captured in this extract — the heading for this newly added file is missing)
Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,10 @@
1+
max_num_seqs: 128
2+
max_model_len: 32768
3+
enable_prefix_caching: True
4+
disable_custom_all_reduce: True
5+
graph_optimization_config: '{"use_cudagraph":true,"use_unique_memory_pool":true,"draft_model_use_cudagraph": true}'
6+
speculative_config: '{"method": "mtp", "num_speculative_tokens": 3, "num_model_steps": 3, "model": "/root/paddlejob/tmpspace/glm_mtp_multi_step", "verify_strategy": "target_match"}'
7+
tensor_parallel_size: 4
8+
enable_logprob: True
9+
moe_gate_fp32: True
10+
swap_space: 300

benchmarks/yaml/deepseek-32k-tp8-wint4.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@ load_choices: "default_v1"
33
graph_optimization_config:
44
use_cudagraph: True
55
use_unique_memory_pool: True
6-
enable_prefix_caching: False
76
max_num_seqs: 256
87
max_model_len: 32768
98
tensor_parallel_size: 8
9+
enable_prefix_caching: False

benchmarks/yaml/eb45-32k-blockwise-fp8-h800-tp8.yaml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@ quantization: block_wise_fp8
55
gpu_memory_utilization: 0.9
66
kv_cache_ratio: 0.8
77
enable_chunked_prefill: True
8-
max_num_batched_tokens: 1024
98
max_num_partial_prefills: 3
109
max_long_partial_prefills: 3
1110
enable_prefix_caching: True

benchmarks/yaml/eb45-32k-wint4-mtp-h800-tp4.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,4 @@ gpu_memory_utilization: 0.8
44
kv_cache_ratio: 0.71
55
tensor_parallel_size: 4
66
quantization: wint4
7+
speculative_config: '{"method": "mtp", "num_speculative_tokens": 1, "model": "/root/paddlejob/ERNIE-45-Turbo/mtp/"}'

benchmarks/yaml/eb45-32k-wint4-tp1-dp4_ep.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,3 +5,4 @@ data_parallel_size: 4
55
tensor_parallel_size: 1
66
enable_expert_parallel: True
77
quantization: wint4
8+
max_num_batched_tokens: 4096

benchmarks/yaml/eb45-vl-28b-thinking-32k-wint8.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,3 +6,4 @@ reasoning_parser: ernie-45-vl-thinking
66
tool_call_parser: ernie-45-vl-thinking
77
load_choices: "default_v1"
88
mm-processor-kwargs: '{"image_max_pixels": 12845056 }'
9+
limit_mm_per_prompt: '{"image": 100, "video": 100}'

benchmarks/yaml/eb45-vl-lite-32k-bf16-a800-tp1.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,3 +7,4 @@ tensor_parallel_size: 1
77
enable_chunked_prefill: True
88
max_num_batched_tokens: 384
99
reasoning_parser: ernie-45-vl
10+
limit_mm_per_prompt: '{"image": 100, "video": 100}'

benchmarks/yaml/eb45-vl-lite-32k-wint4-a800-tp1.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,3 +8,4 @@ enable_chunked_prefill: True
88
max_num_batched_tokens: 384
99
quantization: wint4
1010
reasoning_parser: ernie-45-vl
11+
limit_mm_per_prompt: '{"image": 100, "video": 100}'

0 commit comments

Comments (0)