We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b1fbf48 commit 1abbbb6 Copy full SHA for 1abbbb6
1 file changed
benchmarks/gptoss_fp4_b200_trt_slurm.sh
@@ -31,9 +31,6 @@ echo "MOE_BACKEND set to '$MOE_BACKEND'"
31
32
EXTRA_CONFIG_FILE="gptoss-fp4.yml"
33
export TRTLLM_ENABLE_PDL=1
34
-export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
35
-export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
36
-export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
37
38
cat > $EXTRA_CONFIG_FILE << EOF
39
cuda_graph_config:
@@ -52,6 +49,9 @@ moe_config:
52
49
EOF
53
50
54
51
if [[ "$DP_ATTENTION" == "true" ]]; then
+ export TRTLLM_MOE_ALLTOALL_BACKEND="mnnvlthroughput"
+ export TRTLLM_FORCE_ALLTOALL_METHOD="MNNVL"
+ export TRTLLM_MOE_A2A_WORKSPACE_MB="2048"
55
cat << EOF >> $EXTRA_CONFIG_FILE
56
attention_dp_config:
57
enable_balance: true
0 commit comments