Skip to content

Commit cff05ee

Browse files
committed
[ci][benchmark] add Qwen3.5-397B-A17B-FP8 TP4/TP8 benchmark case on MI35X
1 parent 2e64a2a commit cff05ee

2 files changed

Lines changed: 36 additions & 0 deletions

File tree

.github/benchmark/sglang_benchmark_models.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,27 @@
5151
"bench_args": "",
5252
"runner": "atom-mi355-8gpu-oot-benchmark",
5353
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nSGLANG_AITER_FP8_PREFILL_ATTN=0\nSGLANG_USE_AITER=1\nATOM_ENABLE_DS_QKNORM_QUANT_FUSION=1"
54+
},
55+
{
56+
"display": "Qwen3.5-397B-A17B-FP8 TP4",
57+
"dashboard_model": "Qwen3.5-397B-A17B-FP8-tp4",
58+
"source_path": "Qwen/Qwen3.5-397B-A17B-FP8",
59+
"path": "Qwen/Qwen3.5-397B-A17B-FP8",
60+
"prefix": "qwen3-5-397b-a17b-fp8-tp4",
61+
"extra_args": "--trust-remote-code --tensor-parallel-size 4",
62+
"bench_args": "",
63+
"runner": "atom-mi355-8gpu-oot-benchmark",
64+
"env_vars": ""
65+
},
66+
{
67+
"display": "Qwen3.5-397B-A17B-FP8 TP8",
68+
"dashboard_model": "Qwen3.5-397B-A17B-FP8",
69+
"source_path": "Qwen/Qwen3.5-397B-A17B-FP8",
70+
"path": "Qwen/Qwen3.5-397B-A17B-FP8",
71+
"prefix": "qwen3-5-397b-a17b-fp8-tp8",
72+
"extra_args": "--trust-remote-code --tensor-parallel-size 8",
73+
"bench_args": "",
74+
"runner": "atom-mi355-8gpu-oot-benchmark",
75+
"env_vars": ""
5476
}
5577
]

.github/workflows/atom-sglang-benchmark.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ on:
2727
description: "DeepSeek-R1-0528-MXFP4 FP4 TP8 EP8"
2828
type: boolean
2929
default: false
30+
qwen3-5-397b-a17b-fp8-tp4:
31+
description: "Qwen3.5-397B-A17B-FP8 TP4"
32+
type: boolean
33+
default: false
34+
qwen3-5-397b-a17b-fp8-tp8:
35+
description: "Qwen3.5-397B-A17B-FP8 TP8"
36+
type: boolean
37+
default: false
3038
sglang_image:
3139
description: "Optional SGLang benchmark image override. Leave empty to use sglang-latest on main or rebuild from the selected non-main branch."
3240
type: string
@@ -217,6 +225,8 @@ jobs:
217225
ENABLE_DEEPSEEK_R1_FP4_TP8: ${{ inputs.deepseek-r1-fp4-tp8 }}
218226
ENABLE_DEEPSEEK_R1_FP4_TP4: ${{ inputs.deepseek-r1-fp4-tp4 }}
219227
ENABLE_DEEPSEEK_R1_FP4_TP8_EP8: ${{ inputs.deepseek-r1-fp4-tp8-ep8 }}
228+
ENABLE_QWEN3_5_397B_A17B_FP8_TP4: ${{ inputs.qwen3-5-397b-a17b-fp8-tp4 }}
229+
ENABLE_QWEN3_5_397B_A17B_FP8_TP8: ${{ inputs.qwen3-5-397b-a17b-fp8-tp8 }}
220230
run: |
221231
MODELS_JSON="$(jq -c '
222232
map(select(
@@ -225,6 +235,8 @@ jobs:
225235
or (.prefix == "deepseek-r1-fp4-tp8" and env.ENABLE_DEEPSEEK_R1_FP4_TP8 == "true")
226236
or (.prefix == "deepseek-r1-fp4-tp4" and env.ENABLE_DEEPSEEK_R1_FP4_TP4 == "true")
227237
or (.prefix == "deepseek-r1-fp4-tp8-ep8" and env.ENABLE_DEEPSEEK_R1_FP4_TP8_EP8 == "true")
238+
or (.prefix == "qwen3-5-397b-a17b-fp8-tp4" and env.ENABLE_QWEN3_5_397B_A17B_FP8_TP4 == "true")
239+
or (.prefix == "qwen3-5-397b-a17b-fp8-tp8" and env.ENABLE_QWEN3_5_397B_A17B_FP8_TP8 == "true")
228240
))
229241
' .github/benchmark/sglang_benchmark_models.json)"
230242
echo "models_json=${MODELS_JSON}" >> "$GITHUB_OUTPUT"
@@ -475,6 +487,8 @@ jobs:
475487
deepseek-r1-fp4-tp8) echo "enabled=${{ inputs.deepseek-r1-fp4-tp8 }}" >> "$GITHUB_OUTPUT" ;;
476488
deepseek-r1-fp4-tp4) echo "enabled=${{ inputs.deepseek-r1-fp4-tp4 }}" >> "$GITHUB_OUTPUT" ;;
477489
deepseek-r1-fp4-tp8-ep8) echo "enabled=${{ inputs.deepseek-r1-fp4-tp8-ep8 }}" >> "$GITHUB_OUTPUT" ;;
490+
qwen3-5-397b-a17b-fp8-tp4) echo "enabled=${{ inputs.qwen3-5-397b-a17b-fp8-tp4 }}" >> "$GITHUB_OUTPUT" ;;
491+
qwen3-5-397b-a17b-fp8-tp8) echo "enabled=${{ inputs.qwen3-5-397b-a17b-fp8-tp8 }}" >> "$GITHUB_OUTPUT" ;;
478492
*) echo "enabled=true" >> "$GITHUB_OUTPUT" ;;
479493
esac
480494

0 commit comments

Comments
 (0)