Skip to content

Commit 82c8443

Browse files
committed
[ci] add accuracy validation for Qwen3.5 Dense/MoE models on SGLang with the ATOM plugin
1 parent b53d7fa commit 82c8443

3 files changed

Lines changed: 99 additions & 0 deletions

File tree

.github/benchmark/sglang_models_accuracy.json

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,41 @@
1010
"accuracy_baseline": null,
1111
"accuracy_baseline_model": "deepseek-ai/DeepSeek-R1-0528",
1212
"_baseline_note": "Threshold aligned with the SGLANG accuracy validation workflow target for gsm8k."
13+
},
14+
{
15+
"model_name": "Qwen3.5-35B-A3B-FP8 TP2",
16+
"model_path": "Qwen/Qwen3.5-35B-A3B-FP8",
17+
"extraArgs": "--tensor-parallel-size 2",
18+
"env_vars": "",
19+
"runner": "linux-atom-mi35x-4",
20+
"test_level": "nightly",
21+
"accuracy_threshold": 0.89,
22+
"accuracy_baseline": null,
23+
"accuracy_baseline_model": "Qwen/Qwen3.5-35B-A3B-FP8",
24+
"_baseline_note": "Threshold aligned with the SGLANG accuracy validation workflow target for gsm8k."
25+
},
26+
{
27+
"model_name": "Qwen3.5-27B-FP8 TP2",
28+
"model_path": "Qwen/Qwen3.5-27B-FP8",
29+
"extraArgs": "--tensor-parallel-size 2",
30+
"env_vars": "",
31+
"runner": "linux-atom-mi35x-4",
32+
"test_level": "nightly",
33+
"accuracy_threshold": 0.88,
34+
"accuracy_baseline": null,
35+
"accuracy_baseline_model": "Qwen/Qwen3.5-27B-FP8",
36+
"_baseline_note": "Threshold aligned with the SGLANG accuracy validation workflow target for gsm8k."
37+
},
38+
{
39+
"model_name": "Qwen3.5-35B-A3B TP2",
40+
"model_path": "Qwen/Qwen3.5-35B-A3B",
41+
"extraArgs": "--tensor-parallel-size 2",
42+
"env_vars": "",
43+
"runner": "linux-atom-mi35x-4",
44+
"test_level": "nightly",
45+
"accuracy_threshold": 0.95,
46+
"accuracy_baseline": null,
47+
"accuracy_baseline_model": "Qwen/Qwen3.5-35B-A3B",
48+
"_baseline_note": "Threshold aligned with the SGLANG accuracy validation workflow target for gsm8k."
1349
}
1450
]

.github/workflows/atom-sglang-accuracy-validation.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,21 @@ on:
1414
required: false
1515
type: boolean
1616
default: false
17+
run_qwen35_35b_a3b_fp8_tp2:
18+
description: "Qwen3.5-35B-A3B-FP8 TP2"
19+
required: false
20+
type: boolean
21+
default: false
22+
run_qwen35_27b_fp8_tp2:
23+
description: "Qwen3.5-27B-FP8 TP2"
24+
required: false
25+
type: boolean
26+
default: false
27+
run_qwen35_35b_a3b_tp2:
28+
description: "Qwen3.5-35B-A3B TP2"
29+
required: false
30+
type: boolean
31+
default: false
1732
upload_accuracy_to_dashboard:
1833
description: "Optional: upload SGLANG accuracy results to dashboard after this manual run"
1934
required: false
@@ -55,6 +70,9 @@ jobs:
5570
id: meta
5671
env:
5772
RUN_DSR1_FP8_TP4: ${{ inputs.run_dsr1_fp8_tp4 }}
73+
RUN_QWEN35_35B_A3B_FP8_TP2: ${{ inputs.run_qwen35_35b_a3b_fp8_tp2 }}
74+
RUN_QWEN35_27B_FP8_TP2: ${{ inputs.run_qwen35_27b_fp8_tp2 }}
75+
RUN_QWEN35_35B_A3B_TP2: ${{ inputs.run_qwen35_35b_a3b_tp2 }}
5876
run: |
5977
set -euo pipefail
6078
@@ -76,6 +94,33 @@ jobs:
7694
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nSGLANG_AITER_FP8_PREFILL_ATTN=0\nSGLANG_USE_AITER=1\nATOM_ENABLE_DS_QKNORM_QUANT_FUSION=1",
7795
"runner": "linux-atom-mi35x-4",
7896
},
97+
{
98+
"toggle_env": "RUN_QWEN35_35B_A3B_FP8_TP2",
99+
"model_name": "Qwen3.5-35B-A3B-FP8 TP2",
100+
"model_path": "Qwen/Qwen3.5-35B-A3B-FP8",
101+
"extra_args": "--tensor-parallel-size 2",
102+
"accuracy_test_threshold": 0.89,
103+
"env_vars": "",
104+
"runner": "linux-atom-mi35x-4",
105+
},
106+
{
107+
"toggle_env": "RUN_QWEN35_27B_FP8_TP2",
108+
"model_name": "Qwen3.5-27B-FP8 TP2",
109+
"model_path": "Qwen/Qwen3.5-27B-FP8",
110+
"extra_args": "--tensor-parallel-size 2",
111+
"accuracy_test_threshold": 0.88,
112+
"env_vars": "",
113+
"runner": "linux-atom-mi35x-4",
114+
},
115+
{
116+
"toggle_env": "RUN_QWEN35_35B_A3B_TP2",
117+
"model_name": "Qwen3.5-35B-A3B TP2",
118+
"model_path": "Qwen/Qwen3.5-35B-A3B",
119+
"extra_args": "--tensor-parallel-size 2",
120+
"accuracy_test_threshold": 0.95,
121+
"env_vars": "",
122+
"runner": "linux-atom-mi35x-4",
123+
},
79124
]
80125
81126
selected = []

.github/workflows/atom-sglang-test.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,24 @@ jobs:
127127
ATOM_ENABLE_DS_QKNORM_QUANT_FUSION=1
128128
accuracy_test_threshold: 0.92
129129
runner: linux-atom-mi35x-4
130+
- model_name: "Qwen3.5-35B-A3B-FP8 TP2"
131+
model_path: "Qwen/Qwen3.5-35B-A3B-FP8"
132+
extra_args: "--tensor-parallel-size 2"
133+
env_vars: ""
134+
accuracy_test_threshold: 0.89
135+
runner: linux-atom-mi35x-4
136+
- model_name: "Qwen3.5-27B-FP8 TP2"
137+
model_path: "Qwen/Qwen3.5-27B-FP8"
138+
extra_args: "--tensor-parallel-size 2"
139+
env_vars: ""
140+
accuracy_test_threshold: 0.88
141+
runner: linux-atom-mi35x-4
142+
- model_name: "Qwen3.5-35B-A3B TP2"
143+
model_path: "Qwen/Qwen3.5-35B-A3B"
144+
extra_args: "--tensor-parallel-size 2"
145+
env_vars: ""
146+
accuracy_test_threshold: 0.95
147+
runner: linux-atom-mi35x-4
130148
runs-on: ${{ matrix.runner }}
131149
timeout-minutes: 180
132150
env:

0 commit comments

Comments
 (0)