34 changes: 26 additions & 8 deletions .github/benchmark/oot_benchmark_models.json
@@ -181,7 +181,7 @@
"1024x8192"
],
"extra_args": "--trust-remote-code --tensor-parallel-size 4 --attention-backend ROCM_AITER_FA --gpu-memory-utilization 0.8 --max-num-batched-tokens 16384 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 8,
@@ -192,7 +192,7 @@
"1024x8192"
],
"extra_args": "--trust-remote-code --tensor-parallel-size 8 --attention-backend ROCM_AITER_FA --gpu-memory-utilization 0.8 --max-num-batched-tokens 16384 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
}
]
},
@@ -213,7 +213,7 @@
"1024x8192"
],
"extra_args": "--trust-remote-code --tensor-parallel-size 8 --attention-backend ROCM_AITER_FA --gpu-memory-utilization 0.8 --max-num-batched-tokens 16384 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
}
]
},
@@ -231,7 +231,7 @@
"prefix": "qwen3-next-80b-a3b-instruct-fp8-tp1-met",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 32768 --max-model-len 16384",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0"
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 4,
@@ -240,7 +240,7 @@
"prefix": "qwen3-next-80b-a3b-instruct-fp8-tp4-met",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 32768 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 1,
@@ -249,7 +249,7 @@
"prefix": "qwen3-next-80b-a3b-instruct-fp8-aw-tp1",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 32768 --max-model-len 16384",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0"
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 2,
@@ -258,7 +258,7 @@
"prefix": "qwen3-next-80b-a3b-instruct-fp8-aw-tp2",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 2 --max-num-batched-tokens 32768 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 4,
@@ -267,7 +267,25 @@
"prefix": "qwen3-next-80b-a3b-instruct-fp8-aw-tp4",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 32768 --max-model-len 16384",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0"
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 1,
"display": "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)",
"dashboard_model": "Qwen3-Next-80B-A3B-Instruct-FP8-mtp-tp1",
"prefix": "qwen3-next-80b-a3b-instruct-fp8-mtp-tp1-aw",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 1 --max-num-batched-tokens 32768 --max-model-len 16384 --speculative-config '{\"num_speculative_tokens\":1, \"method\": \"mtp\"}'",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
},
{
"tp_size": 4,
"display": "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)",
"dashboard_model": "Qwen3-Next-80B-A3B-Instruct-FP8-mtp-tp4",
"prefix": "qwen3-next-80b-a3b-instruct-fp8-mtp-tp4-aw",
"bench_args": "",
"extra_args": "--trust-remote-code --tensor-parallel-size 4 --max-num-batched-tokens 32768 --max-model-len 16384 --speculative-config '{\"num_speculative_tokens\":1, \"method\": \"mtp\"}'",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0"
}
]
},
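Note: each benchmark entry above carries its settings as plain strings — `env_vars` is a newline-separated list of variables and `extra_args` is appended to the server command line. A minimal sketch of reproducing the new "qwen3-next-80b-a3b-instruct-fp8-mtp-tp4-aw" entry by hand, assuming the harness simply exports each `env_vars` line and hands `extra_args` to `vllm serve` (the model path is taken from the accuracy config below; neither detail is shown in this file):

    # Hypothetical manual run of the MTP TP4 benchmark entry (sketch, not the harness itself).
    export AITER_QUICK_REDUCE_QUANTIZATION=INT4
    export ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1
    export ATOM_USE_CUSTOM_ALL_GATHER=0
    export ATOM_USE_FLYDSL_GDR=0
    export ATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0
    vllm serve Qwen/Qwen3-Next-80B-A3B-Instruct-FP8 \
      --trust-remote-code \
      --tensor-parallel-size 4 \
      --max-num-batched-tokens 32768 \
      --max-model-len 16384 \
      --speculative-config '{"num_speculative_tokens":1, "method": "mtp"}'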
26 changes: 19 additions & 7 deletions .github/benchmark/oot_models_accuracy.json
@@ -3,7 +3,7 @@
"model_name": "Qwen3-235B-A22B-Instruct-2507-FP8 TP8+EP8",
"model_path": "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
"extraArgs": "--tensor-parallel-size 8 --enable-expert-parallel",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
"test_level": "nightly",
"accuracy_threshold": 0.87,
@@ -14,7 +14,7 @@
"model_name": "Qwen3-Next-80B-A3B-Instruct-FP8 TP4",
"model_path": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"extraArgs": "--tensor-parallel-size 4 --attention-backend ROCM_AITER_FA",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
"test_level": "nightly",
"accuracy_threshold": 0.76,
@@ -25,7 +25,7 @@
"model_name": "Qwen3.5-397B-A17B-FP8 TP8",
"model_path": "Qwen/Qwen3.5-397B-A17B-FP8",
"extraArgs": "--tensor-parallel-size 8 --attention-backend ROCM_AITER_FA",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
"test_level": "nightly",
"accuracy_threshold": 0.83,
@@ -36,7 +36,7 @@
"model_name": "Qwen3.5-397B-A17B TP8",
"model_path": "Qwen/Qwen3.5-397B-A17B",
"extraArgs": "--tensor-parallel-size 8 --attention-backend ROCM_AITER_FA",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
"test_level": "nightly",
"accuracy_threshold": 0.83,
@@ -47,14 +47,26 @@
"model_name": "Qwen3.5-397B-A17B-MXFP4 TP4",
"model_path": "amd/Qwen3.5-397B-A17B-MXFP4",
"extraArgs": "--tensor-parallel-size 4 --attention-backend ROCM_AITER_FA",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
"test_level": "nightly",
"accuracy_threshold": 0.82,
"accuracy_baseline": 0.82,
"accuracy_baseline_model": "Qwen/Qwen3-235B-A22B-Instruct-2507",
"_baseline_note": "Using Qwen3-235B baseline as proxy; needs CI measurement for Qwen3.5 specific baseline"
},
{
"model_name": "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4",
"model_path": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"extraArgs": "--tensor-parallel-size 4 --speculative-config '{\"num_speculative_tokens\":1, \"method\": \"mtp\"}'",
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
"test_level": "nightly",
"accuracy_threshold": 0.8,
"accuracy_baseline": 0.81,
"accuracy_baseline_model": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"_baseline_note": "Qwen3-Next-80B-A3B-Instruct-FP8 baseline with TP4 (no MTP) as proxy; needs CI measurement for MTP-specific baseline"
},
{
"model_name": "Llama-3.1-8B-Instruct TP1",
"model_path": "meta-llama/Llama-3.1-8B-Instruct",
@@ -157,7 +169,7 @@
"runner": "linux-atom-mi35x-1",
"test_level": "nightly",
"accuracy_threshold": 0.88,
"accuracy_baseline": 0.90,
"accuracy_baseline": 0.9,
"accuracy_baseline_model": "openai/gpt-oss-120b"
},
{
@@ -169,7 +181,7 @@
"runner": "linux-atom-mi35x-4",
"test_level": "nightly",
"accuracy_threshold": 0.88,
"accuracy_baseline": 0.90,
"accuracy_baseline": 0.9,
"accuracy_baseline_model": "openai/gpt-oss-120b"
},
{
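Note: the new MTP entry nests a JSON object inside a JSON string (`--speculative-config '{\"num_speculative_tokens\":1, \"method\": \"mtp\"}'`), so the quoting is easy to misread. A quick check of what the shell will actually receive, assuming the file is a top-level array of entries as the fragment above suggests and that `jq` is available:

    # Print the resolved extraArgs for the new MTP accuracy entry (sketch; array layout assumed).
    jq -r '.[] | select(.model_name == "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4") | .extraArgs' \
      .github/benchmark/oot_models_accuracy.json
    # expected output:
    # --tensor-parallel-size 4 --speculative-config '{"num_speculative_tokens":1, "method": "mtp"}'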
27 changes: 21 additions & 6 deletions .github/workflows/atom-vllm-accuracy-validation.yaml
@@ -24,6 +24,11 @@ on:
required: false
type: boolean
default: false
run_qwen3_next_80b_mtp_tp4:
description: "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4"
required: false
type: boolean
default: false
run_qwen35_397b_fp8_tp8:
description: "Qwen3.5-397B-A17B-FP8 TP8"
required: false
@@ -137,6 +142,7 @@ jobs:
RUN_QWEN3_MOE_TP8: ${{ inputs.run_qwen3_moe_tp8 }}
RUN_QWEN3_NEXT_80B_TP1: ${{ inputs.run_qwen3_next_80b_tp1 }}
RUN_QWEN3_NEXT_80B_TP4: ${{ inputs.run_qwen3_next_80b_tp4 }}
RUN_QWEN3_NEXT_80B_MTP_TP4: ${{ inputs.run_qwen3_next_80b_mtp_tp4 }}
RUN_QWEN35_397B_FP8_TP8: ${{ inputs.run_qwen35_397b_fp8_tp8 }}
RUN_QWEN35_397B_TP8: ${{ inputs.run_qwen35_397b_tp8 }}
RUN_QWEN35_397B_FP4_TP4: ${{ inputs.run_qwen35_397b_fp4_tp4 }}
@@ -169,7 +175,7 @@ jobs:
"model_path": "Qwen/Qwen3-235B-A22B-Instruct-2507-FP8",
"extra_args": "--tensor-parallel-size 8 --enable-expert-parallel",
"accuracy_test_threshold": 0.87,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
},
{
Expand All @@ -178,7 +184,7 @@ jobs:
"model_path": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"extra_args": "--tensor-parallel-size 1",
"accuracy_test_threshold": 0.83,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=1",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=1\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-1",
},
{
Expand All @@ -187,7 +193,16 @@ jobs:
"model_path": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"extra_args": "--tensor-parallel-size 4",
"accuracy_test_threshold": 0.83,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=1",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_USE_FLYDSL_GDR=1\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
},
{
"toggle_env": "RUN_QWEN3_NEXT_80B_MTP_TP4",
"model_name": "Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4",
"model_path": "Qwen/Qwen3-Next-80B-A3B-Instruct-FP8",
"extra_args": "--tensor-parallel-size 4 --speculative-config '{\"num_speculative_tokens\":1, \"method\": \"mtp\"}'",
"accuracy_test_threshold": 0.80,
"env_vars": "AITER_QUICK_REDUCE_QUANTIZATION=INT4\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
},
{
Expand All @@ -196,7 +211,7 @@ jobs:
"model_path": "Qwen/Qwen3.5-397B-A17B-FP8",
"extra_args": "--tensor-parallel-size 8",
"accuracy_test_threshold": 0.83,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
},
{
Expand All @@ -205,7 +220,7 @@ jobs:
"model_path": "Qwen/Qwen3.5-397B-A17B",
"extra_args": "--tensor-parallel-size 8",
"accuracy_test_threshold": 0.83,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-8",
},
{
Expand All @@ -214,7 +229,7 @@ jobs:
"model_path": "amd/Qwen3.5-397B-A17B-MXFP4",
"extra_args": "--tensor-parallel-size 4",
"accuracy_test_threshold": 0.83,
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0",
"env_vars": "ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=1\nATOM_USE_CUSTOM_ALL_GATHER=0\nATOM_FP8_BLOCKSCALE_WEIGHT_PRESHUFFLE=0",
"runner": "linux-atom-mi35x-4",
},
{
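Note: `run_qwen3_next_80b_mtp_tp4` is a plain workflow_dispatch boolean, so the new MTP accuracy job can be triggered on its own once this is merged. A sketch using the GitHub CLI (the workflow file name comes from this diff; the ref is an assumption):

    # Trigger only the Qwen3-Next MTP TP4 accuracy run via workflow_dispatch.
    gh workflow run atom-vllm-accuracy-validation.yaml \
      --ref main \
      -f run_qwen3_next_80b_mtp_tp4=true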
16 changes: 16 additions & 0 deletions .github/workflows/atom-vllm-benchmark.yaml
@@ -36,6 +36,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -69,6 +71,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -102,6 +106,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -135,6 +141,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -168,6 +176,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -201,6 +211,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -234,6 +246,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)
@@ -267,6 +281,8 @@ on:
- Qwen3.5-397B-A17B TP8 (OOB)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (MET)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8-MTP TP4 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP1 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP2 (AW)
- Qwen3-Next-80B-A3B-Instruct-FP8 TP4 (AW)