#!/usr/bin/env bash

# MiniMax-M3 MXFP8 MI300X (gfx942) single-node vLLM recipe.
# Reuses the dedicated ROCm image and converts MXFP8 MoE weights to 128x128
# block FP8 at load time. Block size 128 is mandatory for MSA sparse attention.
# Keep the default BF16 KV cache on gfx942: the checkpoint has no calibrated
# q/prob scales for ROCm FP8 attention, and vLLM's fallback scale of 1.0
# corrupts model accuracy.
# Target image vLLM revision: 4a560dd8db67c270f5e2afb614558271b76f2294.

# Load the shared benchmark helpers, resolved relative to this script's path.
source "$(dirname "$0")/../../benchmark_lib.sh"

@@ -24,6 +26,46 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
2426 echo " JOB $SLURM_JOB_ID running on $SLURMD_NODENAME "
2527fi
2628
# Locate the directory that contains the installed `vllm` package (i.e. the
# parent of the package directory). The patch below is applied relative to
# this directory with -p1. The heredoc delimiter is quoted so the shell does
# not expand anything inside the Python snippet.
if ! VLLM_PACKAGE_ROOT="$(
  python3 - <<'PY'
from pathlib import Path

import vllm

print(Path(vllm.__file__).resolve().parent.parent)
PY
)"; then
  echo "Failed to locate the installed vLLM package" >&2
  exit 1
fi
# Guard against an empty result or a path that does not actually hold vllm/.
if [[ -z "$VLLM_PACKAGE_ROOT" || ! -d "$VLLM_PACKAGE_ROOT/vllm" ]]; then
  echo "Invalid installed vLLM package root: $VLLM_PACKAGE_ROOT" >&2
  exit 1
fi

# The patch file is expected to sit next to this script.
MXFP8_PATCH="$(dirname "$0")/minimaxm3_mi300x_mxfp8.patch"
if [[ ! -f "$MXFP8_PATCH" ]]; then
  echo "MI300X MXFP8 patch is missing: $MXFP8_PATCH" >&2
  exit 1
fi

# Shared arguments for non-mutating probes. The patch is always fed on stdin,
# so the shell opens $MXFP8_PATCH before patch changes into the -d directory.
PATCH_CHECK_ARGS=(--batch --silent -d "$VLLM_PACKAGE_ROOT" -p1 --dry-run)

# Idempotent application:
#   1. a clean reverse dry run means the patch is already fully applied;
#   2. otherwise a clean forward dry run means it is safe to apply for real;
#   3. anything else is a partially patched or mismatched tree -> abort.
# --forward is passed alongside --reverse so that --batch does not silently
# flip the direction of a patch that merely "looks" reversed.
if patch "${PATCH_CHECK_ARGS[@]}" --reverse --forward <"$MXFP8_PATCH"; then
  echo "MI300X MXFP8 patch is already fully applied"
elif patch "${PATCH_CHECK_ARGS[@]}" --forward <"$MXFP8_PATCH"; then
  if ! patch --batch --forward -d "$VLLM_PACKAGE_ROOT" -p1 <"$MXFP8_PATCH"; then
    echo "Failed to apply the MI300X MXFP8 patch" >&2
    exit 1
  fi
else
  echo "Installed vLLM is neither cleanly patchable nor fully patched" >&2
  exit 1
fi

# Final verification: whichever branch ran above, the patch must now reverse
# cleanly, otherwise the installed tree is not in the expected state.
if ! patch "${PATCH_CHECK_ARGS[@]}" --reverse --forward <"$MXFP8_PATCH"; then
  echo "MI300X MXFP8 patch verification failed" >&2
  exit 1
fi

# A MODEL value that is not an absolute path is passed to `hf download`;
# absolute paths are left alone (presumably already-local checkpoints).
if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi

2971if [ -n " $ROCR_VISIBLE_DEVICES " ]; then
0 commit comments