Skip to content

Commit 95e79da

Browse files
Oseltamivir and codex committed
perf(mi300x): use load-time block FP8 MoE conversion
Co-authored-by: OpenAI Codex <codex@openai.com>
Signed-off-by: Oseltamivir <58582368+Oseltamivir@users.noreply.github.com>
1 parent ba5879b commit 95e79da

6 files changed

Lines changed: 537 additions & 9 deletions

File tree

.github/configs/amd-master.yaml

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2851,10 +2851,11 @@ minimaxm3-fp8-mi355x-vllm-mtp:
28512851
- { tp: 4, conc-start: 1, conc-end: 64, spec-decoding: mtp }
28522852
- { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256, spec-decoding: mtp }
28532853

2854-
# MiniMax-M3 MXFP8 MI300X day-zero recipe. Reuse the dedicated ROCm image and
2855-
# MI355X serving shape, but retain the default BF16 KV cache because this
2856-
# checkpoint lacks calibrated ROCm FP8 attention scales. Use the TP8-only H100
2857-
# search space: TP8 for latency and TP8+EP8 (TEP) at high concurrency.
2854+
# MiniMax-M3 MXFP8 MI300X recipe. Convert the checkpoint's MXFP8 MoE weights to
2855+
# 128x128 block FP8 at load time and use the regular Triton block-FP8 backend.
2856+
# Retain the default BF16 KV cache because this checkpoint lacks calibrated
2857+
# ROCm FP8 attention scales. Use TP8 for latency and TP8+EP8 at high
2858+
# concurrency.
28582859
minimaxm3-fp8-mi300x-vllm:
28592860
image: vllm/vllm-openai-rocm:minimax-m3
28602861
model: MiniMaxAI/MiniMax-M3-MXFP8

benchmarks/single_node/fixed_seq_len/minimaxm3_fp8_mi300x.sh

Lines changed: 46 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
#!/usr/bin/env bash
22

33
# MiniMax-M3 MXFP8 MI300X (gfx942) single-node vLLM recipe.
4-
# Reuses the dedicated ROCm image and the MI355X serving shape. Block size 128
5-
# is mandatory for MSA sparse attention. Keep the default BF16 KV cache on
6-
# gfx942: the checkpoint has no calibrated q/prob scales for ROCm FP8
7-
# attention, and vLLM's fallback scale of 1.0 corrupts model accuracy.
4+
# Reuses the dedicated ROCm image and converts MXFP8 MoE weights to 128x128
5+
# block FP8 at load time. Block size 128 is mandatory for MSA sparse attention.
6+
# Keep the default BF16 KV cache on gfx942: the checkpoint has no calibrated
7+
# q/prob scales for ROCm FP8 attention, and vLLM's fallback scale of 1.0
8+
# corrupts model accuracy.
9+
# Target image vLLM revision: 4a560dd8db67c270f5e2afb614558271b76f2294.
810

911
source "$(dirname "$0")/../../benchmark_lib.sh"
1012

@@ -24,6 +26,46 @@ if [[ -n "$SLURM_JOB_ID" ]]; then
2426
echo "JOB $SLURM_JOB_ID running on $SLURMD_NODENAME"
2527
fi
2628

29+
if ! VLLM_PACKAGE_ROOT="$(
30+
python3 - <<'PY'
31+
from pathlib import Path
32+
33+
import vllm
34+
35+
print(Path(vllm.__file__).resolve().parent.parent)
36+
PY
37+
)"; then
38+
echo "Failed to locate the installed vLLM package" >&2
39+
exit 1
40+
fi
41+
if [[ -z "$VLLM_PACKAGE_ROOT" || ! -d "$VLLM_PACKAGE_ROOT/vllm" ]]; then
42+
echo "Invalid installed vLLM package root: $VLLM_PACKAGE_ROOT" >&2
43+
exit 1
44+
fi
45+
46+
MXFP8_PATCH="$(dirname "$0")/minimaxm3_mi300x_mxfp8.patch"
47+
if [[ ! -f "$MXFP8_PATCH" ]]; then
48+
echo "MI300X MXFP8 patch is missing: $MXFP8_PATCH" >&2
49+
exit 1
50+
fi
51+
52+
PATCH_CHECK_ARGS=(--batch --silent -d "$VLLM_PACKAGE_ROOT" -p1 --dry-run)
53+
if patch "${PATCH_CHECK_ARGS[@]}" --reverse --forward < "$MXFP8_PATCH"; then
54+
echo "MI300X MXFP8 patch is already fully applied"
55+
elif patch "${PATCH_CHECK_ARGS[@]}" --forward < "$MXFP8_PATCH"; then
56+
if ! patch --batch --forward -d "$VLLM_PACKAGE_ROOT" -p1 < "$MXFP8_PATCH"; then
57+
echo "Failed to apply the MI300X MXFP8 patch" >&2
58+
exit 1
59+
fi
60+
else
61+
echo "Installed vLLM is neither cleanly patchable nor fully patched" >&2
62+
exit 1
63+
fi
64+
if ! patch "${PATCH_CHECK_ARGS[@]}" --reverse --forward < "$MXFP8_PATCH"; then
65+
echo "MI300X MXFP8 patch verification failed" >&2
66+
exit 1
67+
fi
68+
2769
if [[ "$MODEL" != /* ]]; then hf download "$MODEL"; fi
2870

2971
if [ -n "$ROCR_VISIBLE_DEVICES" ]; then

0 commit comments

Comments
 (0)