Skip to content

Commit c215222

Browse files
seungrokj and claude committed
fix(agentx): fix TP sizes and remove hardcoded max-model-len on MI355X agentic benchmarks
- dsv4 and minimaxm2.5 agentic: remove MAX_MODEL_LEN override and --max-model-len flag to let vLLM use server default
- amd-master.yaml: update dsv4 agentic TP from 4→8, minimaxm2.5 agentic TP from 4→1
- launch_mi355x-amds.sh: extend HF_HUB_CACHE_MOUNT override to vllm framework for DeepSeek-V4-Pro

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 1064327 commit c215222

4 files changed

Lines changed: 11 additions & 21 deletions

File tree

.github/configs/amd-master.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -652,8 +652,8 @@ dsv4-fp4-mi355x-vllm-agentic:
652652
agentic-coding:
653653
- duration: 1800
654654
search-space:
655-
- { tp: 4, offloading: none, conc-list: [16, 24, 32, 40] }
656-
- { tp: 4, offloading: lmcache, conc-list: [16, 24, 32, 40] }
655+
- { tp: 8, offloading: none, conc-list: [16, 24, 32, 40] }
656+
- { tp: 8, offloading: lmcache, conc-list: [16, 24, 32, 40] }
657657

658658
minimaxm2.5-fp8-mi355x-vllm:
659659
image: vllm/vllm-openai-rocm:v0.21.0
@@ -786,8 +786,8 @@ minimaxm2.5-fp4-mi355x-vllm-agentic:
786786
agentic-coding:
787787
- duration: 1800
788788
search-space:
789-
- { tp: 4, offloading: none, conc-list: [16, 24, 32, 40] }
790-
- { tp: 4, offloading: lmcache, conc-list: [16, 24, 32, 40] }
789+
- { tp: 1, offloading: none, conc-list: [16, 24, 32, 40] }
790+
- { tp: 1, offloading: lmcache, conc-list: [16, 24, 32, 40] }
791791

792792
minimaxm2.5-fp8-mi300x-vllm:
793793
image: vllm/vllm-openai-rocm:v0.16.0

benchmarks/single_node/agentic/dsv4_fp4_mi355x_vllm.sh

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,9 @@ MAX_DELAY=${MAX_DELAY:-60}
2222
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
2323
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
2424
EP_SIZE=${EP_SIZE:-1}
25-
# Kimi-K2.5 advertises a 262144-token context window in vLLM 0.21.0.
26-
# Matrix defaults may export MAX_MODEL_LEN=0 to mean "server default"; for this
27-
# script we need the concrete value so AgentX filters prompt+max_tokens against
28-
# the same limit vLLM enforces.
29-
if [[ -z "${MAX_MODEL_LEN:-}" || "$MAX_MODEL_LEN" == "0" ]]; then
30-
MAX_MODEL_LEN=262144
31-
fi
25+
#if [[ -z "${MAX_MODEL_LEN:-}" || "$MAX_MODEL_LEN" == "0" ]]; then
26+
# MAX_MODEL_LEN=262144
27+
#fi
3228

3329
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
3430
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -249,7 +245,6 @@ VLLM_CMD=(
249245
--tokenizer-mode deepseek_v4 \
250246
--reasoning-parser deepseek_v4 \
251247
--compilation-config '{"mode":3,"cudagraph_mode":"FULL_AND_PIECEWISE"}' \
252-
--max-model-len "$MAX_MODEL_LEN"
253248
--max-num-seqs "$CONC"
254249
"${PREFIX_CACHE_ARGS[@]}"
255250
"${OFFLOAD_ARGS[@]}"

benchmarks/single_node/agentic/minimaxm2.5_fp4_mi355x.sh

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,9 @@ MAX_DELAY=${MAX_DELAY:-60}
2222
ADVANCE_MIN=${ADVANCE_MIN:-0.0}
2323
ADVANCE_MAX=${ADVANCE_MAX:-0.7}
2424
EP_SIZE=${EP_SIZE:-1}
25-
# Kimi-K2.5 advertises a 262144-token context window in vLLM 0.21.0.
26-
# Matrix defaults may export MAX_MODEL_LEN=0 to mean "server default"; for this
27-
# script we need the concrete value so AgentX filters prompt+max_tokens against
28-
# the same limit vLLM enforces.
29-
if [[ -z "${MAX_MODEL_LEN:-}" || "$MAX_MODEL_LEN" == "0" ]]; then
30-
MAX_MODEL_LEN=262144
31-
fi
25+
#if [[ -z "${MAX_MODEL_LEN:-}" || "$MAX_MODEL_LEN" == "0" ]]; then
26+
# MAX_MODEL_LEN=262144
27+
#fi
3228

3329
if [[ -n "${SLURM_JOB_ID:-}" ]]; then
3430
echo "JOB $SLURM_JOB_ID running on ${SLURMD_NODENAME:-unknown}"
@@ -245,7 +241,6 @@ VLLM_CMD=(
245241
--block-size=32
246242
--trust-remote-code
247243
--attention-backend "ROCM_AITER_FA" \
248-
--max-model-len "$MAX_MODEL_LEN"
249244
--max-num-seqs "$CONC"
250245
"${PREFIX_CACHE_ARGS[@]}"
251246
"${OFFLOAD_ARGS[@]}"

runners/launch_mi355x-amds.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ else
214214
fi
215215

216216
# to prevent reading outdated saved model. use a fresh model from hf repo
217-
if [[ "$FRAMEWORK" == "atom" ]] && [[ "$MODEL" == "deepseek-ai/DeepSeek-V4-Pro" ]]; then
217+
if [[ ("$FRAMEWORK" == "vllm" || "$FRAMEWORK" == "atom") ]] && [[ "$MODEL" == "deepseek-ai/DeepSeek-V4-Pro" ]]; then
218218
export HF_HUB_CACHE_MOUNT="/it-share/hf-hub-cache/"
219219
fi
220220

0 commit comments

Comments
 (0)