Skip to content

Commit 4f1f0fa

Browse files
committed
fix(profile): limit Flash vLLM trace to decode steps
1 parent 9b534f7 commit 4f1f0fa

1 file changed

Lines changed: 5 additions & 1 deletion

File tree

benchmarks/single_node/dsv4_fp4_b300_vllm_mtp.sh

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,13 @@ fi
4646

#######################################
# Print the JSON object handed to --profiler-config.
# Arguments: $1 - model identifier (may be empty)
#            $2 - directory the torch profiler writes traces to
# Outputs:   a single JSON object on stdout
#######################################
_profiler_config_json() {
  local model=$1
  local trace_dir=$2

  # Escape backslashes first, then double quotes, so an unusual directory
  # name cannot terminate the JSON string it is embedded in.
  trace_dir=${trace_dir//\\/\\\\}
  trace_dir=${trace_dir//\"/\\\"}

  # Fields shared by every model; written once so the variants cannot drift.
  local fields="\"profiler\":\"torch\",\"torch_profiler_dir\":\"${trace_dir}\""

  # DeepSeek-V4-Flash gets extra fields. Per the commit title these limit
  # the trace to decode steps; NOTE(review): exact field semantics are
  # defined by vLLM, not visible here.
  if [[ "$model" == "deepseek-ai/DeepSeek-V4-Flash" ]]; then
    fields+=",\"ignore_frontend\":true,\"delay_iterations\":1,\"max_iterations\":2,\"active_iterations\":2,\"torch_profiler_with_stack\":false"
  fi

  printf '%s\n' "{${fields}}"
}

# Extra CLI arguments for profiling; stays empty unless PROFILE=1.
PROFILE_ARGS=()
if [[ "${PROFILE:-}" == "1" ]]; then
  # MODEL is defaulted to empty so an unset value selects the generic config
  # instead of tripping `set -u` (if the script enables it).
  PROFILER_CONFIG=$(_profiler_config_json "${MODEL:-}" "${VLLM_TORCH_PROFILER_DIR:-/workspace/}")
  PROFILE_ARGS=(
    --profiler-config
    "$PROFILER_CONFIG"
  )
fi
5458

0 commit comments

Comments
 (0)