Skip to content

Commit 3b7d8a7

Browse files
committed
fix(profile): target third Flash decode step
1 parent 6a824fc commit 3b7d8a7

1 file changed

Lines changed: 2 additions & 2 deletions

File tree

benchmarks/single_node/dsv4_fp4_b300_vllm_mtp.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ PROFILE_ARGS=()
48 48
if [[ "${PROFILE:-}" == "1" ]]; then
49 49
PROFILER_CONFIG="{\"profiler\":\"torch\",\"torch_profiler_dir\":\"${VLLM_TORCH_PROFILER_DIR:-/workspace/}\"}"
50 50
if [[ "$MODEL" == "deepseek-ai/DeepSeek-V4-Flash" ]]; then
51-
PROFILER_CONFIG="{\"profiler\":\"torch\",\"torch_profiler_dir\":\"${VLLM_TORCH_PROFILER_DIR:-/workspace/}\",\"ignore_frontend\":true,\"delay_iterations\":1,\"max_iterations\":1,\"active_iterations\":1,\"torch_profiler_with_stack\":false}"
51+
PROFILER_CONFIG="{\"profiler\":\"torch\",\"torch_profiler_dir\":\"${VLLM_TORCH_PROFILER_DIR:-/workspace/}\",\"ignore_frontend\":true,\"delay_iterations\":3,\"max_iterations\":1,\"active_iterations\":1,\"torch_profiler_with_stack\":false}"
52 52
fi
53 53
PROFILE_ARGS=(
54 54
--profiler-config
@@ -74,7 +74,7 @@ BENCHMARK_MAX_CONCURRENCY=$CONC
74 74
BENCHMARK_NUM_WARMUPS=$((2 * BENCHMARK_MAX_CONCURRENCY))
75 75

76 76
if [[ "${PROFILE:-}" == "1" && "$MODEL" == "deepseek-ai/DeepSeek-V4-Flash" ]]; then
77-
BENCHMARK_OUTPUT_LEN=1
77+
BENCHMARK_OUTPUT_LEN=3
78 78
BENCHMARK_NUM_PROMPTS=256
79 79
BENCHMARK_MAX_CONCURRENCY=256
80 80
BENCHMARK_NUM_WARMUPS=4096

0 commit comments

Comments (0)