Skip to content

Commit 598eb8b

Browse files
authored
Revert "Change dsr1 fp8 image to lmsysorg/sglang 0.5.5.post3 and fp4 image to…" (#295)
This reverts commit 0fe9dcc.
1 parent 0fe9dcc commit 598eb8b

5 files changed

Lines changed: 3 additions & 14 deletions

File tree

.github/configs/amd-master.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
dsr1-fp4-mi355x-sglang:
2-
image: lmsysorg/sglang:v0.5.5.post2-rocm700-mi35x
2+
image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
33
model: amd/DeepSeek-R1-0528-MXFP4-Preview
44
model-prefix: dsr1
55
runner: mi355x
@@ -63,7 +63,7 @@ dsr1-fp8-mi325x-sglang:
6363
- { tp: 8, conc-start: 4, conc-end: 64 }
6464

6565
dsr1-fp8-mi355x-sglang:
66-
image: lmsysorg/sglang:v0.5.5.post3-rocm700-mi35x
66+
image: rocm/7.0:rocm7.0_ubuntu_22.04_sgl-dev-v0.5.2-rocm7.0-mi35x-20250915
6767
model: deepseek-ai/DeepSeek-R1-0528
6868
model-prefix: dsr1
6969
runner: mi355x

benchmarks/dsr1_fp4_mi355x_docker.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
# RESULT_FILENAME
1212
# NUM_PROMPTS
1313
export SGLANG_USE_AITER=1
14-
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
1514

1615
PREFILL_SIZE=196608
1716
if [[ "$ISL" == "8192" && "$OSL" == "1024" ]]; then

benchmarks/dsr1_fp4_mi355x_slurm.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010
# RANDOM_RANGE_RATIO
1111
# RESULT_FILENAME
1212
export SGLANG_USE_AITER=1
13-
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
1413
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
1514

1615
PREFILL_SIZE=196608

benchmarks/dsr1_fp8_mi355x_docker.sh

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -14,14 +14,10 @@
1414
# https://rocm.docs.amd.com/en/docs-7.0-docker/benchmark-docker/inference-sglang-deepseek-r1-fp8.html
1515

1616
export SGLANG_USE_AITER=1
17-
export RCCL_MSCCL_ENABLE=0
18-
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
19-
2017

2118
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
2219

2320
python3 -m sglang.launch_server \
24-
--attention-backend aiter \
2521
--model-path $MODEL \
2622
--host=0.0.0.0 \
2723
--port $PORT \
@@ -31,7 +27,6 @@ python3 -m sglang.launch_server \
3127
--mem-fraction-static 0.8 --disable-radix-cache \
3228
--num-continuous-decode-steps 4 \
3329
--max-prefill-tokens 196608 \
34-
--enable-torch-compile \
3530
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
3631

3732
SERVER_PID=$!

benchmarks/dsr1_fp8_mi355x_slurm.sh

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,11 @@
1212

1313
export HF_MODULES_CACHE="/tmp/hf_modules_cache/"
1414
export SGLANG_USE_AITER=1
15-
export RCCL_MSCCL_ENABLE=0
16-
export ROCM_QUICK_REDUCE_QUANTIZATION=INT4
1715

1816
SERVER_LOG=$(mktemp /tmp/server-XXXXXX.log)
1917

2018
set -x
2119
python3 -m sglang.launch_server \
22-
--attention-backend aiter \
2320
--model-path $MODEL \
2421
--host=0.0.0.0 \
2522
--port $PORT \
@@ -30,8 +27,7 @@ python3 -m sglang.launch_server \
3027
--disable-radix-cache \
3128
--num-continuous-decode-steps 4 \
3229
--max-prefill-tokens 196608 \
33-
--cuda-graph-max-bs 128 \
34-
--enable-torch-compile > $SERVER_LOG 2>&1 &
30+
--cuda-graph-max-bs 128 > $SERVER_LOG 2>&1 &
3531

3632
SERVER_PID=$!
3733

0 commit comments

Comments
 (0)