Skip to content

Commit 06c73f0

Browse files
author
Jatin Gangani
committed
separate prefill and decode gpu tpt
1 parent 1221659 commit 06c73f0

3 files changed

Lines changed: 18 additions & 7 deletions

File tree

.github/workflows/benchmark-multinode-tmpl.yml

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,10 +93,14 @@ jobs:
9393
for result_file in ${RESULT_FILENAME}_*.json; do
9494
if [ -f "$result_file" ]; then
9595
echo "Processing $result_file"
96-
# Extract GPU count from filename for tp_size calculation
97-
gpus=$(echo "$result_file" | sed "s/.*_gpus\([0-9]*\)\.json/\1/")
96+
# Extract GPU count, prefill_gpus and decode_gpus from filename for tp_size calculation
97+
gpus=$(echo "$result_file" | sed -n "s/.*_gpus_\([0-9]*\).*\.json/\1/p")
98+
prefill_gpus=$(echo "$result_file" | sed -n "s/.*_ctx_\([0-9]*\).*\.json/\1/p")
99+
decode_gpus=$(echo "$result_file" | sed -n "s/.*_gen_\([0-9]*\).*\.json/\1/p")
100+
98101
if [ -n "$gpus" ]; then
99-
TP=$gpus RESULT_FILENAME=${result_file%.json} EP_SIZE=1 DP_ATTENTION=false python3 utils/process_result.py
102+
echo "Extracted: gpus=$gpus, prefill_gpus=$prefill_gpus, decode_gpus=$decode_gpus"
103+
TP=$gpus RESULT_FILENAME=${result_file%.json} EP_SIZE=1 DP_ATTENTION=false PREFILL_GPUS="$prefill_gpus" DECODE_GPUS="$decode_gpus" python3 utils/process_result.py
100104
fi
101105
fi
102106
done

runners/launch_gb200-nv.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ if [[ $FRAMEWORK == "dynamo-trtllm" ]]; then
4949
rm -rf "$DYNAMO_PATH"
5050
git clone https://github.com/ai-dynamo/dynamo.git "$DYNAMO_PATH"
5151
cd "$DYNAMO_PATH"
52-
git checkout release/0.5.1-rc0.pre1
52+
git checkout tanmayv-sa
5353
git submodule update --init --recursive
5454

5555
# Navigate to performance sweeps directory
@@ -163,7 +163,7 @@ else # if statement at the top - search for "FRAMEWORK_DIFF_IF_STATEMENT #2"
163163
# Always clone and setup Dynamo
164164
echo "Cloning Dynamo repository..."
165165
rm -rf "$DYNAMO_PATH"
166-
git clone --branch update-wait-for-model https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
166+
git clone --branch update-result-file-name https://github.com/Elnifio/dynamo.git $DYNAMO_PATH
167167
cd "$DYNAMO_PATH"
168168

169169
# Navigate to corresponding directory
@@ -272,7 +272,7 @@ else # search for "FRAMEWORK_DIFF_IF_STATEMENT #3" for this if-statement
272272

273273
# Result JSON files are contained within the result directory
274274
for result_file in $(find $LOGS_DIR -type f); do
275-
# result_file should directly be isl_ISL_osl_OSL_concurrency_CONC_req_rate_R_gpusN.json
275+
# result_file name should have the form isl_ISL_osl_OSL_concurrency_CONC_req_rate_R_gpus_N_ctx_P_gen_D.json (N = total GPUs, P = prefill GPUs, D = decode GPUs)
276276
file_name=$(basename $result_file)
277277
if [ -f $result_file ]; then
278278
# Copy the result file to workspace with a unique name

utils/process_result.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
hw = os.environ.get('RUNNER_TYPE')
88
tp_size = int(os.environ.get('TP'))
99
ep_size = int(os.environ.get('EP_SIZE'))
10+
prefill_gpus_str = os.environ.get('PREFILL_GPUS', '')
11+
decode_gpus_str = os.environ.get('DECODE_GPUS', '')
12+
13+
# If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
14+
prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
15+
decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
1016
dp_attention = os.environ.get('DP_ATTENTION')
1117
result_filename = os.environ.get('RESULT_FILENAME')
1218
framework = os.environ.get('FRAMEWORK')
@@ -26,7 +32,8 @@
2632
'framework': framework,
2733
'precision': precision,
2834
'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
29-
'output_tput_per_gpu': float(bmk_result['output_throughput']) / tp_size
35+
'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
36+
'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
3037
}
3138

3239
if mtp_mode: # MTP

0 commit comments

Comments
 (0)