Skip to content

Commit 1dd76e5

Browse files
jgangani (Jatin Gangani)
authored and committed
adding ISL/OSL to collect results table summary (#249)
Co-authored-by: Jatin Gangani <jgangani@dc2-container-xterm-014.prd.it.nvidia.com>
1 parent f027b52 commit 1dd76e5

2 files changed

Lines changed: 11 additions & 3 deletions

File tree

utils/process_result.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,8 @@
1818
framework = os.environ.get('FRAMEWORK')
1919
precision = os.environ.get('PRECISION')
2020
mtp_mode = os.environ.get('MTP_MODE')
21+
isl = os.environ.get('ISL')
22+
osl = os.environ.get('OSL')
2123

2224
with open(f'{result_filename}.json') as f:
2325
bmk_result = json.load(f)
@@ -31,6 +33,8 @@
3133
'model': bmk_result['model_id'],
3234
'framework': framework,
3335
'precision': precision,
36+
'isl': int(isl) if isl else None,
37+
'osl': int(osl) if osl else None,
3438
'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
3539
'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
3640
'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus

utils/summarize.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,23 +9,27 @@
99
with open(result_path) as f:
1010
result = json.load(f)
1111
results.append(result)
12-
results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r['tp'], r['ep'], r['conc']))
12+
# Order rows by model, hardware, framework, precision, ISL, OSL, TP, EP, concurrency.
# utils/process_result.py writes 'isl'/'osl' as None when the ISL/OSL env var is
# unset, so the key exists and a .get() default would never apply; `or 0` maps
# both a missing key and a stored None to 0 so None is never compared to an int
# (which would raise TypeError during the sort).
results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r.get('isl') or 0, r.get('osl') or 0, r['tp'], r['ep'], r['conc']))
1313

1414
summary_header = f'''\
15-
| Model | Hardware | Framework | Precision | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
16-
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
15+
| Model | Hardware | Framework | Precision | ISL | OSL | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
16+
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
1717
'''
1818
print(summary_header)
1919

2020
for result in results:
2121
framework = result.get('framework', 'vllm')
2222
precision = result.get('precision', 'fp8')
2323
model = result.get('model', 'unknown')
24+
# Show 'N/A' both when the key is absent and when it is present but None
# (utils/process_result.py stores None if the ISL env var was not set).
isl = 'N/A' if result.get('isl') is None else result['isl']
25+
# Show 'N/A' both when the key is absent and when it is present but None
# (utils/process_result.py stores None if the OSL env var was not set).
osl = 'N/A' if result.get('osl') is None else result['osl']
2426
print(
2527
f"| {model} "
2628
f"| {result['hw'].upper()} "
2729
f"| {framework.upper()} "
2830
f"| {precision.upper()} "
31+
f"| {isl} "
32+
f"| {osl} "
2933
f"| {result['tp']} "
3034
f"| {result['ep']} "
3135
f"| {result['dp_attention']} "

0 commit comments

Comments
 (0)