Add ISL/OSL columns to the collected results summary table (#249)

jgangani · Jatin Gangani · cquil11 · commit 1dd76e56dbe6 · 2025-11-24T08:39:43.000-06:00
Co-authored-by: Jatin Gangani <jgangani@dc2-container-xterm-014.prd.it.nvidia.com>
diff --git a/utils/process_result.py b/utils/process_result.py
@@ -18,6 +18,8 @@
 framework = os.environ.get('FRAMEWORK')
 precision = os.environ.get('PRECISION')
 mtp_mode = os.environ.get('MTP_MODE')
+isl = os.environ.get('ISL')
+osl = os.environ.get('OSL')
 
 with open(f'{result_filename}.json') as f:
     bmk_result = json.load(f)
@@ -31,6 +33,8 @@
     'model': bmk_result['model_id'],
     'framework': framework,
     'precision': precision,
+    'isl': int(isl) if isl else None,
+    'osl': int(osl) if osl else None,
     'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
     'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
     'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
diff --git a/utils/summarize.py b/utils/summarize.py
@@ -9,23 +9,27 @@
     with open(result_path) as f:
         result = json.load(f)
     results.append(result)
-results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r['tp'], r['ep'], r['conc']))
+results.sort(key=lambda r: (r.get('model', 'unknown'), r['hw'], r.get('framework', 'vllm'), r.get('precision', 'fp8'), r.get('isl', 0), r.get('osl', 0), r['tp'], r['ep'], r['conc']))
 
 summary_header = f'''\
-| Model | Hardware | Framework | Precision | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
-| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
+| Model | Hardware | Framework | Precision | ISL | OSL | TP | EP | DP Attention | Conc | TTFT (ms) | TPOT (ms) | Interactivity (tok/s/user) | E2EL (s) | TPUT per GPU | Output TPUT per GPU | Input TPUT per GPU |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |\
 '''
 print(summary_header)
 
 for result in results:
     framework = result.get('framework', 'vllm')
     precision = result.get('precision', 'fp8')
     model = result.get('model', 'unknown')
+    isl = result.get('isl', 'N/A')
+    osl = result.get('osl', 'N/A')
     print(
         f"| {model} "
         f"| {result['hw'].upper()} "
         f"| {framework.upper()} "
         f"| {precision.upper()} "
+        f"| {isl} "
+        f"| {osl} "
         f"| {result['tp']} "
         f"| {result['ep']} "
         f"| {result['dp_attention']} "