From b2f0b86f4bdca147497b167321b15686701cd77d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 6 Nov 2025 22:20:07 +0000
Subject: [PATCH 1/5] Add unit tests for utils/process_result.py and pytest CI
 workflow

Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com>
---
 .../workflows/test-utils-process-result.yml   |  41 ++
 .gitignore                                    |   7 +-
 utils/test_process_result.py                  | 554 ++++++++++++++++++
 3 files changed, 601 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/test-utils-process-result.yml
 create mode 100644 utils/test_process_result.py

diff --git a/.github/workflows/test-utils-process-result.yml b/.github/workflows/test-utils-process-result.yml
new file mode 100644
index 000000000..ea84e06bd
--- /dev/null
+++ b/.github/workflows/test-utils-process-result.yml
@@ -0,0 +1,41 @@
+name: Test Utils Process Result
+
+on:
+  pull_request:
+    paths:
+      - 'utils/process_result.py'
+      - 'utils/test_process_result.py'
+      - '.github/workflows/test-utils-process-result.yml'
+  push:
+    branches:
+      - main
+    paths:
+      - 'utils/process_result.py'
+      - 'utils/test_process_result.py'
+      - '.github/workflows/test-utils-process-result.yml'
+
+jobs:
+  test:
+    if: github.event_name != 'pull_request' || github.event.pull_request.draft != true
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+    
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Set up Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest
+
+      - name: Run pytest
+        run: |
+          cd utils
+          pytest test_process_result.py -v
diff --git a/.gitignore b/.gitignore
index 03d36472a..146afad17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
 **/__pycache__/**
-**/.coverage
\ No newline at end of file
+**/.coverage
+**/.pytest_cache/
+*.pyc
+*.pyo
+.coverage.*
+htmlcov/
\ No newline at end of file
diff --git a/utils/test_process_result.py b/utils/test_process_result.py
new file mode 100644
index 000000000..165daf335
--- /dev/null
+++ b/utils/test_process_result.py
@@ -0,0 +1,554 @@
+import pytest
+import json
+import os
+import sys
+from pathlib import Path
+from unittest.mock import patch, mock_open
+
+
+@pytest.fixture
+def sample_benchmark_result():
+    """Sample benchmark result JSON data."""
+    return {
+        'max_concurrency': 8,
+        'model_id': 'meta-llama/Llama-3-70b',
+        'total_token_throughput': 10000.0,
+        'output_throughput': 3000.0,
+        'ttft_ms': 150.5,
+        'tpot_ms': 25.0,
+        'e2e_latency_ms': 500.0,
+        'decode_tpot_ms': 30.0,
+        'prefill_tpot_ms': 20.0
+    }
+
+
+@pytest.fixture
+def basic_env_vars():
+    """Basic environment variables for testing."""
+    return {
+        'RUNNER_TYPE': 'h200',
+        'TP': '8',
+        'EP_SIZE': '1',
+        'PREFILL_GPUS': '',
+        'DECODE_GPUS': '',
+        'DP_ATTENTION': 'false',
+        'RESULT_FILENAME': 'test_result',
+        'FRAMEWORK': 'vllm',
+        'PRECISION': 'fp8',
+        'MTP_MODE': ''
+    }
+
+
+@pytest.fixture
+def temp_result_file(tmp_path, sample_benchmark_result):
+    """Create a temporary benchmark result file."""
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    return result_file
+
+
+def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test basic processing of benchmark results."""
+    # Create result file
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    # Change to tmp_path directory
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            # Import and execute the script
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        # Check output file was created
+        output_file = tmp_path / 'agg_test_result.json'
+        assert output_file.exists()
+        
+        # Load and verify output
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        assert result['hw'] == 'h200'
+        assert result['tp'] == 8
+        assert result['ep'] == 1
+        assert result['dp_attention'] == 'false'
+        assert result['conc'] == 8
+        assert result['model'] == 'meta-llama/Llama-3-70b'
+        assert result['framework'] == 'vllm'
+        assert result['precision'] == 'fp8'
+        assert result['tput_per_gpu'] == 10000.0 / 8
+        assert result['output_tput_per_gpu'] == 3000.0 / 8
+        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_ms_to_seconds_conversion(tmp_path, basic_env_vars):
+    """Test conversion of millisecond values to seconds."""
+    benchmark_result = {
+        'max_concurrency': 4,
+        'model_id': 'test/model',
+        'total_token_throughput': 5000.0,
+        'output_throughput': 1500.0,
+        'ttft_ms': 200.0,
+        'e2e_latency_ms': 1000.0,
+        'decode_latency_ms': 500.0
+    }
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # Check ms values were converted to seconds
+        assert result['ttft'] == 200.0 / 1000.0
+        assert result['e2e_latency'] == 1000.0 / 1000.0
+        assert result['decode_latency'] == 500.0 / 1000.0
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars):
+    """Test conversion of tpot (time per output token) to intvty (interactivity/throughput)."""
+    benchmark_result = {
+        'max_concurrency': 2,
+        'model_id': 'test/model',
+        'total_token_throughput': 2000.0,
+        'output_throughput': 500.0,
+        'tpot_ms': 25.0,
+        'decode_tpot_ms': 20.0,
+        'prefill_tpot_ms': 30.0
+    }
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # Check tpot values were converted to intvty
+        # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms
+        # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0
+        assert result['tpot'] == 25.0 / 1000.0  # Converted from ms to s
+        assert result['intvty'] == 1000.0 / 25.0  # intvty = 1000.0 / tpot_ms
+        
+        assert result['decode_tpot'] == 20.0 / 1000.0
+        assert result['decode_intvty'] == 1000.0 / 20.0
+        
+        assert result['prefill_tpot'] == 30.0 / 1000.0
+        assert result['prefill_intvty'] == 1000.0 / 30.0
+        
+        # Check that the derived decode/prefill intvty keys are present
+        assert 'decode_intvty' in result
+        assert 'prefill_intvty' in result
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test that MTP mode is included when set."""
+    env_vars = basic_env_vars.copy()
+    env_vars['MTP_MODE'] = 'disaggregated'
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        assert 'mtp' in result
+        assert result['mtp'] == 'disaggregated'
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test that MTP mode is not included when not set."""
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        assert 'mtp' not in result
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test explicit prefill and decode GPU counts."""
+    env_vars = basic_env_vars.copy()
+    env_vars['PREFILL_GPUS'] = '4'
+    env_vars['DECODE_GPUS'] = '4'
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # With explicit GPU counts
+        assert result['output_tput_per_gpu'] == 3000.0 / 4
+        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test that prefill/decode GPUs default to TP size when not specified."""
+    # Default env vars have empty strings for PREFILL_GPUS and DECODE_GPUS
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty
+        assert result['output_tput_per_gpu'] == 3000.0 / 8
+        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test processing with different TP sizes."""
+    test_cases = [
+        ('1', 1),
+        ('2', 2),
+        ('4', 4),
+        ('8', 8),
+        ('16', 16)
+    ]
+    
+    for tp_str, tp_int in test_cases:
+        env_vars = basic_env_vars.copy()
+        env_vars['TP'] = tp_str
+        
+        result_file = tmp_path / 'test_result.json'
+        with open(result_file, 'w') as f:
+            json.dump(sample_benchmark_result, f)
+        
+        original_dir = os.getcwd()
+        os.chdir(tmp_path)
+        
+        try:
+            with patch.dict(os.environ, env_vars):
+                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            
+            output_file = tmp_path / 'agg_test_result.json'
+            with open(output_file) as f:
+                result = json.load(f)
+            
+            assert result['tp'] == tp_int
+            assert result['tput_per_gpu'] == 10000.0 / tp_int
+            
+        finally:
+            os.chdir(original_dir)
+
+
+def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test processing with different EP sizes."""
+    test_cases = [1, 2, 4, 8]
+    
+    for ep_size in test_cases:
+        env_vars = basic_env_vars.copy()
+        env_vars['EP_SIZE'] = str(ep_size)
+        
+        result_file = tmp_path / 'test_result.json'
+        with open(result_file, 'w') as f:
+            json.dump(sample_benchmark_result, f)
+        
+        original_dir = os.getcwd()
+        os.chdir(tmp_path)
+        
+        try:
+            with patch.dict(os.environ, env_vars):
+                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            
+            output_file = tmp_path / 'agg_test_result.json'
+            with open(output_file) as f:
+                result = json.load(f)
+            
+            assert result['ep'] == ep_size
+            
+        finally:
+            os.chdir(original_dir)
+
+
+def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test that output file has the expected structure."""
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(sample_benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # Check required fields exist
+        required_fields = [
+            'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model',
+            'framework', 'precision', 'tput_per_gpu', 
+            'output_tput_per_gpu', 'input_tput_per_gpu'
+        ]
+        
+        for field in required_fields:
+            assert field in result, f"Missing required field: {field}"
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_complex_benchmark_result(tmp_path, basic_env_vars):
+    """Test processing with a more complex benchmark result."""
+    complex_result = {
+        'max_concurrency': 16,
+        'model_id': 'meta-llama/Llama-3-405b',
+        'total_token_throughput': 50000.0,
+        'output_throughput': 15000.0,
+        'ttft_ms': 100.0,
+        'tpot_ms': 15.0,
+        'e2e_latency_ms': 2000.0,
+        'decode_tpot_ms': 12.0,
+        'prefill_tpot_ms': 18.0,
+        'p50_latency_ms': 1500.0,
+        'p90_latency_ms': 2500.0,
+        'p99_latency_ms': 3000.0
+    }
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(complex_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, basic_env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # Check the latency-style ms values were converted to seconds
+        assert result['ttft'] == 100.0 / 1000.0
+        assert result['tpot'] == 15.0 / 1000.0
+        assert result['e2e_latency'] == 2000.0 / 1000.0
+        assert result['p50_latency'] == 1500.0 / 1000.0
+        assert result['p90_latency'] == 2500.0 / 1000.0
+        assert result['p99_latency'] == 3000.0 / 1000.0
+        
+        # Check tpot to intvty conversions
+        assert 'intvty' in result
+        assert 'decode_intvty' in result
+        assert 'prefill_intvty' in result
+        
+    finally:
+        os.chdir(original_dir)
+
+
+def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test different DP_ATTENTION values."""
+    test_values = ['true', 'false', 'True', 'False']
+    
+    for dp_attn_value in test_values:
+        env_vars = basic_env_vars.copy()
+        env_vars['DP_ATTENTION'] = dp_attn_value
+        
+        result_file = tmp_path / 'test_result.json'
+        with open(result_file, 'w') as f:
+            json.dump(sample_benchmark_result, f)
+        
+        original_dir = os.getcwd()
+        os.chdir(tmp_path)
+        
+        try:
+            with patch.dict(os.environ, env_vars):
+                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            
+            output_file = tmp_path / 'agg_test_result.json'
+            with open(output_file) as f:
+                result = json.load(f)
+            
+            assert result['dp_attention'] == dp_attn_value
+            
+        finally:
+            os.chdir(original_dir)
+
+
+def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test different framework values."""
+    frameworks = ['vllm', 'trt', 'sglang', 'tensorrt-llm']
+    
+    for framework in frameworks:
+        env_vars = basic_env_vars.copy()
+        env_vars['FRAMEWORK'] = framework
+        
+        result_file = tmp_path / 'test_result.json'
+        with open(result_file, 'w') as f:
+            json.dump(sample_benchmark_result, f)
+        
+        original_dir = os.getcwd()
+        os.chdir(tmp_path)
+        
+        try:
+            with patch.dict(os.environ, env_vars):
+                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            
+            output_file = tmp_path / 'agg_test_result.json'
+            with open(output_file) as f:
+                result = json.load(f)
+            
+            assert result['framework'] == framework
+            
+        finally:
+            os.chdir(original_dir)
+
+
+def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test different precision values."""
+    precisions = ['fp8', 'fp16', 'fp32', 'int8', 'int4']
+    
+    for precision in precisions:
+        env_vars = basic_env_vars.copy()
+        env_vars['PRECISION'] = precision
+        
+        result_file = tmp_path / 'test_result.json'
+        with open(result_file, 'w') as f:
+            json.dump(sample_benchmark_result, f)
+        
+        original_dir = os.getcwd()
+        os.chdir(tmp_path)
+        
+        try:
+            with patch.dict(os.environ, env_vars):
+                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            
+            output_file = tmp_path / 'agg_test_result.json'
+            with open(output_file) as f:
+                result = json.load(f)
+            
+            assert result['precision'] == precision
+            
+        finally:
+            os.chdir(original_dir)
+
+
+def test_throughput_calculations(tmp_path, basic_env_vars):
+    """Test throughput calculations with various values."""
+    benchmark_result = {
+        'max_concurrency': 10,
+        'model_id': 'test/model',
+        'total_token_throughput': 24000.0,
+        'output_throughput': 8000.0
+    }
+    
+    env_vars = basic_env_vars.copy()
+    env_vars['TP'] = '4'
+    env_vars['PREFILL_GPUS'] = '2'
+    env_vars['DECODE_GPUS'] = '2'
+    
+    result_file = tmp_path / 'test_result.json'
+    with open(result_file, 'w') as f:
+        json.dump(benchmark_result, f)
+    
+    original_dir = os.getcwd()
+    os.chdir(tmp_path)
+    
+    try:
+        with patch.dict(os.environ, env_vars):
+            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+        
+        output_file = tmp_path / 'agg_test_result.json'
+        with open(output_file) as f:
+            result = json.load(f)
+        
+        # tput_per_gpu = total_token_throughput / tp_size
+        assert result['tput_per_gpu'] == 24000.0 / 4
+        
+        # output_tput_per_gpu = output_throughput / decode_gpus
+        assert result['output_tput_per_gpu'] == 8000.0 / 2
+        
+        # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus
+        assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2
+        
+    finally:
+        os.chdir(original_dir)
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])

From 73b52a40f2faa854c7b289b6533005d3f345c9be Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 6 Nov 2025 22:22:37 +0000
Subject: [PATCH 2/5] Improve test_process_result.py: use dynamic paths and
 remove unused fixture

Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com>
---
 utils/test_process_result.py | 44 ++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/utils/test_process_result.py b/utils/test_process_result.py
index 165daf335..2d57d3fd0 100644
--- a/utils/test_process_result.py
+++ b/utils/test_process_result.py
@@ -6,6 +6,11 @@
 from unittest.mock import patch, mock_open
 
 
+# Get the path to process_result.py dynamically
+SCRIPT_DIR = Path(__file__).parent
+PROCESS_RESULT_PATH = SCRIPT_DIR / 'process_result.py'
+
+
 @pytest.fixture
 def sample_benchmark_result():
     """Sample benchmark result JSON data."""
@@ -39,15 +44,6 @@ def basic_env_vars():
     }
 
 
-@pytest.fixture
-def temp_result_file(tmp_path, sample_benchmark_result):
-    """Create a temporary benchmark result file."""
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    return result_file
-
-
 def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
     """Test basic processing of benchmark results."""
     # Create result file
@@ -62,7 +58,7 @@ def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
     try:
         with patch.dict(os.environ, basic_env_vars):
             # Import and execute the script
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         # Check output file was created
         output_file = tmp_path / 'agg_test_result.json'
@@ -109,7 +105,7 @@ def test_ms_to_seconds_conversion(tmp_path, basic_env_vars):
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -145,7 +141,7 @@ def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars):
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -185,7 +181,7 @@ def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars):
     
     try:
         with patch.dict(os.environ, env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -209,7 +205,7 @@ def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -236,7 +232,7 @@ def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_e
     
     try:
         with patch.dict(os.environ, env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -262,7 +258,7 @@ def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, b
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -299,7 +295,7 @@ def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
         
         try:
             with patch.dict(os.environ, env_vars):
-                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+                exec(open(PROCESS_RESULT_PATH).read())
             
             output_file = tmp_path / 'agg_test_result.json'
             with open(output_file) as f:
@@ -329,7 +325,7 @@ def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
         
         try:
             with patch.dict(os.environ, env_vars):
-                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+                exec(open(PROCESS_RESULT_PATH).read())
             
             output_file = tmp_path / 'agg_test_result.json'
             with open(output_file) as f:
@@ -352,7 +348,7 @@ def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -398,7 +394,7 @@ def test_complex_benchmark_result(tmp_path, basic_env_vars):
     
     try:
         with patch.dict(os.environ, basic_env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:
@@ -438,7 +434,7 @@ def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars):
         
         try:
             with patch.dict(os.environ, env_vars):
-                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+                exec(open(PROCESS_RESULT_PATH).read())
             
             output_file = tmp_path / 'agg_test_result.json'
             with open(output_file) as f:
@@ -467,7 +463,7 @@ def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars)
         
         try:
             with patch.dict(os.environ, env_vars):
-                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+                exec(open(PROCESS_RESULT_PATH).read())
             
             output_file = tmp_path / 'agg_test_result.json'
             with open(output_file) as f:
@@ -496,7 +492,7 @@ def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars)
         
         try:
             with patch.dict(os.environ, env_vars):
-                exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+                exec(open(PROCESS_RESULT_PATH).read())
             
             output_file = tmp_path / 'agg_test_result.json'
             with open(output_file) as f:
@@ -531,7 +527,7 @@ def test_throughput_calculations(tmp_path, basic_env_vars):
     
     try:
         with patch.dict(os.environ, env_vars):
-            exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read())
+            exec(open(PROCESS_RESULT_PATH).read())
         
         output_file = tmp_path / 'agg_test_result.json'
         with open(output_file) as f:

From a204048948fb1849addfca0175c1240fdc01c56f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 6 Nov 2025 22:39:10 +0000
Subject: [PATCH 3/5] Consolidate pytest workflows into test-utils.yml

Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com>
---
 .github/workflows/test-matrix-logic.yml       | 32 -------------------
 ...tils-process-result.yml => test-utils.yml} | 21 ++++++------
 2 files changed, 12 insertions(+), 41 deletions(-)
 delete mode 100644 .github/workflows/test-matrix-logic.yml
 rename .github/workflows/{test-utils-process-result.yml => test-utils.yml} (61%)

diff --git a/.github/workflows/test-matrix-logic.yml b/.github/workflows/test-matrix-logic.yml
deleted file mode 100644
index 2d54f5e17..000000000
--- a/.github/workflows/test-matrix-logic.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-name: Test Matrix Logic
-
-on:
-  pull_request:
-    paths:
-      - 'utils/matrix-logic/**'
-
-jobs:
-  test:
-    if: github.event.pull_request.draft != true
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-    
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v5
-
-      - name: Set up Python
-        uses: actions/setup-python@v6
-        with:
-          python-version: '3.12'
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install pytest pydantic pyyaml
-
-      - name: Run pytest
-        run: |
-          cd utils/matrix-logic
-          pytest test_generate_sweep_configs.py -v
diff --git a/.github/workflows/test-utils-process-result.yml b/.github/workflows/test-utils.yml
similarity index 61%
rename from .github/workflows/test-utils-process-result.yml
rename to .github/workflows/test-utils.yml
index ea84e06bd..348329b2c 100644
--- a/.github/workflows/test-utils-process-result.yml
+++ b/.github/workflows/test-utils.yml
@@ -1,18 +1,16 @@
-name: Test Utils Process Result
+name: Test Utils
 
 on:
   pull_request:
     paths:
-      - 'utils/process_result.py'
-      - 'utils/test_process_result.py'
-      - '.github/workflows/test-utils-process-result.yml'
+      - 'utils/**/*.py'
+      - '.github/workflows/test-utils.yml'
   push:
     branches:
       - main
     paths:
-      - 'utils/process_result.py'
-      - 'utils/test_process_result.py'
-      - '.github/workflows/test-utils-process-result.yml'
+      - 'utils/**/*.py'
+      - '.github/workflows/test-utils.yml'
 
 jobs:
   test:
@@ -33,9 +31,14 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pytest
+          pip install pytest pydantic pyyaml
 
-      - name: Run pytest
+      - name: Run pytest for matrix-logic
+        run: |
+          cd utils/matrix-logic
+          pytest test_generate_sweep_configs.py -v
+
+      - name: Run pytest for process_result
         run: |
           cd utils
           pytest test_process_result.py -v

From 88415bc9fea6bff7cb405f5180e07a52872073f0 Mon Sep 17 00:00:00 2001
From: functionstackx <47992694+functionstackx@users.noreply.github.com>
Date: Thu, 6 Nov 2025 20:11:25 -0500
Subject: [PATCH 4/5] Update utils/test_process_result.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 utils/test_process_result.py | 60 +++++++++++++++++-------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

diff --git a/utils/test_process_result.py b/utils/test_process_result.py
index 2d57d3fd0..1babba5a9 100644
--- a/utils/test_process_result.py
+++ b/utils/test_process_result.py
@@ -44,46 +44,44 @@ def basic_env_vars():
     }
 
 
-def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
-    """Test basic processing of benchmark results."""
-    # Create result file
-    result_file = tmp_path / 'test_result.json'
+def run_process_result_script(tmp_path, result_data, env_vars, result_filename='test_result.json'):
+    """Helper to create result file, change directory, execute script, and clean up."""
+    result_file = tmp_path / result_filename
     with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    
-    # Change to tmp_path directory
+        json.dump(result_data, f)
     original_dir = os.getcwd()
     os.chdir(tmp_path)
-    
     try:
-        with patch.dict(os.environ, basic_env_vars):
-            # Import and execute the script
+        with patch.dict(os.environ, env_vars):
             exec(open(PROCESS_RESULT_PATH).read())
-        
-        # Check output file was created
-        output_file = tmp_path / 'agg_test_result.json'
-        assert output_file.exists()
-        
-        # Load and verify output
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        assert result['hw'] == 'h200'
-        assert result['tp'] == 8
-        assert result['ep'] == 1
-        assert result['dp_attention'] == 'false'
-        assert result['conc'] == 8
-        assert result['model'] == 'meta-llama/Llama-3-70b'
-        assert result['framework'] == 'vllm'
-        assert result['precision'] == 'fp8'
-        assert result['tput_per_gpu'] == 10000.0 / 8
-        assert result['output_tput_per_gpu'] == 3000.0 / 8
-        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
-        
+        output_file = tmp_path / f'agg_{Path(result_filename).stem}.json'
+        return output_file
     finally:
         os.chdir(original_dir)
 
 
+def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
+    """Test basic processing of benchmark results."""
+    output_file = run_process_result_script(
+        tmp_path,
+        sample_benchmark_result,
+        basic_env_vars,
+        result_filename='test_result.json'
+    )
+    assert output_file.exists()
+    with open(output_file) as f:
+        result = json.load(f)
+    assert result['hw'] == 'h200'
+    assert result['tp'] == 8
+    assert result['ep'] == 1
+    assert result['dp_attention'] == 'false'
+    assert result['conc'] == 8
+    assert result['model'] == 'meta-llama/Llama-3-70b'
+    assert result['framework'] == 'vllm'
+    assert result['precision'] == 'fp8'
+    assert result['tput_per_gpu'] == 10000.0 / 8
+    assert result['output_tput_per_gpu'] == 3000.0 / 8
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
 def test_ms_to_seconds_conversion(tmp_path, basic_env_vars):
     """Test conversion of millisecond values to seconds."""
     benchmark_result = {

From f0353fc337233046bda066f49eb83b246feb3a52 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 7 Nov 2025 01:16:21 +0000
Subject: [PATCH 5/5] Refactor process_result.py to use functions instead of
 exec()

Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com>
---
 utils/process_result.py      | 105 ++++----
 utils/test_process_result.py | 459 ++++++++---------------------------
 2 files changed, 170 insertions(+), 394 deletions(-)

diff --git a/utils/process_result.py b/utils/process_result.py
index bfe6060ce..3068a7d8c 100644
--- a/utils/process_result.py
+++ b/utils/process_result.py
@@ -4,48 +4,71 @@
 from pathlib import Path
 
 
-hw = os.environ.get('RUNNER_TYPE')
-tp_size = int(os.environ.get('TP'))
-ep_size = int(os.environ.get('EP_SIZE'))
-prefill_gpus_str = os.environ.get('PREFILL_GPUS', '')
-decode_gpus_str = os.environ.get('DECODE_GPUS', '')
+def process_benchmark_result(bmk_result, env_vars):
+    """
+    Process benchmark results and generate aggregated metrics.
+    
+    Args:
+        bmk_result: Dictionary containing benchmark results
+        env_vars: Dictionary containing environment variables
+        
+    Returns:
+        Dictionary containing processed metrics
+    """
+    hw = env_vars.get('RUNNER_TYPE')
+    tp_size = int(env_vars.get('TP'))
+    ep_size = int(env_vars.get('EP_SIZE'))
+    prefill_gpus_str = env_vars.get('PREFILL_GPUS', '')
+    decode_gpus_str = env_vars.get('DECODE_GPUS', '')
+    
+    # If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
+    prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
+    decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
+    dp_attention = env_vars.get('DP_ATTENTION')
+    framework = env_vars.get('FRAMEWORK')
+    precision = env_vars.get('PRECISION')
+    mtp_mode = env_vars.get('MTP_MODE')
+    
+    data = {
+        'hw': hw,
+        'tp': tp_size,
+        'ep': ep_size,
+        'dp_attention': dp_attention,  # true or false
+        'conc': int(bmk_result['max_concurrency']),
+        'model': bmk_result['model_id'],
+        'framework': framework,
+        'precision': precision,
+        'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
+        'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
+        'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput'])) / prefill_gpus
+    }
+    
+    if mtp_mode:  # MTP
+        data['mtp'] = mtp_mode
+    
+    for key, value in bmk_result.items():
+        if key.endswith('ms'):
+            data[key.replace('_ms', '')] = float(value) / 1000.0
+        if 'tpot' in key:
+            data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value)
+    
+    return data
 
-# If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
-prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
-decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
-dp_attention = os.environ.get('DP_ATTENTION')
-result_filename = os.environ.get('RESULT_FILENAME')
-framework = os.environ.get('FRAMEWORK')
-precision = os.environ.get('PRECISION')
-mtp_mode = os.environ.get('MTP_MODE')
 
-with open(f'{result_filename}.json') as f:
-    bmk_result = json.load(f)
+def main():
+    """Main function to process benchmark results from environment variables."""
+    result_filename = os.environ.get('RESULT_FILENAME')
+    
+    with open(f'{result_filename}.json') as f:
+        bmk_result = json.load(f)
+    
+    data = process_benchmark_result(bmk_result, os.environ)
+    
+    print(json.dumps(data, indent=2))
+    
+    with open(f'agg_{result_filename}.json', 'w') as f:
+        json.dump(data, f, indent=2)
 
-data = {
-    'hw': hw,
-    'tp': tp_size,
-    'ep': ep_size,
-    'dp_attention': dp_attention, # true or false
-    'conc': int(bmk_result['max_concurrency']),
-    'model': bmk_result['model_id'],
-    'framework': framework,
-    'precision': precision,
-    'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
-    'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
-    'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
-}
 
-if mtp_mode:  # MTP
-    data['mtp'] = mtp_mode
-
-for key, value in bmk_result.items():
-    if key.endswith('ms'):
-        data[key.replace('_ms', '')] = float(value) / 1000.0
-    if 'tpot' in key:
-        data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value)
-
-print(json.dumps(data, indent=2))
-
-with open(f'agg_{result_filename}.json', 'w') as f:
-    json.dump(data, f, indent=2)
+if __name__ == '__main__':
+    main()
diff --git a/utils/test_process_result.py b/utils/test_process_result.py
index 1babba5a9..cd7a5e072 100644
--- a/utils/test_process_result.py
+++ b/utils/test_process_result.py
@@ -3,12 +3,10 @@
 import os
 import sys
 from pathlib import Path
-from unittest.mock import patch, mock_open
 
-
-# Get the path to process_result.py dynamically
-SCRIPT_DIR = Path(__file__).parent
-PROCESS_RESULT_PATH = SCRIPT_DIR / 'process_result.py'
+# Import the function to test
+sys.path.insert(0, str(Path(__file__).parent))
+from process_result import process_benchmark_result
 
 
 @pytest.fixture
@@ -44,33 +42,10 @@ def basic_env_vars():
     }
 
 
-def run_process_result_script(tmp_path, result_data, env_vars, result_filename='test_result.json'):
-    """Helper to create result file, change directory, execute script, and clean up."""
-    result_file = tmp_path / result_filename
-    with open(result_file, 'w') as f:
-        json.dump(result_data, f)
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
-    try:
-        with patch.dict(os.environ, env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        output_file = tmp_path / f'agg_{Path(result_filename).stem}.json'
-        return output_file
-    finally:
-        os.chdir(original_dir)
-
-
-def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_basic_processing(sample_benchmark_result, basic_env_vars):
     """Test basic processing of benchmark results."""
-    output_file = run_process_result_script(
-        tmp_path,
-        sample_benchmark_result,
-        basic_env_vars,
-        result_filename='test_result.json'
-    )
-    assert output_file.exists()
-    with open(output_file) as f:
-        result = json.load(f)
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
+    
     assert result['hw'] == 'h200'
     assert result['tp'] == 8
     assert result['ep'] == 1
@@ -82,7 +57,9 @@ def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars):
     assert result['tput_per_gpu'] == 10000.0 / 8
     assert result['output_tput_per_gpu'] == 3000.0 / 8
     assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
-def test_ms_to_seconds_conversion(tmp_path, basic_env_vars):
+
+
+def test_ms_to_seconds_conversion(basic_env_vars):
     """Test conversion of millisecond values to seconds."""
     benchmark_result = {
         'max_concurrency': 4,
@@ -94,31 +71,15 @@ def test_ms_to_seconds_conversion(tmp_path, basic_env_vars):
         'decode_latency_ms': 500.0
     }
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(benchmark_result, f)
-    
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    result = process_benchmark_result(benchmark_result, basic_env_vars)
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # Check ms values were converted to seconds
-        assert result['ttft'] == 200.0 / 1000.0
-        assert result['e2e_latency'] == 1000.0 / 1000.0
-        assert result['decode_latency'] == 500.0 / 1000.0
-        
-    finally:
-        os.chdir(original_dir)
+    # Check ms values were converted to seconds
+    assert result['ttft'] == 200.0 / 1000.0
+    assert result['e2e_latency'] == 1000.0 / 1000.0
+    assert result['decode_latency'] == 500.0 / 1000.0
 
 
-def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars):
+def test_tpot_to_intvty_conversion(basic_env_vars):
     """Test conversion of tpot (time per output token) to intvty (interactivity/throughput)."""
     benchmark_result = {
         'max_concurrency': 2,
@@ -130,147 +91,67 @@ def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars):
         'prefill_tpot_ms': 30.0
     }
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(benchmark_result, f)
+    result = process_benchmark_result(benchmark_result, basic_env_vars)
     
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    # Check that each tpot value yields both a seconds value and an intvty value.
+    # For a '*tpot_ms' key: tpot = tpot_ms / 1000.0 (seconds) and intvty = 1000.0 / tpot_ms.
+    # Example: tpot_ms = 25.0 -> tpot = 0.025 s, intvty = 1000.0 / 25.0 = 40.0
+    assert result['tpot'] == 25.0 / 1000.0  # Converted from ms to s
+    assert result['intvty'] == 1000.0 / 25.0  # intvty = 1000.0 / tpot_ms
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # Check tpot values were converted to intvty
-        # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms
-        # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0
-        assert result['tpot'] == 25.0 / 1000.0  # Converted from ms to s
-        assert result['intvty'] == 1000.0 / 25.0  # intvty = 1000.0 / tpot_ms
-        
-        assert result['decode_tpot'] == 20.0 / 1000.0
-        assert result['decode_intvty'] == 1000.0 / 20.0
-        
-        assert result['prefill_tpot'] == 30.0 / 1000.0
-        assert result['prefill_intvty'] == 1000.0 / 30.0
-        
-        # Check that the intvty calculation is correct
-        assert 'decode_intvty' in result
-        assert 'prefill_intvty' in result
-        
-    finally:
-        os.chdir(original_dir)
+    assert result['decode_tpot'] == 20.0 / 1000.0
+    assert result['decode_intvty'] == 1000.0 / 20.0
+    
+    assert result['prefill_tpot'] == 30.0 / 1000.0
+    assert result['prefill_intvty'] == 1000.0 / 30.0
+    
+    # Check that the derived intvty keys are present in the result
+    assert 'decode_intvty' in result
+    assert 'prefill_intvty' in result
 
 
-def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_mtp_mode_included(sample_benchmark_result, basic_env_vars):
     """Test that MTP mode is included when set."""
     env_vars = basic_env_vars.copy()
     env_vars['MTP_MODE'] = 'disaggregated'
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    result = process_benchmark_result(sample_benchmark_result, env_vars)
     
-    try:
-        with patch.dict(os.environ, env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        assert 'mtp' in result
-        assert result['mtp'] == 'disaggregated'
-        
-    finally:
-        os.chdir(original_dir)
+    assert 'mtp' in result
+    assert result['mtp'] == 'disaggregated'
 
 
-def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_mtp_mode_not_included(sample_benchmark_result, basic_env_vars):
     """Test that MTP mode is not included when not set."""
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        assert 'mtp' not in result
-        
-    finally:
-        os.chdir(original_dir)
+    assert 'mtp' not in result
 
 
-def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_prefill_decode_gpus_explicit(sample_benchmark_result, basic_env_vars):
     """Test explicit prefill and decode GPU counts."""
     env_vars = basic_env_vars.copy()
     env_vars['PREFILL_GPUS'] = '4'
     env_vars['DECODE_GPUS'] = '4'
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    result = process_benchmark_result(sample_benchmark_result, env_vars)
     
-    try:
-        with patch.dict(os.environ, env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # With explicit GPU counts
-        assert result['output_tput_per_gpu'] == 3000.0 / 4
-        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4
-        
-    finally:
-        os.chdir(original_dir)
+    # With explicit GPU counts
+    assert result['output_tput_per_gpu'] == 3000.0 / 4
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4
 
 
-def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_prefill_decode_gpus_defaults_to_tp(sample_benchmark_result, basic_env_vars):
     """Test that prefill/decode GPUs default to TP size when not specified."""
     # Default env vars have empty strings for PREFILL_GPUS and DECODE_GPUS
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
-    
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty
-        assert result['output_tput_per_gpu'] == 3000.0 / 8
-        assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
-        
-    finally:
-        os.chdir(original_dir)
+    # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty
+    assert result['output_tput_per_gpu'] == 3000.0 / 8
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
 
 
-def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_different_tp_sizes(sample_benchmark_result, basic_env_vars):
     """Test processing with different TP sizes."""
     test_cases = [
         ('1', 1),
@@ -284,29 +165,13 @@ def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
         env_vars = basic_env_vars.copy()
         env_vars['TP'] = tp_str
         
-        result_file = tmp_path / 'test_result.json'
-        with open(result_file, 'w') as f:
-            json.dump(sample_benchmark_result, f)
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
         
-        original_dir = os.getcwd()
-        os.chdir(tmp_path)
-        
-        try:
-            with patch.dict(os.environ, env_vars):
-                exec(open(PROCESS_RESULT_PATH).read())
-            
-            output_file = tmp_path / 'agg_test_result.json'
-            with open(output_file) as f:
-                result = json.load(f)
-            
-            assert result['tp'] == tp_int
-            assert result['tput_per_gpu'] == 10000.0 / tp_int
-            
-        finally:
-            os.chdir(original_dir)
-
-
-def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
+        assert result['tp'] == tp_int
+        assert result['tput_per_gpu'] == 10000.0 / tp_int
+
+
+def test_different_ep_sizes(sample_benchmark_result, basic_env_vars):
     """Test processing with different EP sizes."""
     test_cases = [1, 2, 4, 8]
     
@@ -314,59 +179,27 @@ def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars):
         env_vars = basic_env_vars.copy()
         env_vars['EP_SIZE'] = str(ep_size)
         
-        result_file = tmp_path / 'test_result.json'
-        with open(result_file, 'w') as f:
-            json.dump(sample_benchmark_result, f)
-        
-        original_dir = os.getcwd()
-        os.chdir(tmp_path)
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
         
-        try:
-            with patch.dict(os.environ, env_vars):
-                exec(open(PROCESS_RESULT_PATH).read())
-            
-            output_file = tmp_path / 'agg_test_result.json'
-            with open(output_file) as f:
-                result = json.load(f)
-            
-            assert result['ep'] == ep_size
-            
-        finally:
-            os.chdir(original_dir)
-
-
-def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_env_vars):
-    """Test that output file has the expected structure."""
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(sample_benchmark_result, f)
+        assert result['ep'] == ep_size
+
+
+def test_output_file_content_structure(sample_benchmark_result, basic_env_vars):
+    """Test that output has the expected structure."""
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
     
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    # Check required fields exist
+    required_fields = [
+        'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model',
+        'framework', 'precision', 'tput_per_gpu', 
+        'output_tput_per_gpu', 'input_tput_per_gpu'
+    ]
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # Check required fields exist
-        required_fields = [
-            'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model',
-            'framework', 'precision', 'tput_per_gpu', 
-            'output_tput_per_gpu', 'input_tput_per_gpu'
-        ]
-        
-        for field in required_fields:
-            assert field in result, f"Missing required field: {field}"
-        
-    finally:
-        os.chdir(original_dir)
+    for field in required_fields:
+        assert field in result, f"Missing required field: {field}"
 
 
-def test_complex_benchmark_result(tmp_path, basic_env_vars):
+def test_complex_benchmark_result(basic_env_vars):
     """Test processing with a more complex benchmark result."""
     complex_result = {
         'max_concurrency': 16,
@@ -383,39 +216,23 @@ def test_complex_benchmark_result(tmp_path, basic_env_vars):
         'p99_latency_ms': 3000.0
     }
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(complex_result, f)
+    result = process_benchmark_result(complex_result, basic_env_vars)
     
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    # Check all ms values were converted
+    assert result['ttft'] == 100.0 / 1000.0
+    assert result['tpot'] == 15.0 / 1000.0
+    assert result['e2e_latency'] == 2000.0 / 1000.0
+    assert result['p50_latency'] == 1500.0 / 1000.0
+    assert result['p90_latency'] == 2500.0 / 1000.0
+    assert result['p99_latency'] == 3000.0 / 1000.0
     
-    try:
-        with patch.dict(os.environ, basic_env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # Check all ms values were converted
-        assert result['ttft'] == 100.0 / 1000.0
-        assert result['tpot'] == 15.0 / 1000.0
-        assert result['e2e_latency'] == 2000.0 / 1000.0
-        assert result['p50_latency'] == 1500.0 / 1000.0
-        assert result['p90_latency'] == 2500.0 / 1000.0
-        assert result['p99_latency'] == 3000.0 / 1000.0
-        
-        # Check tpot to intvty conversions
-        assert 'intvty' in result
-        assert 'decode_intvty' in result
-        assert 'prefill_intvty' in result
-        
-    finally:
-        os.chdir(original_dir)
+    # Check tpot to intvty conversions
+    assert 'intvty' in result
+    assert 'decode_intvty' in result
+    assert 'prefill_intvty' in result
 
 
-def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars):
+def test_dp_attention_values(sample_benchmark_result, basic_env_vars):
     """Test different DP_ATTENTION values."""
     test_values = ['true', 'false', 'True', 'False']
     
@@ -423,28 +240,12 @@ def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars):
         env_vars = basic_env_vars.copy()
         env_vars['DP_ATTENTION'] = dp_attn_value
         
-        result_file = tmp_path / 'test_result.json'
-        with open(result_file, 'w') as f:
-            json.dump(sample_benchmark_result, f)
-        
-        original_dir = os.getcwd()
-        os.chdir(tmp_path)
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
         
-        try:
-            with patch.dict(os.environ, env_vars):
-                exec(open(PROCESS_RESULT_PATH).read())
-            
-            output_file = tmp_path / 'agg_test_result.json'
-            with open(output_file) as f:
-                result = json.load(f)
-            
-            assert result['dp_attention'] == dp_attn_value
-            
-        finally:
-            os.chdir(original_dir)
-
-
-def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars):
+        assert result['dp_attention'] == dp_attn_value
+
+
+def test_different_frameworks(sample_benchmark_result, basic_env_vars):
     """Test different framework values."""
     frameworks = ['vllm', 'trt', 'sglang', 'tensorrt-llm']
     
@@ -452,28 +253,12 @@ def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars)
         env_vars = basic_env_vars.copy()
         env_vars['FRAMEWORK'] = framework
         
-        result_file = tmp_path / 'test_result.json'
-        with open(result_file, 'w') as f:
-            json.dump(sample_benchmark_result, f)
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
         
-        original_dir = os.getcwd()
-        os.chdir(tmp_path)
-        
-        try:
-            with patch.dict(os.environ, env_vars):
-                exec(open(PROCESS_RESULT_PATH).read())
-            
-            output_file = tmp_path / 'agg_test_result.json'
-            with open(output_file) as f:
-                result = json.load(f)
-            
-            assert result['framework'] == framework
-            
-        finally:
-            os.chdir(original_dir)
-
-
-def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars):
+        assert result['framework'] == framework
+
+
+def test_different_precisions(sample_benchmark_result, basic_env_vars):
     """Test different precision values."""
     precisions = ['fp8', 'fp16', 'fp32', 'int8', 'int4']
     
@@ -481,28 +266,12 @@ def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars)
         env_vars = basic_env_vars.copy()
         env_vars['PRECISION'] = precision
         
-        result_file = tmp_path / 'test_result.json'
-        with open(result_file, 'w') as f:
-            json.dump(sample_benchmark_result, f)
-        
-        original_dir = os.getcwd()
-        os.chdir(tmp_path)
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
         
-        try:
-            with patch.dict(os.environ, env_vars):
-                exec(open(PROCESS_RESULT_PATH).read())
-            
-            output_file = tmp_path / 'agg_test_result.json'
-            with open(output_file) as f:
-                result = json.load(f)
-            
-            assert result['precision'] == precision
-            
-        finally:
-            os.chdir(original_dir)
-
-
-def test_throughput_calculations(tmp_path, basic_env_vars):
+        assert result['precision'] == precision
+
+
+def test_throughput_calculations(basic_env_vars):
     """Test throughput calculations with various values."""
     benchmark_result = {
         'max_concurrency': 10,
@@ -516,32 +285,16 @@ def test_throughput_calculations(tmp_path, basic_env_vars):
     env_vars['PREFILL_GPUS'] = '2'
     env_vars['DECODE_GPUS'] = '2'
     
-    result_file = tmp_path / 'test_result.json'
-    with open(result_file, 'w') as f:
-        json.dump(benchmark_result, f)
+    result = process_benchmark_result(benchmark_result, env_vars)
     
-    original_dir = os.getcwd()
-    os.chdir(tmp_path)
+    # tput_per_gpu = total_token_throughput / tp_size
+    assert result['tput_per_gpu'] == 24000.0 / 4
     
-    try:
-        with patch.dict(os.environ, env_vars):
-            exec(open(PROCESS_RESULT_PATH).read())
-        
-        output_file = tmp_path / 'agg_test_result.json'
-        with open(output_file) as f:
-            result = json.load(f)
-        
-        # tput_per_gpu = total_token_throughput / tp_size
-        assert result['tput_per_gpu'] == 24000.0 / 4
-        
-        # output_tput_per_gpu = output_throughput / decode_gpus
-        assert result['output_tput_per_gpu'] == 8000.0 / 2
-        
-        # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus
-        assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2
-        
-    finally:
-        os.chdir(original_dir)
+    # output_tput_per_gpu = output_throughput / decode_gpus
+    assert result['output_tput_per_gpu'] == 8000.0 / 2
+    
+    # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus
+    assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2
 
 
 if __name__ == "__main__":