diff --git a/.github/workflows/test-matrix-logic.yml b/.github/workflows/test-utils.yml
similarity index 54%
rename from .github/workflows/test-matrix-logic.yml
rename to .github/workflows/test-utils.yml
index 2d54f5e17..348329b2c 100644
--- a/.github/workflows/test-matrix-logic.yml
+++ b/.github/workflows/test-utils.yml
@@ -1,13 +1,20 @@
-name: Test Matrix Logic
+name: Test Utils
 
 on:
   pull_request:
     paths:
-      - 'utils/matrix-logic/**'
+      - 'utils/**/*.py'
+      - '.github/workflows/test-utils.yml'
+  push:
+    branches:
+      - main
+    paths:
+      - 'utils/**/*.py'
+      - '.github/workflows/test-utils.yml'
 
 jobs:
   test:
-    if: github.event.pull_request.draft != true
+    if: github.event_name != 'pull_request' || github.event.pull_request.draft != true
     runs-on: ubuntu-latest
     permissions:
       contents: read
@@ -26,7 +33,12 @@ jobs:
           python -m pip install --upgrade pip
           pip install pytest pydantic pyyaml
 
-      - name: Run pytest
+      - name: Run pytest for matrix-logic
         run: |
           cd utils/matrix-logic
           pytest test_generate_sweep_configs.py -v
+
+      - name: Run pytest for process_result
+        run: |
+          cd utils
+          pytest test_process_result.py -v
diff --git a/.gitignore b/.gitignore
index 03d36472a..146afad17 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,7 @@
 **/__pycache__/**
-**/.coverage
\ No newline at end of file
+**/.coverage
+**/.pytest_cache/
+*.pyc
+*.pyo
+.coverage.*
+htmlcov/
\ No newline at end of file
diff --git a/utils/process_result.py b/utils/process_result.py
index bfe6060ce..3068a7d8c 100644
--- a/utils/process_result.py
+++ b/utils/process_result.py
@@ -4,48 +4,71 @@
 from pathlib import Path
 
 
-hw = os.environ.get('RUNNER_TYPE')
-tp_size = int(os.environ.get('TP'))
-ep_size = int(os.environ.get('EP_SIZE'))
-prefill_gpus_str = os.environ.get('PREFILL_GPUS', '')
-decode_gpus_str = os.environ.get('DECODE_GPUS', '')
+def process_benchmark_result(bmk_result, env_vars):
+    """
+    Process benchmark results and generate aggregated metrics.
+    
+    Args:
+        bmk_result: Dictionary containing benchmark results
+        env_vars: Dictionary containing environment variables
+        
+    Returns:
+        Dictionary containing processed metrics
+    """
+    hw = env_vars.get('RUNNER_TYPE')
+    tp_size = int(env_vars.get('TP'))
+    ep_size = int(env_vars.get('EP_SIZE'))
+    prefill_gpus_str = env_vars.get('PREFILL_GPUS', '')
+    decode_gpus_str = env_vars.get('DECODE_GPUS', '')
+    
+    # If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
+    prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
+    decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
+    dp_attention = env_vars.get('DP_ATTENTION')
+    framework = env_vars.get('FRAMEWORK')
+    precision = env_vars.get('PRECISION')
+    mtp_mode = env_vars.get('MTP_MODE')
+    
+    data = {
+        'hw': hw,
+        'tp': tp_size,
+        'ep': ep_size,
+        'dp_attention': dp_attention,  # true or false
+        'conc': int(bmk_result['max_concurrency']),
+        'model': bmk_result['model_id'],
+        'framework': framework,
+        'precision': precision,
+        'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
+        'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
+        'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput'])) / prefill_gpus
+    }
+    
+    if mtp_mode:  # MTP
+        data['mtp'] = mtp_mode
+    
+    for key, value in bmk_result.items():
+        if key.endswith('ms'):
+            data[key.replace('_ms', '')] = float(value) / 1000.0
+        if 'tpot' in key:
+            data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value)
+    
+    return data
 
-# If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
-prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
-decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
-dp_attention = os.environ.get('DP_ATTENTION')
-result_filename = os.environ.get('RESULT_FILENAME')
-framework = os.environ.get('FRAMEWORK')
-precision = os.environ.get('PRECISION')
-mtp_mode = os.environ.get('MTP_MODE')
 
-with open(f'{result_filename}.json') as f:
-    bmk_result = json.load(f)
+def main():
+    """Main function to process benchmark results from environment variables."""
+    result_filename = os.environ.get('RESULT_FILENAME')
+    
+    with open(f'{result_filename}.json') as f:
+        bmk_result = json.load(f)
+    
+    data = process_benchmark_result(bmk_result, os.environ)
+    
+    print(json.dumps(data, indent=2))
+    
+    with open(f'agg_{result_filename}.json', 'w') as f:
+        json.dump(data, f, indent=2)
 
-data = {
-    'hw': hw,
-    'tp': tp_size,
-    'ep': ep_size,
-    'dp_attention': dp_attention, # true or false
-    'conc': int(bmk_result['max_concurrency']),
-    'model': bmk_result['model_id'],
-    'framework': framework,
-    'precision': precision,
-    'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
-    'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
-    'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
-}
 
-if mtp_mode:  # MTP
-    data['mtp'] = mtp_mode
-
-for key, value in bmk_result.items():
-    if key.endswith('ms'):
-        data[key.replace('_ms', '')] = float(value) / 1000.0
-    if 'tpot' in key:
-        data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value)
-
-print(json.dumps(data, indent=2))
-
-with open(f'agg_{result_filename}.json', 'w') as f:
-    json.dump(data, f, indent=2)
+if __name__ == '__main__':
+    main()
diff --git a/utils/test_process_result.py b/utils/test_process_result.py
new file mode 100644
index 000000000..cd7a5e072
--- /dev/null
+++ b/utils/test_process_result.py
@@ -0,0 +1,301 @@
+import pytest
+import json
+import os
+import sys
+from pathlib import Path
+
+# Import the function to test
+sys.path.insert(0, str(Path(__file__).parent))
+from process_result import process_benchmark_result
+
+
+@pytest.fixture
+def sample_benchmark_result():
+    """Sample benchmark result JSON data."""
+    return {
+        'max_concurrency': 8,
+        'model_id': 'meta-llama/Llama-3-70b',
+        'total_token_throughput': 10000.0,
+        'output_throughput': 3000.0,
+        'ttft_ms': 150.5,
+        'tpot_ms': 25.0,
+        'e2e_latency_ms': 500.0,
+        'decode_tpot_ms': 30.0,
+        'prefill_tpot_ms': 20.0
+    }
+
+
+@pytest.fixture
+def basic_env_vars():
+    """Basic environment variables for testing."""
+    return {
+        'RUNNER_TYPE': 'h200',
+        'TP': '8',
+        'EP_SIZE': '1',
+        'PREFILL_GPUS': '',
+        'DECODE_GPUS': '',
+        'DP_ATTENTION': 'false',
+        'RESULT_FILENAME': 'test_result',
+        'FRAMEWORK': 'vllm',
+        'PRECISION': 'fp8',
+        'MTP_MODE': ''
+    }
+
+
+def test_basic_processing(sample_benchmark_result, basic_env_vars):
+    """Test basic processing of benchmark results."""
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
+    
+    assert result['hw'] == 'h200'
+    assert result['tp'] == 8
+    assert result['ep'] == 1
+    assert result['dp_attention'] == 'false'
+    assert result['conc'] == 8
+    assert result['model'] == 'meta-llama/Llama-3-70b'
+    assert result['framework'] == 'vllm'
+    assert result['precision'] == 'fp8'
+    assert result['tput_per_gpu'] == 10000.0 / 8
+    assert result['output_tput_per_gpu'] == 3000.0 / 8
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
+
+
+def test_ms_to_seconds_conversion(basic_env_vars):
+    """Test conversion of millisecond values to seconds."""
+    benchmark_result = {
+        'max_concurrency': 4,
+        'model_id': 'test/model',
+        'total_token_throughput': 5000.0,
+        'output_throughput': 1500.0,
+        'ttft_ms': 200.0,
+        'e2e_latency_ms': 1000.0,
+        'decode_latency_ms': 500.0
+    }
+    
+    result = process_benchmark_result(benchmark_result, basic_env_vars)
+    
+    # Check ms values were converted to seconds
+    assert result['ttft'] == 200.0 / 1000.0
+    assert result['e2e_latency'] == 1000.0 / 1000.0
+    assert result['decode_latency'] == 500.0 / 1000.0
+
+
+def test_tpot_to_intvty_conversion(basic_env_vars):
+    """Test conversion of tpot (time per output token) to intvty (interactivity/throughput)."""
+    benchmark_result = {
+        'max_concurrency': 2,
+        'model_id': 'test/model',
+        'total_token_throughput': 2000.0,
+        'output_throughput': 500.0,
+        'tpot_ms': 25.0,
+        'decode_tpot_ms': 20.0,
+        'prefill_tpot_ms': 30.0
+    }
+    
+    result = process_benchmark_result(benchmark_result, basic_env_vars)
+    
+    # Check tpot values were converted to intvty
+    # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms
+    # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0
+    assert result['tpot'] == 25.0 / 1000.0  # Converted from ms to s
+    assert result['intvty'] == 1000.0 / 25.0  # intvty = 1000.0 / tpot_ms
+    
+    assert result['decode_tpot'] == 20.0 / 1000.0
+    assert result['decode_intvty'] == 1000.0 / 20.0
+    
+    assert result['prefill_tpot'] == 30.0 / 1000.0
+    assert result['prefill_intvty'] == 1000.0 / 30.0
+    
+    # Check that the intvty calculation is correct
+    assert 'decode_intvty' in result
+    assert 'prefill_intvty' in result
+
+
+def test_mtp_mode_included(sample_benchmark_result, basic_env_vars):
+    """Test that MTP mode is included when set."""
+    env_vars = basic_env_vars.copy()
+    env_vars['MTP_MODE'] = 'disaggregated'
+    
+    result = process_benchmark_result(sample_benchmark_result, env_vars)
+    
+    assert 'mtp' in result
+    assert result['mtp'] == 'disaggregated'
+
+
+def test_mtp_mode_not_included(sample_benchmark_result, basic_env_vars):
+    """Test that MTP mode is not included when not set."""
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
+    
+    assert 'mtp' not in result
+
+
+def test_prefill_decode_gpus_explicit(sample_benchmark_result, basic_env_vars):
+    """Test explicit prefill and decode GPU counts."""
+    env_vars = basic_env_vars.copy()
+    env_vars['PREFILL_GPUS'] = '4'
+    env_vars['DECODE_GPUS'] = '4'
+    
+    result = process_benchmark_result(sample_benchmark_result, env_vars)
+    
+    # With explicit GPU counts
+    assert result['output_tput_per_gpu'] == 3000.0 / 4
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4
+
+
+def test_prefill_decode_gpus_defaults_to_tp(sample_benchmark_result, basic_env_vars):
+    """Test that prefill/decode GPUs default to TP size when not specified."""
+    # Default env vars have empty strings for PREFILL_GPUS and DECODE_GPUS
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
+    
+    # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty
+    assert result['output_tput_per_gpu'] == 3000.0 / 8
+    assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8
+
+
+def test_different_tp_sizes(sample_benchmark_result, basic_env_vars):
+    """Test processing with different TP sizes."""
+    test_cases = [
+        ('1', 1),
+        ('2', 2),
+        ('4', 4),
+        ('8', 8),
+        ('16', 16)
+    ]
+    
+    for tp_str, tp_int in test_cases:
+        env_vars = basic_env_vars.copy()
+        env_vars['TP'] = tp_str
+        
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
+        
+        assert result['tp'] == tp_int
+        assert result['tput_per_gpu'] == 10000.0 / tp_int
+
+
+def test_different_ep_sizes(sample_benchmark_result, basic_env_vars):
+    """Test processing with different EP sizes."""
+    test_cases = [1, 2, 4, 8]
+    
+    for ep_size in test_cases:
+        env_vars = basic_env_vars.copy()
+        env_vars['EP_SIZE'] = str(ep_size)
+        
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
+        
+        assert result['ep'] == ep_size
+
+
+def test_output_file_content_structure(sample_benchmark_result, basic_env_vars):
+    """Test that output has the expected structure."""
+    result = process_benchmark_result(sample_benchmark_result, basic_env_vars)
+    
+    # Check required fields exist
+    required_fields = [
+        'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model',
+        'framework', 'precision', 'tput_per_gpu', 
+        'output_tput_per_gpu', 'input_tput_per_gpu'
+    ]
+    
+    for field in required_fields:
+        assert field in result, f"Missing required field: {field}"
+
+
+def test_complex_benchmark_result(basic_env_vars):
+    """Test processing with a more complex benchmark result."""
+    complex_result = {
+        'max_concurrency': 16,
+        'model_id': 'meta-llama/Llama-3-405b',
+        'total_token_throughput': 50000.0,
+        'output_throughput': 15000.0,
+        'ttft_ms': 100.0,
+        'tpot_ms': 15.0,
+        'e2e_latency_ms': 2000.0,
+        'decode_tpot_ms': 12.0,
+        'prefill_tpot_ms': 18.0,
+        'p50_latency_ms': 1500.0,
+        'p90_latency_ms': 2500.0,
+        'p99_latency_ms': 3000.0
+    }
+    
+    result = process_benchmark_result(complex_result, basic_env_vars)
+    
+    # Check all ms values were converted
+    assert result['ttft'] == 100.0 / 1000.0
+    assert result['tpot'] == 15.0 / 1000.0
+    assert result['e2e_latency'] == 2000.0 / 1000.0
+    assert result['p50_latency'] == 1500.0 / 1000.0
+    assert result['p90_latency'] == 2500.0 / 1000.0
+    assert result['p99_latency'] == 3000.0 / 1000.0
+    
+    # Check tpot to intvty conversions
+    assert 'intvty' in result
+    assert 'decode_intvty' in result
+    assert 'prefill_intvty' in result
+
+
+def test_dp_attention_values(sample_benchmark_result, basic_env_vars):
+    """Test different DP_ATTENTION values."""
+    test_values = ['true', 'false', 'True', 'False']
+    
+    for dp_attn_value in test_values:
+        env_vars = basic_env_vars.copy()
+        env_vars['DP_ATTENTION'] = dp_attn_value
+        
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
+        
+        assert result['dp_attention'] == dp_attn_value
+
+
+def test_different_frameworks(sample_benchmark_result, basic_env_vars):
+    """Test different framework values."""
+    frameworks = ['vllm', 'trt', 'sglang', 'tensorrt-llm']
+    
+    for framework in frameworks:
+        env_vars = basic_env_vars.copy()
+        env_vars['FRAMEWORK'] = framework
+        
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
+        
+        assert result['framework'] == framework
+
+
+def test_different_precisions(sample_benchmark_result, basic_env_vars):
+    """Test different precision values."""
+    precisions = ['fp8', 'fp16', 'fp32', 'int8', 'int4']
+    
+    for precision in precisions:
+        env_vars = basic_env_vars.copy()
+        env_vars['PRECISION'] = precision
+        
+        result = process_benchmark_result(sample_benchmark_result, env_vars)
+        
+        assert result['precision'] == precision
+
+
+def test_throughput_calculations(basic_env_vars):
+    """Test throughput calculations with various values."""
+    benchmark_result = {
+        'max_concurrency': 10,
+        'model_id': 'test/model',
+        'total_token_throughput': 24000.0,
+        'output_throughput': 8000.0
+    }
+    
+    env_vars = basic_env_vars.copy()
+    env_vars['TP'] = '4'
+    env_vars['PREFILL_GPUS'] = '2'
+    env_vars['DECODE_GPUS'] = '2'
+    
+    result = process_benchmark_result(benchmark_result, env_vars)
+    
+    # tput_per_gpu = total_token_throughput / tp_size
+    assert result['tput_per_gpu'] == 24000.0 / 4
+    
+    # output_tput_per_gpu = output_throughput / decode_gpus
+    assert result['output_tput_per_gpu'] == 8000.0 / 2
+    
+    # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus
+    assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v"])