From b2f0b86f4bdca147497b167321b15686701cd77d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 6 Nov 2025 22:20:07 +0000 Subject: [PATCH 1/5] Add unit tests for utils/process_result.py and pytest CI workflow Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com> --- .../workflows/test-utils-process-result.yml | 41 ++ .gitignore | 7 +- utils/test_process_result.py | 554 ++++++++++++++++++ 3 files changed, 601 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/test-utils-process-result.yml create mode 100644 utils/test_process_result.py diff --git a/.github/workflows/test-utils-process-result.yml b/.github/workflows/test-utils-process-result.yml new file mode 100644 index 000000000..ea84e06bd --- /dev/null +++ b/.github/workflows/test-utils-process-result.yml @@ -0,0 +1,41 @@ +name: Test Utils Process Result + +on: + pull_request: + paths: + - 'utils/process_result.py' + - 'utils/test_process_result.py' + - '.github/workflows/test-utils-process-result.yml' + push: + branches: + - main + paths: + - 'utils/process_result.py' + - 'utils/test_process_result.py' + - '.github/workflows/test-utils-process-result.yml' + +jobs: + test: + if: github.event_name != 'pull_request' || github.event.pull_request.draft != true + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + - name: Checkout code + uses: actions/checkout@v5 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + + - name: Run pytest + run: | + cd utils + pytest test_process_result.py -v diff --git a/.gitignore b/.gitignore index 03d36472a..146afad17 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,7 @@ **/__pycache__/** -**/.coverage \ No newline at end of file +**/.coverage +**/.pytest_cache/ +*.pyc +*.pyo +.coverage.* +htmlcov/ \ No newline at end of file diff --git a/utils/test_process_result.py b/utils/test_process_result.py new file mode 100644 index 000000000..165daf335 --- /dev/null +++ b/utils/test_process_result.py @@ -0,0 +1,554 @@ +import pytest +import json +import os +import sys +from pathlib import Path +from unittest.mock import patch, mock_open + + +@pytest.fixture +def sample_benchmark_result(): + """Sample benchmark result JSON data.""" + return { + 'max_concurrency': 8, + 'model_id': 'meta-llama/Llama-3-70b', + 'total_token_throughput': 10000.0, + 'output_throughput': 3000.0, + 'ttft_ms': 150.5, + 'tpot_ms': 25.0, + 'e2e_latency_ms': 500.0, + 'decode_tpot_ms': 30.0, + 'prefill_tpot_ms': 20.0 + } + + +@pytest.fixture +def basic_env_vars(): + """Basic environment variables for testing.""" + return { + 'RUNNER_TYPE': 'h200', + 'TP': '8', + 'EP_SIZE': '1', + 'PREFILL_GPUS': '', + 'DECODE_GPUS': '', + 'DP_ATTENTION': 'false', + 'RESULT_FILENAME': 'test_result', + 'FRAMEWORK': 'vllm', + 'PRECISION': 'fp8', + 'MTP_MODE': '' + } + + +@pytest.fixture +def temp_result_file(tmp_path, sample_benchmark_result): + """Create a temporary benchmark result file.""" + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + return result_file + + +def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): + """Test basic processing of benchmark results.""" + # Create result file + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + # Change to tmp_path directory + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + # Import and execute the script + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + # Check output file was created + output_file = tmp_path / 'agg_test_result.json' + assert output_file.exists() + + # Load and verify output + with open(output_file) as f: + result = json.load(f) + + assert result['hw'] == 'h200' + assert result['tp'] == 8 + assert result['ep'] == 1 + assert result['dp_attention'] == 'false' + assert result['conc'] == 8 + assert result['model'] == 'meta-llama/Llama-3-70b' + assert result['framework'] == 'vllm' + assert result['precision'] == 'fp8' + assert result['tput_per_gpu'] == 10000.0 / 8 + assert result['output_tput_per_gpu'] == 3000.0 / 8 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 + + finally: + os.chdir(original_dir) + + +def test_ms_to_seconds_conversion(tmp_path, basic_env_vars): + """Test conversion of millisecond values to seconds.""" + benchmark_result = { + 'max_concurrency': 4, + 'model_id': 'test/model', + 'total_token_throughput': 5000.0, + 'output_throughput': 1500.0, + 'ttft_ms': 200.0, + 'e2e_latency_ms': 1000.0, + 'decode_latency_ms': 500.0 + } + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # Check ms values were converted to seconds + assert result['ttft'] == 200.0 / 1000.0 + assert result['e2e_latency'] == 1000.0 / 1000.0 + assert result['decode_latency'] == 500.0 / 1000.0 + + finally: + os.chdir(original_dir) + + +def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars): + """Test conversion of tpot (time per output token) to intvty (interactivity/throughput).""" + benchmark_result = { + 'max_concurrency': 2, + 'model_id': 'test/model', + 'total_token_throughput': 2000.0, + 'output_throughput': 500.0, + 'tpot_ms': 25.0, + 'decode_tpot_ms': 20.0, + 'prefill_tpot_ms': 30.0 + } + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # Check tpot values were converted to intvty + # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms + # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0 + assert result['tpot'] == 25.0 / 1000.0 # Converted from ms to s + assert result['intvty'] == 1000.0 / 25.0 # intvty = 1000.0 / tpot_ms + + assert result['decode_tpot'] == 20.0 / 1000.0 + assert result['decode_intvty'] == 1000.0 / 20.0 + + assert result['prefill_tpot'] == 30.0 / 1000.0 + assert result['prefill_intvty'] == 1000.0 / 30.0 + + # Check that the intvty calculation is correct + assert 'decode_intvty' in result + assert 'prefill_intvty' in result + + finally: + os.chdir(original_dir) + + +def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars): + """Test that MTP mode is included when set.""" + env_vars = basic_env_vars.copy() + env_vars['MTP_MODE'] = 'disaggregated' + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert 'mtp' in result + assert result['mtp'] == 'disaggregated' + + finally: + os.chdir(original_dir) + + +def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars): + """Test that MTP mode is not included when not set.""" + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert 'mtp' not in result + + finally: + os.chdir(original_dir) + + +def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_env_vars): + """Test explicit prefill and decode GPU counts.""" + env_vars = basic_env_vars.copy() + env_vars['PREFILL_GPUS'] = '4' + env_vars['DECODE_GPUS'] = '4' + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # With explicit GPU counts + assert result['output_tput_per_gpu'] == 3000.0 / 4 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4 + + finally: + os.chdir(original_dir) + + +def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, basic_env_vars): + """Test that prefill/decode GPUs default to TP size when not specified.""" + # Default env vars have empty strings for PREFILL_GPUS and DECODE_GPUS + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty + assert result['output_tput_per_gpu'] == 3000.0 / 8 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 + + finally: + os.chdir(original_dir) + + +def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars): + """Test processing with different TP sizes.""" + test_cases = [ + ('1', 1), + ('2', 2), + ('4', 4), + ('8', 8), + ('16', 16) + ] + + for tp_str, tp_int in test_cases: + env_vars = basic_env_vars.copy() + env_vars['TP'] = tp_str + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert result['tp'] == tp_int + assert result['tput_per_gpu'] == 10000.0 / tp_int + + finally: + os.chdir(original_dir) + + +def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars): + """Test processing with different EP sizes.""" + test_cases = [1, 2, 4, 8] + + for ep_size in test_cases: + env_vars = basic_env_vars.copy() + env_vars['EP_SIZE'] = str(ep_size) + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert result['ep'] == ep_size + + finally: + os.chdir(original_dir) + + +def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_env_vars): + """Test that output file has the expected structure.""" + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # Check required fields exist + required_fields = [ + 'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model', + 'framework', 'precision', 'tput_per_gpu', + 'output_tput_per_gpu', 'input_tput_per_gpu' + ] + + for field in required_fields: + assert field in result, f"Missing required field: {field}" + + finally: + os.chdir(original_dir) + + +def test_complex_benchmark_result(tmp_path, basic_env_vars): + """Test processing with a more complex benchmark result.""" + complex_result = { + 'max_concurrency': 16, + 'model_id': 'meta-llama/Llama-3-405b', + 'total_token_throughput': 50000.0, + 'output_throughput': 15000.0, + 'ttft_ms': 100.0, + 'tpot_ms': 15.0, + 'e2e_latency_ms': 2000.0, + 'decode_tpot_ms': 12.0, + 'prefill_tpot_ms': 18.0, + 'p50_latency_ms': 1500.0, + 'p90_latency_ms': 2500.0, + 'p99_latency_ms': 3000.0 + } + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(complex_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, basic_env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # Check all ms values were converted + assert result['ttft'] == 100.0 / 1000.0 + assert result['tpot'] == 15.0 / 1000.0 + assert result['e2e_latency'] == 2000.0 / 1000.0 + assert result['p50_latency'] == 1500.0 / 1000.0 + assert result['p90_latency'] == 2500.0 / 1000.0 + assert result['p99_latency'] == 3000.0 / 1000.0 + + # Check tpot to intvty conversions + assert 'intvty' in result + assert 'decode_intvty' in result + assert 'prefill_intvty' in result + + finally: + os.chdir(original_dir) + + +def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars): + """Test different DP_ATTENTION values.""" + test_values = ['true', 'false', 'True', 'False'] + + for dp_attn_value in test_values: + env_vars = basic_env_vars.copy() + env_vars['DP_ATTENTION'] = dp_attn_value + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert result['dp_attention'] == dp_attn_value + + finally: + os.chdir(original_dir) + + +def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars): + """Test different framework values.""" + frameworks = ['vllm', 'trt', 'sglang', 'tensorrt-llm'] + + for framework in frameworks: + env_vars = basic_env_vars.copy() + env_vars['FRAMEWORK'] = framework + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert result['framework'] == framework + + finally: + os.chdir(original_dir) + + +def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars): + """Test different precision values.""" + precisions = ['fp8', 'fp16', 'fp32', 'int8', 'int4'] + + for precision in precisions: + env_vars = basic_env_vars.copy() + env_vars['PRECISION'] = precision + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(sample_benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + assert result['precision'] == precision + + finally: + os.chdir(original_dir) + + +def test_throughput_calculations(tmp_path, basic_env_vars): + """Test throughput calculations with various values.""" + benchmark_result = { + 'max_concurrency': 10, + 'model_id': 'test/model', + 'total_token_throughput': 24000.0, + 'output_throughput': 8000.0 + } + + env_vars = basic_env_vars.copy() + env_vars['TP'] = '4' + env_vars['PREFILL_GPUS'] = '2' + env_vars['DECODE_GPUS'] = '2' + + result_file = tmp_path / 'test_result.json' + with open(result_file, 'w') as f: + json.dump(benchmark_result, f) + + original_dir = os.getcwd() + os.chdir(tmp_path) + + try: + with patch.dict(os.environ, env_vars): + exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + + output_file = tmp_path / 'agg_test_result.json' + with open(output_file) as f: + result = json.load(f) + + # tput_per_gpu = total_token_throughput / tp_size + assert result['tput_per_gpu'] == 24000.0 / 4 + + # output_tput_per_gpu = output_throughput / decode_gpus + assert result['output_tput_per_gpu'] == 8000.0 / 2 + + # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus + assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2 + + finally: + os.chdir(original_dir) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 73b52a40f2faa854c7b289b6533005d3f345c9be Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 6 Nov 2025 22:22:37 +0000 Subject: [PATCH 2/5] Improve test_process_result.py: use dynamic paths and remove unused fixture Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com> --- utils/test_process_result.py | 44 ++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/utils/test_process_result.py b/utils/test_process_result.py index 165daf335..2d57d3fd0 100644 --- a/utils/test_process_result.py +++ b/utils/test_process_result.py @@ -6,6 +6,11 @@ from unittest.mock import patch, mock_open +# Get the path to process_result.py dynamically +SCRIPT_DIR = Path(__file__).parent +PROCESS_RESULT_PATH = SCRIPT_DIR / 'process_result.py' + + @pytest.fixture def sample_benchmark_result(): """Sample benchmark result JSON data.""" @@ -39,15 +44,6 @@ def basic_env_vars(): } -@pytest.fixture -def temp_result_file(tmp_path, sample_benchmark_result): - """Create a temporary benchmark result file.""" - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - return result_file - - def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): """Test basic processing of benchmark results.""" # Create result file @@ -62,7 +58,7 @@ def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): try: with patch.dict(os.environ, basic_env_vars): # Import and execute the script - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) # Check output file was created output_file = tmp_path / 'agg_test_result.json' @@ -109,7 +105,7 @@ def test_ms_to_seconds_conversion(tmp_path, basic_env_vars): try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -145,7 +141,7 @@ def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars): try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -185,7 +181,7 @@ def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars): try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -209,7 +205,7 @@ def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -236,7 +232,7 @@ def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_e try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -262,7 +258,7 @@ def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, b try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -299,7 +295,7 @@ def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars): try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -329,7 +325,7 @@ def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars): try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -352,7 +348,7 @@ def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_ try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -398,7 +394,7 @@ def test_complex_benchmark_result(tmp_path, basic_env_vars): try: with patch.dict(os.environ, basic_env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -438,7 +434,7 @@ def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars): try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -467,7 +463,7 @@ def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars) try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -496,7 +492,7 @@ def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars) try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: @@ -531,7 +527,7 @@ def test_throughput_calculations(tmp_path, basic_env_vars): try: with patch.dict(os.environ, env_vars): - exec(open('/home/runner/work/InferenceMAX/InferenceMAX/utils/process_result.py').read()) + exec(open(PROCESS_RESULT_PATH).read()) output_file = tmp_path / 'agg_test_result.json' with open(output_file) as f: From a204048948fb1849addfca0175c1240fdc01c56f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 6 Nov 2025 22:39:10 +0000 Subject: [PATCH 3/5] Consolidate pytest workflows into test-utils.yml Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com> --- .github/workflows/test-matrix-logic.yml | 32 ------------------- ...tils-process-result.yml => test-utils.yml} | 21 ++++++------ 2 files changed, 12 insertions(+), 41 deletions(-) delete mode 100644 .github/workflows/test-matrix-logic.yml rename .github/workflows/{test-utils-process-result.yml => test-utils.yml} (61%) diff --git a/.github/workflows/test-matrix-logic.yml b/.github/workflows/test-matrix-logic.yml deleted file mode 100644 index 2d54f5e17..000000000 --- a/.github/workflows/test-matrix-logic.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: Test Matrix Logic - -on: - pull_request: - paths: - - 'utils/matrix-logic/**' - -jobs: - test: - if: github.event.pull_request.draft != true - runs-on: ubuntu-latest - permissions: - contents: read - - steps: - - name: Checkout code - uses: actions/checkout@v5 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: '3.12' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest pydantic pyyaml - - - name: Run pytest - run: | - cd utils/matrix-logic - pytest test_generate_sweep_configs.py -v diff --git a/.github/workflows/test-utils-process-result.yml b/.github/workflows/test-utils.yml similarity index 61% rename from .github/workflows/test-utils-process-result.yml rename to .github/workflows/test-utils.yml index ea84e06bd..348329b2c 100644 --- a/.github/workflows/test-utils-process-result.yml +++ b/.github/workflows/test-utils.yml @@ -1,18 +1,16 @@ -name: Test Utils Process Result +name: Test Utils on: pull_request: paths: - - 'utils/process_result.py' - - 'utils/test_process_result.py' - - '.github/workflows/test-utils-process-result.yml' + - 'utils/**/*.py' + - '.github/workflows/test-utils.yml' push: branches: - main paths: - - 'utils/process_result.py' - - 'utils/test_process_result.py' - - '.github/workflows/test-utils-process-result.yml' + - 'utils/**/*.py' + - '.github/workflows/test-utils.yml' jobs: test: @@ -33,9 +31,14 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest + pip install pytest pydantic pyyaml - - name: Run pytest + - name: Run pytest for matrix-logic + run: | + cd utils/matrix-logic + pytest test_generate_sweep_configs.py -v + + - name: Run pytest for process_result run: | cd utils pytest test_process_result.py -v From 88415bc9fea6bff7cb405f5180e07a52872073f0 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Thu, 6 Nov 2025 20:11:25 -0500 Subject: [PATCH 4/5] Update utils/test_process_result.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- utils/test_process_result.py | 60 +++++++++++++++++------------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/utils/test_process_result.py b/utils/test_process_result.py index 2d57d3fd0..1babba5a9 100644 --- a/utils/test_process_result.py +++ b/utils/test_process_result.py @@ -44,46 +44,44 @@ def basic_env_vars(): } -def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): - """Test basic processing of benchmark results.""" - # Create result file - result_file = tmp_path / 'test_result.json' +def run_process_result_script(tmp_path, result_data, env_vars, result_filename='test_result.json'): + """Helper to create result file, change directory, execute script, and clean up.""" + result_file = tmp_path / result_filename with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - # Change to tmp_path directory + json.dump(result_data, f) original_dir = os.getcwd() os.chdir(tmp_path) - try: - with patch.dict(os.environ, basic_env_vars): - # Import and execute the script + with patch.dict(os.environ, env_vars): exec(open(PROCESS_RESULT_PATH).read()) - - # Check output file was created - output_file = tmp_path / 'agg_test_result.json' - assert output_file.exists() - - # Load and verify output - with open(output_file) as f: - result = json.load(f) - - assert result['hw'] == 'h200' - assert result['tp'] == 8 - assert result['ep'] == 1 - assert result['dp_attention'] == 'false' - assert result['conc'] == 8 - assert result['model'] == 'meta-llama/Llama-3-70b' - assert result['framework'] == 'vllm' - assert result['precision'] == 'fp8' - assert result['tput_per_gpu'] == 10000.0 / 8 - assert result['output_tput_per_gpu'] == 3000.0 / 8 - assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 - + output_file = tmp_path / f'agg_{Path(result_filename).stem}.json' + return output_file finally: os.chdir(original_dir) +def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): + """Test basic processing of benchmark results.""" + output_file = run_process_result_script( + tmp_path, + sample_benchmark_result, + basic_env_vars, + result_filename='test_result.json' + ) + assert output_file.exists() + with open(output_file) as f: + result = json.load(f) + assert result['hw'] == 'h200' + assert result['tp'] == 8 + assert result['ep'] == 1 + assert result['dp_attention'] == 'false' + assert result['conc'] == 8 + assert result['model'] == 'meta-llama/Llama-3-70b' + assert result['framework'] == 'vllm' + assert result['precision'] == 'fp8' + assert result['tput_per_gpu'] == 10000.0 / 8 + assert result['output_tput_per_gpu'] == 3000.0 / 8 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 def test_ms_to_seconds_conversion(tmp_path, basic_env_vars): """Test conversion of millisecond values to seconds.""" benchmark_result = { From f0353fc337233046bda066f49eb83b246feb3a52 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 7 Nov 2025 01:16:21 +0000 Subject: [PATCH 5/5] Refactor process_result.py to use functions instead of exec() Co-authored-by: functionstackx <47992694+functionstackx@users.noreply.github.com> --- utils/process_result.py | 105 ++++---- utils/test_process_result.py | 459 ++++++++--------------------------- 2 files changed, 170 insertions(+), 394 deletions(-) diff --git a/utils/process_result.py b/utils/process_result.py index bfe6060ce..3068a7d8c 100644 --- a/utils/process_result.py +++ b/utils/process_result.py @@ -4,48 +4,71 @@ from pathlib import Path -hw = os.environ.get('RUNNER_TYPE') -tp_size = int(os.environ.get('TP')) -ep_size = int(os.environ.get('EP_SIZE')) -prefill_gpus_str = os.environ.get('PREFILL_GPUS', '') -decode_gpus_str = os.environ.get('DECODE_GPUS', '') +def process_benchmark_result(bmk_result, env_vars): + """ + Process benchmark results and generate aggregated metrics. + + Args: + bmk_result: Dictionary containing benchmark results + env_vars: Dictionary containing environment variables + + Returns: + Dictionary containing processed metrics + """ + hw = env_vars.get('RUNNER_TYPE') + tp_size = int(env_vars.get('TP')) + ep_size = int(env_vars.get('EP_SIZE')) + prefill_gpus_str = env_vars.get('PREFILL_GPUS', '') + decode_gpus_str = env_vars.get('DECODE_GPUS', '') + + # If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int + prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str) + decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str) + dp_attention = env_vars.get('DP_ATTENTION') + framework = env_vars.get('FRAMEWORK') + precision = env_vars.get('PRECISION') + mtp_mode = env_vars.get('MTP_MODE') + + data = { + 'hw': hw, + 'tp': tp_size, + 'ep': ep_size, + 'dp_attention': dp_attention, # true or false + 'conc': int(bmk_result['max_concurrency']), + 'model': bmk_result['model_id'], + 'framework': framework, + 'precision': precision, + 'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size, + 'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus, + 'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput'])) / prefill_gpus + } + + if mtp_mode: # MTP + data['mtp'] = mtp_mode + + for key, value in bmk_result.items(): + if key.endswith('ms'): + data[key.replace('_ms', '')] = float(value) / 1000.0 + if 'tpot' in key: + data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value) + + return data -# If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int -prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str) -decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str) -dp_attention = os.environ.get('DP_ATTENTION') -result_filename = os.environ.get('RESULT_FILENAME') -framework = os.environ.get('FRAMEWORK') -precision = os.environ.get('PRECISION') -mtp_mode = os.environ.get('MTP_MODE') -with open(f'{result_filename}.json') as f: - bmk_result = json.load(f) +def main(): + """Main function to process benchmark results from environment variables.""" + result_filename = os.environ.get('RESULT_FILENAME') + + with open(f'{result_filename}.json') as f: + bmk_result = json.load(f) + + data = process_benchmark_result(bmk_result, os.environ) + + print(json.dumps(data, indent=2)) + + with open(f'agg_{result_filename}.json', 'w') as f: + json.dump(data, f, indent=2) -data = { - 'hw': hw, - 'tp': tp_size, - 'ep': ep_size, - 'dp_attention': dp_attention, # true or false - 'conc': int(bmk_result['max_concurrency']), - 'model': bmk_result['model_id'], - 'framework': framework, - 'precision': precision, - 'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size, - 'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus, - 'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus -} -if mtp_mode: # MTP - data['mtp'] = mtp_mode - -for key, value in bmk_result.items(): - if key.endswith('ms'): - data[key.replace('_ms', '')] = float(value) / 1000.0 - if 'tpot' in key: - data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value) - -print(json.dumps(data, indent=2)) - -with open(f'agg_{result_filename}.json', 'w') as f: - json.dump(data, f, indent=2) +if __name__ == '__main__': + main() diff --git a/utils/test_process_result.py b/utils/test_process_result.py index 1babba5a9..cd7a5e072 100644 --- a/utils/test_process_result.py +++ b/utils/test_process_result.py @@ -3,12 +3,10 @@ import os import sys from pathlib import Path -from unittest.mock import patch, mock_open - -# Get the path to process_result.py dynamically -SCRIPT_DIR = Path(__file__).parent -PROCESS_RESULT_PATH = SCRIPT_DIR / 'process_result.py' +# Import the function to test +sys.path.insert(0, str(Path(__file__).parent)) +from process_result import process_benchmark_result @pytest.fixture @@ -44,33 +42,10 @@ def basic_env_vars(): } -def run_process_result_script(tmp_path, result_data, env_vars, result_filename='test_result.json'): - """Helper to create result file, change directory, execute script, and clean up.""" - result_file = tmp_path / result_filename - with open(result_file, 'w') as f: - json.dump(result_data, f) - original_dir = os.getcwd() - os.chdir(tmp_path) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - output_file = tmp_path / f'agg_{Path(result_filename).stem}.json' - return output_file - finally: - os.chdir(original_dir) - - -def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): +def test_basic_processing(sample_benchmark_result, basic_env_vars): """Test basic processing of benchmark results.""" - output_file = run_process_result_script( - tmp_path, - sample_benchmark_result, - basic_env_vars, - result_filename='test_result.json' - ) - assert output_file.exists() - with open(output_file) as f: - result = json.load(f) + result = process_benchmark_result(sample_benchmark_result, basic_env_vars) + assert result['hw'] == 'h200' assert result['tp'] == 8 assert result['ep'] == 1 @@ -82,7 +57,9 @@ def test_basic_processing(tmp_path, sample_benchmark_result, basic_env_vars): assert result['tput_per_gpu'] == 10000.0 / 8 assert result['output_tput_per_gpu'] == 3000.0 / 8 assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 -def test_ms_to_seconds_conversion(tmp_path, basic_env_vars): + + +def test_ms_to_seconds_conversion(basic_env_vars): """Test conversion of millisecond values to seconds.""" benchmark_result = { 'max_concurrency': 4, @@ -94,31 +71,15 @@ def test_ms_to_seconds_conversion(tmp_path, basic_env_vars): 'decode_latency_ms': 500.0 } - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(benchmark_result, basic_env_vars) - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # Check ms values were converted to seconds - assert result['ttft'] == 200.0 / 1000.0 - assert result['e2e_latency'] == 1000.0 / 1000.0 - assert result['decode_latency'] == 500.0 / 1000.0 - - finally: - os.chdir(original_dir) + # Check ms values were converted to seconds + assert result['ttft'] == 200.0 / 1000.0 + assert result['e2e_latency'] == 1000.0 / 1000.0 + assert result['decode_latency'] == 500.0 / 1000.0 -def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars): +def test_tpot_to_intvty_conversion(basic_env_vars): """Test conversion of tpot (time per output token) to intvty (interactivity/throughput).""" benchmark_result = { 'max_concurrency': 2, @@ -130,147 +91,67 @@ def test_tpot_to_intvty_conversion(tmp_path, basic_env_vars): 'prefill_tpot_ms': 30.0 } - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(benchmark_result, f) + result = process_benchmark_result(benchmark_result, basic_env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) + # Check tpot values were converted to intvty + # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms + # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0 + assert result['tpot'] == 25.0 / 1000.0 # Converted from ms to s + assert result['intvty'] == 1000.0 / 25.0 # intvty = 1000.0 / tpot_ms - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # Check tpot values were converted to intvty - # The logic: if 'tpot' in key, convert ms value and then intvty = 1000.0 / tpot_ms - # So: tpot_ms: 25.0 -> tpot: 0.025 (ms to s), intvty: 1000.0/25.0 = 40.0 - assert result['tpot'] == 25.0 / 1000.0 # Converted from ms to s - assert result['intvty'] == 1000.0 / 25.0 # intvty = 1000.0 / tpot_ms - - assert result['decode_tpot'] == 20.0 / 1000.0 - assert result['decode_intvty'] == 1000.0 / 20.0 - - assert result['prefill_tpot'] == 30.0 / 1000.0 - assert result['prefill_intvty'] == 1000.0 / 30.0 - - # Check that the intvty calculation is correct - assert 'decode_intvty' in result - assert 'prefill_intvty' in result - - finally: - os.chdir(original_dir) + assert result['decode_tpot'] == 20.0 / 1000.0 + assert result['decode_intvty'] == 1000.0 / 20.0 + + assert result['prefill_tpot'] == 30.0 / 1000.0 + assert result['prefill_intvty'] == 1000.0 / 30.0 + + # Check that the intvty calculation is correct + assert 'decode_intvty' in result + assert 'prefill_intvty' in result -def test_mtp_mode_included(tmp_path, sample_benchmark_result, basic_env_vars): +def test_mtp_mode_included(sample_benchmark_result, basic_env_vars): """Test that MTP mode is included when set.""" env_vars = basic_env_vars.copy() env_vars['MTP_MODE'] = 'disaggregated' - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, env_vars) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert 'mtp' in result - assert result['mtp'] == 'disaggregated' - - finally: - os.chdir(original_dir) + assert 'mtp' in result + assert result['mtp'] == 'disaggregated' -def test_mtp_mode_not_included(tmp_path, sample_benchmark_result, basic_env_vars): +def test_mtp_mode_not_included(sample_benchmark_result, basic_env_vars): """Test that MTP mode is not included when not set.""" - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, basic_env_vars) - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert 'mtp' not in result - - finally: - os.chdir(original_dir) + assert 'mtp' not in result -def test_prefill_decode_gpus_explicit(tmp_path, sample_benchmark_result, basic_env_vars): +def test_prefill_decode_gpus_explicit(sample_benchmark_result, basic_env_vars): """Test explicit prefill and decode GPU counts.""" env_vars = basic_env_vars.copy() env_vars['PREFILL_GPUS'] = '4' env_vars['DECODE_GPUS'] = '4' - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, env_vars) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # With explicit GPU counts - assert result['output_tput_per_gpu'] == 3000.0 / 4 - assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4 - - finally: - os.chdir(original_dir) + # With explicit GPU counts + assert result['output_tput_per_gpu'] == 3000.0 / 4 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 4 -def test_prefill_decode_gpus_defaults_to_tp(tmp_path, sample_benchmark_result, basic_env_vars): +def test_prefill_decode_gpus_defaults_to_tp(sample_benchmark_result, basic_env_vars): """Test that prefill/decode GPUs default to TP size when not specified.""" # Default env vars have empty strings for PREFILL_GPUS and DECODE_GPUS - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, basic_env_vars) - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty - assert result['output_tput_per_gpu'] == 3000.0 / 8 - assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 - - finally: - os.chdir(original_dir) + # Should use TP size (8) when PREFILL_GPUS and DECODE_GPUS are empty + assert result['output_tput_per_gpu'] == 3000.0 / 8 + assert result['input_tput_per_gpu'] == (10000.0 - 3000.0) / 8 -def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars): +def test_different_tp_sizes(sample_benchmark_result, basic_env_vars): """Test processing with different TP sizes.""" test_cases = [ ('1', 1), @@ -284,29 +165,13 @@ def test_different_tp_sizes(tmp_path, sample_benchmark_result, basic_env_vars): env_vars = basic_env_vars.copy() env_vars['TP'] = tp_str - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) + result = process_benchmark_result(sample_benchmark_result, env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) - - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert result['tp'] == tp_int - assert result['tput_per_gpu'] == 10000.0 / tp_int - - finally: - os.chdir(original_dir) - - -def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars): + assert result['tp'] == tp_int + assert result['tput_per_gpu'] == 10000.0 / tp_int + + +def test_different_ep_sizes(sample_benchmark_result, basic_env_vars): """Test processing with different EP sizes.""" test_cases = [1, 2, 4, 8] @@ -314,59 +179,27 @@ def test_different_ep_sizes(tmp_path, sample_benchmark_result, basic_env_vars): env_vars = basic_env_vars.copy() env_vars['EP_SIZE'] = str(ep_size) - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, env_vars) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert result['ep'] == ep_size - - finally: - os.chdir(original_dir) - - -def test_output_file_content_structure(tmp_path, sample_benchmark_result, basic_env_vars): - """Test that output file has the expected structure.""" - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) + assert result['ep'] == ep_size + + +def test_output_file_content_structure(sample_benchmark_result, basic_env_vars): + """Test that output has the expected structure.""" + result = process_benchmark_result(sample_benchmark_result, basic_env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) + # Check required fields exist + required_fields = [ + 'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model', + 'framework', 'precision', 'tput_per_gpu', + 'output_tput_per_gpu', 'input_tput_per_gpu' + ] - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # Check required fields exist - required_fields = [ - 'hw', 'tp', 'ep', 'dp_attention', 'conc', 'model', - 'framework', 'precision', 'tput_per_gpu', - 'output_tput_per_gpu', 'input_tput_per_gpu' - ] - - for field in required_fields: - assert field in result, f"Missing required field: {field}" - - finally: - os.chdir(original_dir) + for field in required_fields: + assert field in result, f"Missing required field: {field}" -def test_complex_benchmark_result(tmp_path, basic_env_vars): +def test_complex_benchmark_result(basic_env_vars): """Test processing with a more complex benchmark result.""" complex_result = { 'max_concurrency': 16, @@ -383,39 +216,23 @@ def test_complex_benchmark_result(tmp_path, basic_env_vars): 'p99_latency_ms': 3000.0 } - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(complex_result, f) + result = process_benchmark_result(complex_result, basic_env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) + # Check all ms values were converted + assert result['ttft'] == 100.0 / 1000.0 + assert result['tpot'] == 15.0 / 1000.0 + assert result['e2e_latency'] == 2000.0 / 1000.0 + assert result['p50_latency'] == 1500.0 / 1000.0 + assert result['p90_latency'] == 2500.0 / 1000.0 + assert result['p99_latency'] == 3000.0 / 1000.0 - try: - with patch.dict(os.environ, basic_env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # Check all ms values were converted - assert result['ttft'] == 100.0 / 1000.0 - assert result['tpot'] == 15.0 / 1000.0 - assert result['e2e_latency'] == 2000.0 / 1000.0 - assert result['p50_latency'] == 1500.0 / 1000.0 - assert result['p90_latency'] == 2500.0 / 1000.0 - assert result['p99_latency'] == 3000.0 / 1000.0 - - # Check tpot to intvty conversions - assert 'intvty' in result - assert 'decode_intvty' in result - assert 'prefill_intvty' in result - - finally: - os.chdir(original_dir) + # Check tpot to intvty conversions + assert 'intvty' in result + assert 'decode_intvty' in result + assert 'prefill_intvty' in result -def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars): +def test_dp_attention_values(sample_benchmark_result, basic_env_vars): """Test different DP_ATTENTION values.""" test_values = ['true', 'false', 'True', 'False'] @@ -423,28 +240,12 @@ def test_dp_attention_values(tmp_path, sample_benchmark_result, basic_env_vars): env_vars = basic_env_vars.copy() env_vars['DP_ATTENTION'] = dp_attn_value - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, env_vars) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert result['dp_attention'] == dp_attn_value - - finally: - os.chdir(original_dir) - - -def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars): + assert result['dp_attention'] == dp_attn_value + + +def test_different_frameworks(sample_benchmark_result, basic_env_vars): """Test different framework values.""" frameworks = ['vllm', 'trt', 'sglang', 'tensorrt-llm'] @@ -452,28 +253,12 @@ def test_different_frameworks(tmp_path, sample_benchmark_result, basic_env_vars) env_vars = basic_env_vars.copy() env_vars['FRAMEWORK'] = framework - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) + result = process_benchmark_result(sample_benchmark_result, env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) - - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert result['framework'] == framework - - finally: - os.chdir(original_dir) - - -def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars): + assert result['framework'] == framework + + +def test_different_precisions(sample_benchmark_result, basic_env_vars): """Test different precision values.""" precisions = ['fp8', 'fp16', 'fp32', 'int8', 'int4'] @@ -481,28 +266,12 @@ def test_different_precisions(tmp_path, sample_benchmark_result, basic_env_vars) env_vars = basic_env_vars.copy() env_vars['PRECISION'] = precision - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(sample_benchmark_result, f) - - original_dir = os.getcwd() - os.chdir(tmp_path) + result = process_benchmark_result(sample_benchmark_result, env_vars) - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - assert result['precision'] == precision - - finally: - os.chdir(original_dir) - - -def test_throughput_calculations(tmp_path, basic_env_vars): + assert result['precision'] == precision + + +def test_throughput_calculations(basic_env_vars): """Test throughput calculations with various values.""" benchmark_result = { 'max_concurrency': 10, @@ -516,32 +285,16 @@ def test_throughput_calculations(tmp_path, basic_env_vars): env_vars['PREFILL_GPUS'] = '2' env_vars['DECODE_GPUS'] = '2' - result_file = tmp_path / 'test_result.json' - with open(result_file, 'w') as f: - json.dump(benchmark_result, f) + result = process_benchmark_result(benchmark_result, env_vars) - original_dir = os.getcwd() - os.chdir(tmp_path) + # tput_per_gpu = total_token_throughput / tp_size + assert result['tput_per_gpu'] == 24000.0 / 4 - try: - with patch.dict(os.environ, env_vars): - exec(open(PROCESS_RESULT_PATH).read()) - - output_file = tmp_path / 'agg_test_result.json' - with open(output_file) as f: - result = json.load(f) - - # tput_per_gpu = total_token_throughput / tp_size - assert result['tput_per_gpu'] == 24000.0 / 4 - - # output_tput_per_gpu = output_throughput / decode_gpus - assert result['output_tput_per_gpu'] == 8000.0 / 2 - - # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus - assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2 - - finally: - os.chdir(original_dir) + # output_tput_per_gpu = output_throughput / decode_gpus + assert result['output_tput_per_gpu'] == 8000.0 / 2 + + # input_tput_per_gpu = (total_token_throughput - output_throughput) / prefill_gpus + assert result['input_tput_per_gpu'] == (24000.0 - 8000.0) / 2 if __name__ == "__main__":