Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
name: Test Matrix Logic
name: Test Utils

on:
pull_request:
paths:
- 'utils/matrix-logic/**'
- 'utils/**/*.py'
- '.github/workflows/test-utils.yml'
push:
branches:
- main
paths:
- 'utils/**/*.py'
- '.github/workflows/test-utils.yml'

jobs:
test:
if: github.event.pull_request.draft != true
if: github.event_name != 'pull_request' || github.event.pull_request.draft != true
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The condition skips tests on draft PRs, but the PR title contains '[WIP]', which suggests this pull request is still a work in progress. Consider removing this condition, or updating the workflow to run on draft PRs, so that tests are executed during development.

Suggested change
if: github.event_name != 'pull_request' || github.event.pull_request.draft != true

Copilot uses AI. Check for mistakes.
runs-on: ubuntu-latest
permissions:
contents: read
Expand All @@ -26,7 +33,12 @@ jobs:
python -m pip install --upgrade pip
pip install pytest pydantic pyyaml

- name: Run pytest
- name: Run pytest for matrix-logic
run: |
cd utils/matrix-logic
pytest test_generate_sweep_configs.py -v

- name: Run pytest for process_result
run: |
cd utils
pytest test_process_result.py -v
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
**/__pycache__/**
**/.coverage
**/.coverage
**/.pytest_cache/
*.pyc
*.pyo
.coverage.*
htmlcov/
105 changes: 64 additions & 41 deletions utils/process_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,48 +4,71 @@
from pathlib import Path


# Run configuration is read from environment variables.
# TP and EP_SIZE are required: int(None) raises TypeError when either is unset.
hw = os.environ.get('RUNNER_TYPE')
tp_size = int(os.environ.get('TP'))
ep_size = int(os.environ.get('EP_SIZE'))
# Optional GPU counts; they default to '' so that "not set" can be detected later.
prefill_gpus_str = os.environ.get('PREFILL_GPUS', '')
decode_gpus_str = os.environ.get('DECODE_GPUS', '')
def process_benchmark_result(bmk_result, env_vars):
    """Process a raw benchmark result into aggregated per-GPU metrics.

    Args:
        bmk_result: Mapping holding the raw benchmark output. It must
            contain ``max_concurrency``, ``model_id``,
            ``total_token_throughput`` and ``output_throughput``. Every key
            ending in ``_ms`` is treated as a millisecond measurement.
        env_vars: Mapping of run settings (for example ``os.environ``).
            ``TP`` and ``EP_SIZE`` are required. ``RUNNER_TYPE``,
            ``PREFILL_GPUS``, ``DECODE_GPUS``, ``DP_ATTENTION``,
            ``FRAMEWORK``, ``PRECISION`` and ``MTP_MODE`` are optional.

    Returns:
        Dictionary with the run settings, the per-GPU throughputs, every
        ``*_ms`` metric converted to seconds (stored under the key without
        the ``_ms`` suffix) and, for each ``*tpot*_ms`` metric, a matching
        ``intvty`` entry equal to ``1000 / tpot_ms``.

    Raises:
        TypeError: If ``TP`` or ``EP_SIZE`` is missing from ``env_vars``.
        KeyError: If a required key is missing from ``bmk_result``.
        ValueError: If a numeric field cannot be parsed.
    """
    ms_suffix = '_ms'

    tp_size = int(env_vars.get('TP'))
    ep_size = int(env_vars.get('EP_SIZE'))

    # An empty or missing value means an aggregated run, where prefill and
    # decode share all tp_size GPUs; otherwise the explicit count is used.
    prefill_gpus = int(env_vars.get('PREFILL_GPUS') or tp_size)
    decode_gpus = int(env_vars.get('DECODE_GPUS') or tp_size)

    data = {
        'hw': env_vars.get('RUNNER_TYPE'),
        'tp': tp_size,
        'ep': ep_size,
        'dp_attention': env_vars.get('DP_ATTENTION'),  # true or false
        'conc': int(bmk_result['max_concurrency']),
        'model': bmk_result['model_id'],
        'framework': env_vars.get('FRAMEWORK'),
        'precision': env_vars.get('PRECISION'),
    }

    total_tput = float(bmk_result['total_token_throughput'])
    output_tput = float(bmk_result['output_throughput'])
    data['tput_per_gpu'] = total_tput / tp_size
    data['output_tput_per_gpu'] = output_tput / decode_gpus
    # Input throughput is whatever part of the total is not output tokens.
    data['input_tput_per_gpu'] = (total_tput - output_tput) / prefill_gpus

    mtp_mode = env_vars.get('MTP_MODE')
    if mtp_mode:  # MTP
        data['mtp'] = mtp_mode

    for key, value in bmk_result.items():
        # Match the full '_ms' suffix: a bare 'ms' check would also pick up
        # unrelated keys such as 'num_items'.
        if not key.endswith(ms_suffix):
            continue

        # Strip only the trailing suffix, not every '_ms' inside the key.
        base_key = key[:-len(ms_suffix)]
        millis = float(value)
        data[base_key] = millis / 1000.0

        # Interactivity is the reciprocal of TPOT. The 1000/x conversion is
        # only meaningful for millisecond values, so it is derived here. A
        # zero TPOT has no finite reciprocal, so the entry is omitted rather
        # than raising ZeroDivisionError.
        if 'tpot' in key and millis:
            data[base_key.replace('tpot', 'intvty')] = 1000.0 / millis

    return data

# If empty string (aggregated runs), assign to tp_size (total gpus), otherwise convert to int
prefill_gpus = tp_size if not prefill_gpus_str else int(prefill_gpus_str)
decode_gpus = tp_size if not decode_gpus_str else int(decode_gpus_str)
# The remaining settings are kept as read from the environment (None when unset).
dp_attention = os.environ.get('DP_ATTENTION')
result_filename = os.environ.get('RESULT_FILENAME')
framework = os.environ.get('FRAMEWORK')
precision = os.environ.get('PRECISION')
mtp_mode = os.environ.get('MTP_MODE')

# Load the raw benchmark output from '<RESULT_FILENAME>.json'.
with open(f'{result_filename}.json') as f:
bmk_result = json.load(f)
def main():
    """Process the benchmark result file named by ``RESULT_FILENAME``.

    Reads ``<RESULT_FILENAME>.json``, aggregates it with
    ``process_benchmark_result`` using ``os.environ`` as the settings
    mapping, prints the aggregated metrics as indented JSON and writes the
    same JSON to ``agg_<RESULT_FILENAME>.json``.

    Raises:
        ValueError: If ``RESULT_FILENAME`` is unset or empty.
    """
    result_filename = os.environ.get('RESULT_FILENAME')
    if not result_filename:
        # Without this guard a missing variable would be formatted into the
        # literal file names 'None.json' and 'agg_None.json'.
        raise ValueError('RESULT_FILENAME environment variable must be set')

    # JSON interchange files are UTF-8; do not depend on the locale default.
    with open(f'{result_filename}.json', encoding='utf-8') as f:
        bmk_result = json.load(f)

    data = process_benchmark_result(bmk_result, os.environ)

    print(json.dumps(data, indent=2))

    with open(f'agg_{result_filename}.json', 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)

# Aggregated record: run settings plus throughput divided by GPU counts.
data = {
'hw': hw,
'tp': tp_size,
'ep': ep_size,
'dp_attention': dp_attention, # true or false
'conc': int(bmk_result['max_concurrency']),
'model': bmk_result['model_id'],
'framework': framework,
'precision': precision,
'tput_per_gpu': float(bmk_result['total_token_throughput']) / tp_size,
'output_tput_per_gpu': float(bmk_result['output_throughput']) / decode_gpus,
'input_tput_per_gpu': (float(bmk_result['total_token_throughput']) - float(bmk_result['output_throughput']) )/ prefill_gpus
}

# The 'mtp' entry is only added when MTP_MODE is set to a non-empty value.
if mtp_mode: # MTP
data['mtp'] = mtp_mode

# Keys ending in 'ms' are divided by 1000 and stored under the key with '_ms' removed.
# For keys containing 'tpot', 1000 / value is also stored with 'tpot' renamed to 'intvty'.
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# 'tpot' check is nested under the 'ms' check — confirm against the file.
for key, value in bmk_result.items():
if key.endswith('ms'):
data[key.replace('_ms', '')] = float(value) / 1000.0
if 'tpot' in key:
data[key.replace('_ms', '').replace('tpot', 'intvty')] = 1000.0 / float(value)

# Echo the aggregated record to stdout as indented JSON.
print(json.dumps(data, indent=2))

# Write the same record to 'agg_<RESULT_FILENAME>.json'.
with open(f'agg_{result_filename}.json', 'w') as f:
json.dump(data, f, indent=2)
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
main()
Loading