updating structure of changelog

cquil11 · cquil11 · commit ec3e7c1cadf3 · 2025-12-04T11:30:56.000-06:00
diff --git a/.github/workflows/diff-only-runs.yml b/.github/workflows/diff-only-runs.yml
@@ -0,0 +1,233 @@
+# Benchmark workflow that starts manually or when perf-changelog.yaml changes on main.
+name: "Diff Only Runs"
+
+on:
+    workflow_dispatch:
+    push:
+        branches:
+            - main
+        # Only pushes that touch the perf changelog start this workflow.
+        paths:
+            - "perf-changelog.yaml"
+
+jobs:
+    # Generates the 1k1k sweep matrices for the "dsr1" model prefix and exposes
+    # them as job outputs that the benchmark-dsr1-* jobs feed to fromJson().
+    get-dsr1-configs:
+        runs-on: ubuntu-latest
+        outputs:
+            multi-node-search-space-config: ${{ steps.get-dsr1-configs.outputs.multi-node-search-space-config }}
+            single-node-search-space-config: ${{ steps.get-dsr1-configs.outputs.single-node-search-space-config }}
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+
+            - id: get-dsr1-configs
+              run: |
+                  pip install pydantic
+                  # NOTE(review): the multi-node sweep reads amd-master.yaml and
+                  # nvidia-master.yaml, the single-node sweep only nvidia-master.yaml.
+                  # Confirm the asymmetry is intended.
+                  CONFIG_JSON_MULTI_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py full-sweep --multi-node --seq-lens 1k1k --model-prefix dsr1 --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                  CONFIG_JSON_SINGLE_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py full-sweep --single-node --seq-lens 1k1k --model-prefix dsr1 --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                  # Publish both matrices as step outputs.
+                  echo "multi-node-search-space-config=$CONFIG_JSON_MULTI_NODE" >> $GITHUB_OUTPUT
+                  echo "single-node-search-space-config=$CONFIG_JSON_SINGLE_NODE" >> $GITHUB_OUTPUT
+
+    # Generates the 1k1k sweep matrices for the "gptoss" model prefix and exposes
+    # them as job outputs that the benchmark-gptoss-* jobs feed to fromJson().
+    get-gptoss-configs:
+        runs-on: ubuntu-latest
+        outputs:
+            multi-node-search-space-config: ${{ steps.get-gptoss-configs.outputs.multi-node-search-space-config }}
+            single-node-search-space-config: ${{ steps.get-gptoss-configs.outputs.single-node-search-space-config }}
+        steps:
+            - name: Checkout code
+              uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+
+            - id: get-gptoss-configs
+              run: |
+                  pip install pydantic
+                  # Both master config files must follow --config-files; a path placed
+                  # before that flag is parsed as part of --model-prefix instead.
+                  CONFIG_JSON_MULTI_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py full-sweep --multi-node --seq-lens 1k1k --model-prefix gptoss --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                  CONFIG_JSON_SINGLE_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py full-sweep --single-node --seq-lens 1k1k --model-prefix gptoss --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                  # Publish both matrices as step outputs.
+                  echo "multi-node-search-space-config=$CONFIG_JSON_MULTI_NODE" >> $GITHUB_OUTPUT
+                  echo "single-node-search-space-config=$CONFIG_JSON_SINGLE_NODE" >> $GITHUB_OUTPUT
+
+    # Calls benchmark-multinode-tmpl.yml once per entry of the dsr1 multi-node
+    # matrix; skipped when that matrix is the literal string '[]'.
+    benchmark-dsr1-multi-node:
+        needs: get-dsr1-configs
+        if: ${{ needs.get-dsr1-configs.outputs.multi-node-search-space-config != '[]' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: dsr1 1k1k multi-node /
+        strategy:
+            # Let the remaining matrix entries finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.get-dsr1-configs.outputs.multi-node-search-space-config) }}
+        secrets: inherit
+        with:
+            isl: 1024
+            osl: 1024
+            max-model-len: 2048
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            exp-name: "dsr1_1k1k"
+            # Serialized with toJson() before being handed to the template.
+            conc-list: ${{ toJson(matrix.config.conc) }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+            # Settings read from matrix.config.prefill.
+            prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+            prefill-tp: ${{ matrix.config.prefill.tp }}
+            prefill-ep: ${{ matrix.config.prefill.ep }}
+            prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+            prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+            # Settings read from matrix.config.decode.
+            decode-num-worker: ${{ matrix.config.decode.num-worker }}
+            decode-tp: ${{ matrix.config.decode.tp }}
+            decode-ep: ${{ matrix.config.decode.ep }}
+            decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+            decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+    # Calls benchmark-tmpl.yml once per entry of the dsr1 single-node matrix;
+    # skipped when that matrix is the literal string '[]'.
+    benchmark-dsr1-single-node:
+        needs: get-dsr1-configs
+        if: ${{ needs.get-dsr1-configs.outputs.single-node-search-space-config != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: dsr1 1k1k single-node /
+        strategy:
+            # Let the remaining matrix entries finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.get-dsr1-configs.outputs.single-node-search-space-config) }}
+        secrets: inherit
+        with:
+            exp-name: "dsr1_1k1k"
+            isl: 1024
+            osl: 1024
+            max-model-len: 2048
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            # Forwarded as-is, without the toJson() wrapping used by the multi-node job.
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+    # Calls benchmark-multinode-tmpl.yml once per entry of the gptoss multi-node
+    # matrix; skipped when that matrix is the literal string '[]'.
+    benchmark-gptoss-multi-node:
+        needs: get-gptoss-configs
+        if: ${{ needs.get-gptoss-configs.outputs.multi-node-search-space-config != '[]' }}
+        uses: ./.github/workflows/benchmark-multinode-tmpl.yml
+        name: gptoss 1k1k multi-node /
+        strategy:
+            # Let the remaining matrix entries finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.get-gptoss-configs.outputs.multi-node-search-space-config) }}
+        secrets: inherit
+        with:
+            isl: 1024
+            osl: 1024
+            max-model-len: 2048
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            # Must match the exp-name used by the gptoss single-node job and by
+            # collect-gptoss-results, not the dsr1 experiment name.
+            exp-name: "gptoss_1k1k"
+            # Serialized with toJson() before being handed to the template.
+            conc-list: ${{ toJson(matrix.config.conc) }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+            # Settings read from matrix.config.prefill.
+            prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
+            prefill-tp: ${{ matrix.config.prefill.tp }}
+            prefill-ep: ${{ matrix.config.prefill.ep }}
+            prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
+            prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
+
+            # Settings read from matrix.config.decode.
+            decode-num-worker: ${{ matrix.config.decode.num-worker }}
+            decode-tp: ${{ matrix.config.decode.tp }}
+            decode-ep: ${{ matrix.config.decode.ep }}
+            decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
+            decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
+
+    # Calls benchmark-tmpl.yml once per entry of the gptoss single-node matrix;
+    # skipped when that matrix is the literal string '[]'.
+    benchmark-gptoss-single-node:
+        needs: get-gptoss-configs
+        if: ${{ needs.get-gptoss-configs.outputs.single-node-search-space-config != '[]' }}
+        uses: ./.github/workflows/benchmark-tmpl.yml
+        name: gptoss 1k1k single-node /
+        strategy:
+            # Let the remaining matrix entries finish when one of them fails.
+            fail-fast: false
+            matrix:
+                config: ${{ fromJson(needs.get-gptoss-configs.outputs.single-node-search-space-config) }}
+        secrets: inherit
+        with:
+            exp-name: "gptoss_1k1k"
+            isl: 1024
+            osl: 1024
+            max-model-len: 2048
+            runner: ${{ matrix.config.runner }}
+            image: ${{ matrix.config.image }}
+            model: ${{ matrix.config.model }}
+            model-prefix: ${{ matrix.config.model-prefix }}
+            framework: ${{ matrix.config.framework }}
+            precision: ${{ matrix.config.precision }}
+            tp: ${{ matrix.config.tp }}
+            ep: ${{ matrix.config.ep }}
+            dp-attn: ${{ matrix.config.dp-attn }}
+            # Forwarded as-is, without the toJson() wrapping used by the multi-node job.
+            conc: ${{ matrix.config.conc }}
+            spec-decoding: ${{ matrix.config.spec-decoding }}
+            disagg: ${{ matrix.config.disagg }}
+
+    # Calls collect-results.yml for the dsr1 experiment after the benchmark jobs.
+    collect-dsr1-results:
+        needs:
+            [
+                get-dsr1-configs,
+                benchmark-dsr1-single-node,
+                benchmark-dsr1-multi-node,
+            ]
+        # always() keeps this job eligible when a benchmark job failed or was
+        # skipped. Each benchmark job is gated on its own matrix, so collect as
+        # soon as at least one matrix is non-empty; requiring both would drop the
+        # results whenever only one kind of benchmark ran.
+        if: ${{ always() && needs.get-dsr1-configs.result == 'success' && (needs.get-dsr1-configs.outputs.single-node-search-space-config != '[]' || needs.get-dsr1-configs.outputs.multi-node-search-space-config != '[]') }}
+        uses: ./.github/workflows/collect-results.yml
+        secrets: inherit
+        with:
+            exp-name: "dsr1_1k1k"
+
+    # Calls collect-results.yml for the gptoss experiment after the benchmark jobs.
+    collect-gptoss-results:
+        needs:
+            [
+                get-gptoss-configs,
+                benchmark-gptoss-single-node,
+                benchmark-gptoss-multi-node,
+            ]
+        # always() keeps this job eligible when a benchmark job failed or was
+        # skipped. Each benchmark job is gated on its own matrix, so collect as
+        # soon as at least one matrix is non-empty; requiring both would drop the
+        # results whenever only one kind of benchmark ran.
+        if: ${{ always() && needs.get-gptoss-configs.result == 'success' && (needs.get-gptoss-configs.outputs.single-node-search-space-config != '[]' || needs.get-gptoss-configs.outputs.multi-node-search-space-config != '[]') }}
+        uses: ./.github/workflows/collect-results.yml
+        secrets: inherit
+        with:
+            exp-name: "gptoss_1k1k"
+
+    # Runs once both collect jobs are done, whatever their outcome (always()),
+    # and uploads the statistics file as the "run-stats" artifact.
+    calc-success-rate:
+        needs: [collect-dsr1-results, collect-gptoss-results]
+        if: ${{ always() }}
+        runs-on: ubuntu-latest
+
+        env:
+            RESULTS_DIR: "results/"
+            STATS_FILENAME: "run_stats"
+            # NOTE(review): presumably read by PyGithub inside calc_success_rate.py; confirm.
+            GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
+
+        steps:
+            - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+              with:
+                  token: ${{ secrets.REPO_PAT }}
+                  # 0 fetches the full history instead of a shallow clone.
+                  fetch-depth: 0
+
+            - name: Download results artifacts
+              uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
+              with:
+                  path: ${{ env.RESULTS_DIR }}
+                  # Only artifacts whose name starts with "results_" are downloaded.
+                  pattern: results_*
+
+            - name: Install python dependencies
+              run: pip install PyGithub
+
+            - name: Calculate success rate
+              run: python3 utils/calc_success_rate.py $STATS_FILENAME
+
+            # NOTE(review): assumes the previous step wrote <STATS_FILENAME>.json; confirm.
+            - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
+              with:
+                  name: "run-stats"
+                  path: ${{ env.STATS_FILENAME }}.json
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
@@ -1,8 +1,6 @@
 - config-keys:
   - dsr1-fp8-h200-trt
   description: Test change abc
-  seq-lens: [ 1k1k, 1k8k, 8k1k ]
 - config-keys:
   - dsr1-fp8-h200-sglang
   description: Test change 2
-  seq-lens: [ 1k1k, 1k8k, 8k1k ]
diff --git a/utils/process_changelog.py b/utils/process_changelog.py
@@ -1,10 +1,13 @@
 import yaml
 import json
 import argparse
+import subprocess
 
 from pydantic import BaseModel, ConfigDict, Field
 
-from matrix_logic.validation import load_config_files, load_runner_file
+from pprint import pprint
+
+from matrix_logic.validation import load_config_files
 
 MASTER_CONFIGS = [".github/configs/amd-master.yaml",
                   ".github/configs/nvidia-master.yaml"]
@@ -18,31 +21,55 @@ class ChangelogEntry(BaseModel):
     seq_lens: list[str] = Field(alias='seq-lens')
 
 
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        '--changelog-file',
-        type=str,
-        required=True,
-        help='Path to the changelog YAML file'
+def get_added_lines(base_ref, head_ref, filepath):
+    """Return the lines that ``git diff`` reports as added to *filepath*.
+
+    Args:
+        base_ref: git ref used as the old side of the diff.
+        head_ref: git ref used as the new side of the diff.
+        filepath: path the diff is restricted to.
+
+    Returns:
+        The added lines, without their leading ``+`` marker, joined with
+        newlines. An empty string when nothing was added.
+
+    Raises:
+        subprocess.CalledProcessError: if ``git diff`` exits with a non-zero
+            status (for example an unknown ref). Without ``check=True`` such a
+            failure yields empty output and looks like "nothing was added".
+    """
+    result = subprocess.run(
+        ["git", "diff", base_ref, head_ref, "--", filepath],
+        capture_output=True,
+        text=True,
+        check=True,
     )
+
+    added_lines = []
+    # Only lines inside a hunk are content. Tracking hunk boundaries skips the
+    # '+++ b/<path>' header without also dropping added lines whose own text
+    # starts with '++'.
+    in_hunk = False
+    for line in result.stdout.split('\n'):
+        if line.startswith('diff --git '):
+            # Hunk lines start with ' ', '+', '-' or '\', so this is a new file header.
+            in_hunk = False
+        elif line.startswith('@@'):
+            in_hunk = True
+        elif in_hunk and line.startswith('+'):
+            added_lines.append(line[1:])
+
+    return '\n'.join(added_lines)
+
 
+def main():
+    """Validate the changelog entries added between two git refs.
+
+    Command-line arguments:
+        --base-ref: git ref used as the old side of the diff.
+        --head-ref: git ref used as the new side of the diff.
+        --changelog-file: path of the changelog YAML file to diff.
+
+    Raises:
+        ValueError: if the added lines do not parse to a YAML list, or if an
+            entry names a config key missing from the master config files.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--base-ref', type=str, required=True)
+    parser.add_argument('--head-ref', type=str, required=True)
+    parser.add_argument('--changelog-file', type=str, required=True)
     args = parser.parse_args()
 
     master_config_data = load_config_files(MASTER_CONFIGS)
 
-    with open(args.changelog_file, 'r') as f:
-        changelog_data = yaml.safe_load(f)
+    # Only the lines added between the two refs are parsed, so entries that
+    # already existed at base-ref are not validated again.
+    added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file)
+
+    if not added_yaml.strip():
+        print("No new changelog entries found")
+        return
+
+    changelog_data = yaml.safe_load(added_yaml)
+    pprint(changelog_data)
+
+    if not changelog_data:
+        print("No new changelog entries found")
+        return
+
+    # The added lines are only a fragment of the file: an edit inside an
+    # existing entry can parse to a mapping or a scalar instead of a list.
+    if not isinstance(changelog_data, list):
+        raise ValueError(
+            "Added changelog lines must form a list of entries, "
+            f"got {type(changelog_data).__name__}."
+        )
 
     for entry_data in changelog_data:
+        # NOTE(review): ChangelogEntry declares 'seq-lens' without a default,
+        # while this commit removes seq-lens from perf-changelog.yaml; entries
+        # without it will presumably fail validation here. Confirm.
         entry = ChangelogEntry.model_validate(entry_data)
         
-        # Make sure the specfied config keys actually exist in the master config files
+        # Make sure the specified config keys actually exist in the master config files
         for config_key in entry.config_keys:
-            if config_key not in master_config_data.keys():
+            if config_key not in master_config_data:
                 raise ValueError(
                     f"Config key '{config_key}' does not exist in master config files."
                 )
 
 
 if __name__ == "__main__":