Skip to content

Commit ec3e7c1

Browse files
committed
updating structure of changelog
1 parent 4b7fc65 commit ec3e7c1

3 files changed

Lines changed: 272 additions & 14 deletions

File tree

Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# Workflow identity and triggers.
name: 'Diff Only Runs'

on:
  # Manual runs from the Actions tab / API.
  workflow_dispatch:
  # Automatic runs: only pushes to main that touch the perf changelog.
  push:
    branches: [main]
    paths: ['perf-changelog.yaml']
10+
11+
jobs:
12+
get-dsr1-configs:
  # Builds the dsr1 1k1k sweep matrices (multi-node and single-node) as JSON
  # and exposes them as job outputs for the downstream benchmark jobs.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout code
      uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

    - id: get-dsr1-configs
      # NOTE(review): the single-node sweep reads only nvidia-master.yaml,
      # while the multi-node sweep reads both master files; confirm that AMD
      # is intentionally excluded from the single-node matrix.
      run: |
        pip install pydantic
        CONFIG_JSON_MULTI_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
          full-sweep --multi-node --seq-lens 1k1k --model-prefix dsr1 \
          --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml \
          --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
        CONFIG_JSON_SINGLE_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
          full-sweep --single-node --seq-lens 1k1k --model-prefix dsr1 \
          --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml \
          --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
        echo "multi-node-search-space-config=$CONFIG_JSON_MULTI_NODE" >> $GITHUB_OUTPUT
        echo "single-node-search-space-config=$CONFIG_JSON_SINGLE_NODE" >> $GITHUB_OUTPUT
  outputs:
    multi-node-search-space-config: ${{ steps.get-dsr1-configs.outputs.multi-node-search-space-config }}
    single-node-search-space-config: ${{ steps.get-dsr1-configs.outputs.single-node-search-space-config }}
28+
29+
get-gptoss-configs:
  # Builds the gptoss 1k1k sweep matrices (multi-node and single-node) as JSON
  # and exposes them as job outputs for the downstream benchmark jobs.
  runs-on: ubuntu-latest
  outputs:
    multi-node-search-space-config: ${{ steps.get-gptoss-configs.outputs.multi-node-search-space-config }}
    single-node-search-space-config: ${{ steps.get-gptoss-configs.outputs.single-node-search-space-config }}
  steps:
    - name: Checkout code
      uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

    - id: get-gptoss-configs
      # Both master config files must follow --config-files. Previously
      # amd-master.yaml sat between "--model-prefix gptoss" and
      # "--config-files", so it was parsed as part of --model-prefix (or as a
      # stray positional) instead of being loaded as a config file.
      run: |
        pip install pydantic
        CONFIG_JSON_MULTI_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
          full-sweep --multi-node --seq-lens 1k1k --model-prefix gptoss \
          --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml \
          --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
        CONFIG_JSON_SINGLE_NODE=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py \
          full-sweep --single-node --seq-lens 1k1k --model-prefix gptoss \
          --config-files ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml \
          --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
        echo "multi-node-search-space-config=$CONFIG_JSON_MULTI_NODE" >> $GITHUB_OUTPUT
        echo "single-node-search-space-config=$CONFIG_JSON_SINGLE_NODE" >> $GITHUB_OUTPUT
45+
46+
benchmark-dsr1-multi-node:
  # Fans out one reusable multi-node benchmark run per dsr1 matrix entry.
  name: 'dsr1 1k1k multi-node /'
  needs: get-dsr1-configs
  # Skip entirely when the generator produced an empty matrix.
  if: needs.get-dsr1-configs.outputs.multi-node-search-space-config != '[]'
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-dsr1-configs.outputs.multi-node-search-space-config) }}
  with:
    # Fixed sequence-length settings for the 1k1k experiment.
    exp-name: 'dsr1_1k1k'
    isl: 1024
    osl: 1024
    max-model-len: 2048

    # Per-entry settings taken from the generated matrix.
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}

    # Prefill worker settings.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}

    # Decode worker settings.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
82+
83+
benchmark-dsr1-single-node:
  # Fans out one reusable single-node benchmark run per dsr1 matrix entry.
  name: 'dsr1 1k1k single-node /'
  needs: get-dsr1-configs
  # Skip entirely when the generator produced an empty matrix.
  if: needs.get-dsr1-configs.outputs.single-node-search-space-config != '[]'
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-dsr1-configs.outputs.single-node-search-space-config) }}
  with:
    # Fixed sequence-length settings for the 1k1k experiment.
    exp-name: 'dsr1_1k1k'
    isl: 1024
    osl: 1024
    max-model-len: 2048

    # Per-entry settings taken from the generated matrix.
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
110+
111+
benchmark-gptoss-multi-node:
  # Fans out one reusable multi-node benchmark run per gptoss matrix entry.
  needs: get-gptoss-configs
  # Skip entirely when the generator produced an empty matrix.
  if: ${{ needs.get-gptoss-configs.outputs.multi-node-search-space-config != '[]' }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: gptoss 1k1k multi-node /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-gptoss-configs.outputs.multi-node-search-space-config) }}
  secrets: inherit
  with:
    isl: 1024
    osl: 1024
    max-model-len: 2048
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    # Was "dsr1_1k1k" (copy-paste from the dsr1 job); gptoss runs must be
    # filed under the gptoss experiment name so they are not mixed into the
    # dsr1 results.
    exp-name: "gptoss_1k1k"
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}

    # Prefill worker settings.
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}

    # Decode worker settings.
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
147+
148+
benchmark-gptoss-single-node:
  # Fans out one reusable single-node benchmark run per gptoss matrix entry.
  name: 'gptoss 1k1k single-node /'
  needs: get-gptoss-configs
  # Skip entirely when the generator produced an empty matrix.
  if: needs.get-gptoss-configs.outputs.single-node-search-space-config != '[]'
  uses: ./.github/workflows/benchmark-tmpl.yml
  secrets: inherit
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-gptoss-configs.outputs.single-node-search-space-config) }}
  with:
    # Fixed sequence-length settings for the 1k1k experiment.
    exp-name: 'gptoss_1k1k'
    isl: 1024
    osl: 1024
    max-model-len: 2048

    # Per-entry settings taken from the generated matrix.
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
175+
176+
collect-dsr1-results:
  # Aggregates dsr1 benchmark results via the reusable collect workflow.
  needs:
    - get-dsr1-configs
    - benchmark-dsr1-single-node
    - benchmark-dsr1-multi-node
  # always() lets collection run even if some benchmark matrix entries failed
  # or one benchmark job was skipped. The two benchmark jobs are each gated on
  # their own matrix being non-empty, so collection must run when EITHER
  # matrix is non-empty; requiring both (&&) skipped collection whenever only
  # single-node or only multi-node configs existed.
  if: ${{ always() && needs.get-dsr1-configs.result == 'success' && (needs.get-dsr1-configs.outputs.single-node-search-space-config != '[]' || needs.get-dsr1-configs.outputs.multi-node-search-space-config != '[]') }}
  uses: ./.github/workflows/collect-results.yml
  secrets: inherit
  with:
    exp-name: "dsr1_1k1k"
188+
189+
collect-gptoss-results:
  # Aggregates gptoss benchmark results via the reusable collect workflow.
  needs:
    - get-gptoss-configs
    - benchmark-gptoss-single-node
    - benchmark-gptoss-multi-node
  # always() lets collection run even if some benchmark matrix entries failed
  # or one benchmark job was skipped. The two benchmark jobs are each gated on
  # their own matrix being non-empty, so collection must run when EITHER
  # matrix is non-empty; requiring both (&&) skipped collection whenever only
  # single-node or only multi-node configs existed.
  if: ${{ always() && needs.get-gptoss-configs.result == 'success' && (needs.get-gptoss-configs.outputs.single-node-search-space-config != '[]' || needs.get-gptoss-configs.outputs.multi-node-search-space-config != '[]') }}
  uses: ./.github/workflows/collect-results.yml
  secrets: inherit
  with:
    exp-name: "gptoss_1k1k"
201+
202+
calc-success-rate:
  # Downloads every results_* artifact, runs utils/calc_success_rate.py and
  # uploads the resulting stats JSON as the "run-stats" artifact.
  runs-on: ubuntu-latest
  needs:
    - collect-dsr1-results
    - collect-gptoss-results
  # Run regardless of whether the collect jobs succeeded, failed or skipped.
  if: always()

  env:
    RESULTS_DIR: 'results/'
    STATS_FILENAME: 'run_stats'
    GITHUB_TOKEN: ${{ secrets.REPO_PAT }}

  steps:
    - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
      with:
        fetch-depth: 0
        token: ${{ secrets.REPO_PAT }}

    - name: Download results artifacts
      uses: actions/download-artifact@018cc2cf5baa6db3ef3c5f8a56943fffe632ef53 # v6.0.0
      with:
        pattern: 'results_*'
        path: ${{ env.RESULTS_DIR }}

    - name: Install python dependencies
      run: pip install PyGithub

    - name: Calculate success rate
      run: python3 utils/calc_success_rate.py $STATS_FILENAME

    - uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
      with:
        name: 'run-stats'
        path: ${{ env.STATS_FILENAME }}.json

perf-changelog.yaml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
- config-keys:
22
- dsr1-fp8-h200-trt
33
description: Test change abc
4-
seq-lens: [ 1k1k, 1k8k, 8k1k ]
54
- config-keys:
65
- dsr1-fp8-h200-sglang
76
description: Test change 2
8-
seq-lens: [ 1k1k, 1k8k, 8k1k ]

utils/process_changelog.py

Lines changed: 39 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import yaml
22
import json
33
import argparse
4+
import subprocess
45

56
from pydantic import BaseModel, ConfigDict, Field
67

7-
from matrix_logic.validation import load_config_files, load_runner_file
8+
from pprint import pprint
9+
10+
from matrix_logic.validation import load_config_files
811

912
MASTER_CONFIGS = [".github/configs/amd-master.yaml",
1013
".github/configs/nvidia-master.yaml"]
@@ -18,31 +21,55 @@ class ChangelogEntry(BaseModel):
1821
seq_lens: list[str] = Field(alias='seq-lens')
1922

2023

21-
def main():
22-
parser = argparse.ArgumentParser()
23-
parser.add_argument(
24-
'--changelog-file',
25-
type=str,
26-
required=True,
27-
help='Path to the changelog YAML file'
24+
def get_added_lines(base_ref, head_ref, filepath):
    """Return the lines added to ``filepath`` between two git refs.

    Runs ``git diff base_ref head_ref -- filepath`` and extracts the content
    of every added (``+``) line inside the diff hunks.

    Args:
        base_ref: Git ref used as the "before" side of the diff.
        head_ref: Git ref used as the "after" side of the diff.
        filepath: Path of the file to diff.

    Returns:
        The added lines, with the leading ``+`` removed, joined by newlines.
        An empty string when nothing was added.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` exits non-zero (for
            example an unknown ref). Without this, a failed diff produced
            empty output and was indistinguishable from "no new entries".
    """
    result = subprocess.run(
        ["git", "diff", base_ref, head_ref, "--", filepath],
        capture_output=True,
        text=True,
        check=True,
    )

    added_lines = []
    in_hunk = False
    for line in result.stdout.splitlines():
        if line.startswith("diff --git "):
            # Start of a file header section; header lines such as
            # "+++ b/<path>" follow and must not be treated as content.
            in_hunk = False
        elif line.startswith("@@"):
            # Hunk header: everything after it is diff content.
            in_hunk = True
        elif in_hunk and line.startswith("+"):
            # Inside a hunk every "+" line is an addition, including content
            # that itself begins with "++" (which a "+++" prefix filter
            # would wrongly drop).
            added_lines.append(line[1:])

    return "\n".join(added_lines)
37+
2938

39+
def main():
40+
parser = argparse.ArgumentParser()
41+
parser.add_argument('--base-ref', type=str, required=True)
42+
parser.add_argument('--head-ref', type=str, required=True)
43+
parser.add_argument('--changelog-file', type=str, required=True)
3044
args = parser.parse_args()
3145

3246
master_config_data = load_config_files(MASTER_CONFIGS)
3347

34-
with open(args.changelog_file, 'r') as f:
35-
changelog_data = yaml.safe_load(f)
48+
added_yaml = get_added_lines(args.base_ref, args.head_ref, args.changelog_file)
49+
50+
if not added_yaml.strip():
51+
print("No new changelog entries found")
52+
return
53+
54+
changelog_data = yaml.safe_load(added_yaml)
55+
pprint(changelog_data)
56+
57+
if not changelog_data:
58+
print("No new changelog entries found")
59+
return
3660

3761
for entry_data in changelog_data:
3862
entry = ChangelogEntry.model_validate(entry_data)
3963

40-
# Make sure the specfied config keys actually exist in the master config files
4164
for config_key in entry.config_keys:
42-
if config_key not in master_config_data.keys():
65+
if config_key not in master_config_data:
4366
raise ValueError(
4467
f"Config key '{config_key}' does not exist in master config files."
4568
)
69+
70+
# print(f"Config keys: {entry.config_keys}")
71+
# print(f"Seq lens: {entry.seq_lens}")
72+
# print(f"Description: {entry.description}")
4673

4774

4875
if __name__ == "__main__":

0 commit comments

Comments
 (0)