Skip to content

Commit e9209b5

Browse files
committed
test
1 parent fb6d3b3 commit e9209b5

2 files changed

Lines changed: 155 additions & 59 deletions

File tree

.github/workflows/run-sweep.yml

Lines changed: 146 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ on:
66
branches:
77
- main
88
- diff-only-runs
9-
# paths:
10-
# - "perf-changelog.yaml"
119

1210
jobs:
1311
get-jobs:
@@ -30,15 +28,112 @@ jobs:
3028
3129
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
3230
33-
sweep-multi-node-1k1k:
31+
# ============================================
32+
# Single Node - DeepSeek R1
33+
# ============================================
34+
sweep-single-node-dsr1-1k1k:
3435
needs: get-jobs
35-
if: ${{ needs.get-jobs.outputs.search-space-config.multi_node['1k1k'] != '[]' }}
36+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k1k'] != '[]' }}
37+
uses: ./.github/workflows/benchmark-tmpl.yml
38+
name: single-node dsr1_1k1k /
39+
strategy:
40+
fail-fast: false
41+
matrix:
42+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k1k'] }}
43+
secrets: inherit
44+
with: &single-node-inputs
45+
exp-name: ${{ matrix.config.exp-name }}
46+
isl: ${{ matrix.config.isl }}
47+
osl: ${{ matrix.config.osl }}
48+
max-model-len: ${{ matrix.config.max-model-len }}
49+
runner: ${{ matrix.config.runner }}
50+
image: ${{ matrix.config.image }}
51+
model: ${{ matrix.config.model }}
52+
model-prefix: ${{ matrix.config.model-prefix }}
53+
framework: ${{ matrix.config.framework }}
54+
precision: ${{ matrix.config.precision }}
55+
tp: ${{ matrix.config.tp }}
56+
ep: ${{ matrix.config.ep }}
57+
dp-attn: ${{ matrix.config.dp-attn }}
58+
conc: ${{ matrix.config.conc }}
59+
spec-decoding: ${{ matrix.config.spec-decoding }}
60+
disagg: ${{ matrix.config.disagg }}
61+
62+
sweep-single-node-dsr1-1k8k:
63+
needs: get-jobs
64+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k8k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k8k'] != '[]' }}
65+
uses: ./.github/workflows/benchmark-tmpl.yml
66+
name: single-node dsr1_1k8k /
67+
strategy:
68+
fail-fast: false
69+
matrix:
70+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_1k8k'] }}
71+
secrets: inherit
72+
with: *single-node-inputs
73+
74+
sweep-single-node-dsr1-8k1k:
75+
needs: get-jobs
76+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_8k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_8k1k'] != '[]' }}
77+
uses: ./.github/workflows/benchmark-tmpl.yml
78+
name: single-node dsr1_8k1k /
79+
strategy:
80+
fail-fast: false
81+
matrix:
82+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['dsr1_8k1k'] }}
83+
secrets: inherit
84+
with: *single-node-inputs
85+
86+
# ============================================
87+
# Single Node - GPT OSS
88+
# ============================================
89+
sweep-single-node-gptoss-1k1k:
90+
needs: get-jobs
91+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k1k'] != '[]' }}
92+
uses: ./.github/workflows/benchmark-tmpl.yml
93+
name: single-node gptoss_1k1k /
94+
strategy:
95+
fail-fast: false
96+
matrix:
97+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k1k'] }}
98+
secrets: inherit
99+
with: *single-node-inputs
100+
101+
sweep-single-node-gptoss-1k8k:
102+
needs: get-jobs
103+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k8k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k8k'] != '[]' }}
104+
uses: ./.github/workflows/benchmark-tmpl.yml
105+
name: single-node gptoss_1k8k /
106+
strategy:
107+
fail-fast: false
108+
matrix:
109+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_1k8k'] }}
110+
secrets: inherit
111+
with: *single-node-inputs
112+
113+
sweep-single-node-gptoss-8k1k:
114+
needs: get-jobs
115+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_8k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_8k1k'] != '[]' }}
116+
uses: ./.github/workflows/benchmark-tmpl.yml
117+
name: single-node gptoss_8k1k /
118+
strategy:
119+
fail-fast: false
120+
matrix:
121+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['gptoss_8k1k'] }}
122+
secrets: inherit
123+
with: *single-node-inputs
124+
125+
# ============================================
126+
# Multi Node - DeepSeek R1
127+
# ============================================
128+
sweep-multi-node-dsr1-1k1k:
129+
needs: get-jobs
130+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k1k'] != '[]' }}
36131
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
37-
name: multi-node 1k1k /
132+
name: multi-node dsr1_1k1k /
38133
strategy:
39134
fail-fast: false
40135
matrix:
41-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['1k1k'] }}
136+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k1k'] }}
42137
secrets: inherit
43138
with: &multi-node-inputs
44139
isl: ${{ matrix.config.isl }}
@@ -67,91 +162,87 @@ jobs:
67162
decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
68163
decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
69164

70-
sweep-multi-node-1k8k:
165+
sweep-multi-node-dsr1-1k8k:
71166
needs: get-jobs
72-
if: ${{ needs.get-jobs.outputs.search-space-config.multi_node['1k8k'] != '[]' }}
167+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k8k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k8k'] != '[]' }}
73168
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
74-
name: multi-node 1k8k /
169+
name: multi-node dsr1_1k8k /
75170
strategy:
76171
fail-fast: false
77172
matrix:
78-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['1k8k'] }}
173+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_1k8k'] }}
79174
secrets: inherit
80175
with: *multi-node-inputs
81176

82-
sweep-multi-node-8k1k:
177+
sweep-multi-node-dsr1-8k1k:
83178
needs: get-jobs
84-
if: ${{ needs.get-jobs.outputs.search-space-config.multi_node['8k1k'] != '[]' }}
179+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_8k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_8k1k'] != '[]' }}
85180
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
86-
name: multi-node 1k8k /
181+
name: multi-node dsr1_8k1k /
87182
strategy:
88183
fail-fast: false
89184
matrix:
90-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['8k1k'] }}
185+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['dsr1_8k1k'] }}
91186
secrets: inherit
92187
with: *multi-node-inputs
93188

94-
sweep-single-node-1k1k:
189+
# ============================================
190+
# Multi Node - GPT OSS (add if needed)
191+
# ============================================
192+
sweep-multi-node-gptoss-1k1k:
95193
needs: get-jobs
96-
if: ${{ needs.get-jobs.outputs.search-space-config.single_node['1k1k'] != '[]' }}
97-
uses: ./.github/workflows/benchmark-tmpl.yml
98-
name: single-node 1k1k /
194+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k1k'] != '[]' }}
195+
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
196+
name: multi-node gptoss_1k1k /
99197
strategy:
100198
fail-fast: false
101199
matrix:
102-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['1k1k'] }}
200+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k1k'] }}
103201
secrets: inherit
104-
with: &single-node-inputs
105-
exp-name: ${{ matrix.config.exp-name }}
106-
isl: ${{ matrix.config.isl }}
107-
osl: ${{ matrix.config.osl }}
108-
max-model-len: ${{ matrix.config.max-model-len }}
109-
runner: ${{ matrix.config.runner }}
110-
image: ${{ matrix.config.image }}
111-
model: ${{ matrix.config.model }}
112-
model-prefix: ${{ matrix.config.model-prefix }}
113-
framework: ${{ matrix.config.framework }}
114-
precision: ${{ matrix.config.precision }}
115-
tp: ${{ matrix.config.tp }}
116-
ep: ${{ matrix.config.ep }}
117-
dp-attn: ${{ matrix.config.dp-attn }}
118-
conc: ${{ matrix.config.conc }}
119-
spec-decoding: ${{ matrix.config.spec-decoding }}
120-
disagg: ${{ matrix.config.disagg }}
202+
with: *multi-node-inputs
121203

122-
sweep-single-node-1k8k:
204+
sweep-multi-node-gptoss-1k8k:
123205
needs: get-jobs
124-
if: ${{ needs.get-jobs.outputs.search-space-config.single_node['1k8k'] != '[]' }}
125-
uses: ./.github/workflows/benchmark-tmpl.yml
126-
name: single-node 1k8k /
206+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k8k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k8k'] != '[]' }}
207+
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
208+
name: multi-node gptoss_1k8k /
127209
strategy:
128210
fail-fast: false
129211
matrix:
130-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['1k8k'] }}
212+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_1k8k'] }}
131213
secrets: inherit
132-
with: *single-node-inputs
214+
with: *multi-node-inputs
133215

134-
sweep-single-node-8k1k:
216+
sweep-multi-node-gptoss-8k1k:
135217
needs: get-jobs
136-
if: ${{ needs.get-jobs.outputs.search-space-config.single_node['8k1k'] != '[]' }}
137-
uses: ./.github/workflows/benchmark-tmpl.yml
138-
name: single-node 8k1k /
218+
if: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_8k1k'] != null && fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_8k1k'] != '[]' }}
219+
uses: ./.github/workflows/benchmark-multinode-tmpl.yml
220+
name: multi-node gptoss_8k1k /
139221
strategy:
140222
fail-fast: false
141223
matrix:
142-
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).single_node['8k1k'] }}
224+
config: ${{ fromJson(needs.get-jobs.outputs.search-space-config).multi_node['gptoss_8k1k'] }}
143225
secrets: inherit
144-
with: *single-node-inputs
226+
with: *multi-node-inputs
145227

228+
# ============================================
229+
# Results Collection
230+
# ============================================
146231
collect-results:
147232
needs:
148233
[
149-
sweep-single-node-1k1k,
150-
sweep-single-node-1k8k,
151-
sweep-single-node-8k1k,
152-
sweep-multi-node-1k1k,
153-
sweep-multi-node-1k8k,
154-
sweep-multi-node-8k1k,
234+
sweep-single-node-dsr1-1k1k,
235+
sweep-single-node-dsr1-1k8k,
236+
sweep-single-node-dsr1-8k1k,
237+
sweep-single-node-gptoss-1k1k,
238+
sweep-single-node-gptoss-1k8k,
239+
sweep-single-node-gptoss-8k1k,
240+
sweep-multi-node-dsr1-1k1k,
241+
sweep-multi-node-dsr1-1k8k,
242+
sweep-multi-node-dsr1-8k1k,
243+
sweep-multi-node-gptoss-1k1k,
244+
sweep-multi-node-gptoss-1k8k,
245+
sweep-multi-node-gptoss-8k1k,
155246
]
156247
if: ${{ always() }}
157248
uses: ./.github/workflows/collect-results.yml
@@ -188,4 +279,4 @@ jobs:
188279
- uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
189280
with:
190281
name: "run-stats"
191-
path: ${{ env.STATS_FILENAME }}.json
282+
path: ${{ env.STATS_FILENAME }}.json

utils/process_changelog.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,13 +124,18 @@ def main():
124124
print(e.stderr)
125125

126126
all_results.extend(json.loads(result.stdout))
127-
127+
128128
for result in all_results:
129-
seq_len_str = seq_len_to_str(result["isl"], result["osl"])
129+
exp_name = result['exp-name']
130+
130131
if "prefill" in result and result["prefill"] is not None:
131-
final_results["multi_node"][seq_len_str].append(result)
132+
final_results["multi_node"][exp_name].append(result)
132133
else:
133-
final_results["single_node"][seq_len_str].append(result)
134+
final_results["single_node"][exp_name].append(result)
135+
136+
# Convert defaultdicts to regular dicts for JSON serialization
137+
final_results["single_node"] = dict(final_results["single_node"])
138+
final_results["multi_node"] = dict(final_results["multi_node"])
134139

135140
# pprint(final_results)
136141
print(json.dumps(final_results))

0 commit comments

Comments
 (0)