@@ -113,6 +113,17 @@ jobs:
113113 EP_SIZE : ${{ matrix.config.ep }}
114114 DP_ATTENTION : ${{ matrix.config['dp-attn'] }}
115115 CONC : ${{ matrix.config.conc }}
# CONC_JSON carries matrix.config.conc serialized as JSON; the "Launch + Profile"
# script's normalize_conc helper reads it in preference to the plain CONC value.
116+ CONC_JSON : ${{ toJson(matrix.config.conc) }}
# Values read from matrix.config.prefill. The launch script takes its prefill/decode
# branch only when both PREFILL_NUM_WORKERS and DECODE_NUM_WORKERS are non-empty.
117+ PREFILL_NUM_WORKERS : ${{ matrix.config.prefill['num-worker'] }}
118+ PREFILL_TP : ${{ matrix.config.prefill.tp }}
119+ PREFILL_EP : ${{ matrix.config.prefill.ep }}
120+ PREFILL_DP_ATTN : ${{ matrix.config.prefill['dp-attn'] }}
# JSON-encoded so the script can decode it with Python and export the entries one by one.
121+ PREFILL_ADDITIONAL_SETTINGS_JSON : ${{ toJson(matrix.config.prefill['additional-settings']) }}
# Values read from matrix.config.decode (same layout as the prefill entries above).
122+ DECODE_NUM_WORKERS : ${{ matrix.config.decode['num-worker'] }}
123+ DECODE_TP : ${{ matrix.config.decode.tp }}
124+ DECODE_EP : ${{ matrix.config.decode.ep }}
125+ DECODE_DP_ATTN : ${{ matrix.config.decode['dp-attn'] }}
126+ DECODE_ADDITIONAL_SETTINGS_JSON : ${{ toJson(matrix.config.decode['additional-settings']) }}
116127 SPEC_DECODING : ${{ matrix.config.spec-decoding }}
117128 DISAGG : ${{ matrix.config.disagg }}
118129 MOE_DEBUG : ' 0'
@@ -148,7 +159,7 @@ jobs:
148159 ref : ${{ inputs.ref || github.sha }}
149160 clean : false
150161
151- - name : Launch + Profile (single-node sglang/vllm)
162+ - name : Launch + Profile
152163 id : run
153164 env :
154165 RUNNER_NAME : ${{ runner.name }}
@@ -159,19 +170,108 @@ jobs:
159170 shell : bash
160171 run : |
161172 set -euo pipefail
162- ep_val="${EP_SIZE:-1}"
163- res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_tp${TP}_ep${ep_val}_dpa_${DP_ATTENTION}_conc${CONC}_${RUNNER_NAME}"
173+
# export_additional_settings SETTINGS_JSON
# Decodes the JSON text given as $1 and prints each of its elements on its own
# line of stdout. Prints nothing when $1 is empty or the literal string "null".
# NOTE(review): the callers pass every printed line to `export`, so the elements
# are presumably NAME=value strings -- confirm against the matrix schema.
174+ export_additional_settings() {
175+ local settings_json="$1"
# The JSON text becomes sys.argv[1]; the quoted heredoc is the Python program itself.
176+ python3 - "$settings_json" <<'PY'
177+ import json
178+ import sys
179+
180+ raw = sys.argv[1]
# Nothing to print: leave quietly with exit status 0.
181+ if not raw or raw == "null":
182+ raise SystemExit(0)
# `or []` covers JSON that decodes to a falsy value (for example an empty list).
183+ for item in json.loads(raw) or []:
184+ print(item)
185+ PY
186+ }
187+
# normalize_conc
# Prints the concurrency setting as a single token: a JSON list is printed as its
# elements joined with "x"; any other value is printed via str().
# Takes no arguments; reads the CONC_JSON and CONC environment variables.
188+ normalize_conc() {
189+ python3 - <<'PY'
190+ import json
191+ import os
192+
# First non-empty of CONC_JSON, CONC; falls back to the JSON empty list "[]".
193+ raw = os.environ.get("CONC_JSON") or os.environ.get("CONC") or "[]"
# Text that is not valid JSON is kept verbatim instead of failing the step.
194+ try:
195+ value = json.loads(raw)
196+ except json.JSONDecodeError:
197+ value = raw
198+ if isinstance(value, list):
199+ print("x".join(str(v) for v in value))
200+ else:
201+ print(str(value))
202+ PY
203+ }
204+
# Choose the result name. The prefill/decode branch is taken only when both worker
# counts are non-empty; otherwise the single-node naming scheme is used.
205+ if [ -n "${PREFILL_NUM_WORKERS:-}" ] && [ -n "${DECODE_NUM_WORKERS:-}" ]; then
206+ conc_val="$(normalize_conc)"
# The name encodes the prefill and decode tp/ep/dp-attn/worker counts, the disagg
# and spec-decoding flags, the normalized concurrency and the runner name.
207+ res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_prefill-tp${PREFILL_TP}-ep${PREFILL_EP}-dp${PREFILL_DP_ATTN}-nw${PREFILL_NUM_WORKERS}_decode-tp${DECODE_TP}-ep${DECODE_EP}-dp${DECODE_DP_ATTN}-nw${DECODE_NUM_WORKERS}_disagg-${DISAGG}_spec-${SPEC_DECODING}_conc${conc_val}_${RUNNER_NAME}"
208+
# Lines appended to $GITHUB_ENV become environment variables of the following steps.
# The GPU totals are computed as worker count times TP size.
209+ echo "IS_MULTINODE=true" >> "$GITHUB_ENV"
210+ echo "PREFILL_GPUS=$((PREFILL_NUM_WORKERS * PREFILL_TP))" >> "$GITHUB_ENV"
211+ echo "DECODE_GPUS=$((DECODE_NUM_WORKERS * DECODE_TP))" >> "$GITHUB_ENV"
212+
# Export every non-empty line printed for the prefill additional settings.
213+ while IFS= read -r setting; do
214+ if [ -n "$setting" ]; then
215+ export "$setting"
216+ fi
217+ done < <(export_additional_settings "${PREFILL_ADDITIONAL_SETTINGS_JSON:-null}")
# Same for the decode additional settings; these exports run after the prefill ones.
218+ while IFS= read -r setting; do
219+ if [ -n "$setting" ]; then
220+ export "$setting"
221+ fi
222+ done < <(export_additional_settings "${DECODE_ADDITIONAL_SETTINGS_JSON:-null}")
223+ else
# Single-node naming; EP_SIZE defaults to 1 when unset or empty.
224+ ep_val="${EP_SIZE:-1}"
225+ res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_tp${TP}_ep${ep_val}_dpa_${DP_ATTENTION}_conc${CONC}_${RUNNER_NAME}"
226+ fi
227+
# Expose the result name to the launch script (export) and to later steps (GITHUB_ENV).
164228 export RESULT_FILENAME="${res_name}"
165229 echo "RESULT_FILENAME=${res_name}" >> "$GITHUB_ENV"
166230
# ${RUNNER_NAME%%_*} keeps the part of RUNNER_NAME before its first underscore,
# which selects the runners/launch_<prefix>.sh script to run.
167231 bash ./runners/launch_${RUNNER_NAME%%_*}.sh
168232
169233 if [ ! -f "${res_name}.json" ]; then
170- echo "Run failed: Benchmark result ${res_name}.json not found." >&2
171- exit 1
# Fallback: use the first (sorted) "${res_name}_*.json" file in the working directory.
# `|| true` keeps a failing pipeline from aborting the step under `set -euo pipefail`.
234+ result_candidate="$(find . -maxdepth 1 -type f -name "${res_name}_*.json" | sort | head -n1 || true)"
235+ if [ -n "$result_candidate" ] && [ -f "$result_candidate" ]; then
# Copy it to the exact name that the check above looks for.
236+ cp "$result_candidate" "${res_name}.json"
237+ else
238+ echo "Run failed: Benchmark result ${res_name}.json not found." >&2
239+ exit 1
240+ fi
172241 fi
173242
# Expected trace location. When it is missing and LOGS/profiles exists, fall back
# to the largest matching file found anywhere below LOGS/profiles.
174243 trace_path="profile_${res_name}.trace.json.gz"
244+ if [ ! -f "$trace_path" ] && [ -d LOGS/profiles ]; then
245+ trace_candidate="$(python3 - <<'PY'
246+ from pathlib import Path
247+
248+ root = Path("LOGS/profiles")
# Collect regular files whose names end in one of the accepted suffixes.
# NOTE(review): the ".json" test already matches ".trace.json" and ".pt.trace.json",
# so those two checks are redundant; only ".trace.json.gz" adds files.
249+ candidates = [
250+ p for p in root.rglob("*")
251+ if p.is_file() and (
252+ p.name.endswith(".trace.json")
253+ or p.name.endswith(".trace.json.gz")
254+ or p.name.endswith(".pt.trace.json")
255+ or p.name.endswith(".json")
256+ )
257+ ]
# Drop files named "results_*" and any file with "profile_export" in its name.
258+ candidates = [
259+ p for p in candidates
260+ if not p.name.startswith("results_") and "profile_export" not in p.name
261+ ]
# Print the largest remaining file by size; print nothing when none is left.
262+ if candidates:
263+ print(max(candidates, key=lambda p: p.stat().st_size))
264+ PY
265+ )"
266+ if [ -n "$trace_candidate" ] && [ -f "$trace_candidate" ]; then
# Already-gzipped candidates are copied as-is; anything else is gzipped into place.
267+ if [[ "$trace_candidate" == *.gz ]]; then
268+ cp "$trace_candidate" "$trace_path"
269+ else
270+ gzip -c "$trace_candidate" > "$trace_path"
271+ fi
272+ fi
273+ fi
274+
175275 if [ -f "$trace_path" ]; then
176276 echo "trace=$trace_path" >> "$GITHUB_OUTPUT"
177277 if [ "${FRAMEWORK}" = "sglang" ]; then
@@ -252,21 +352,21 @@ jobs:
252352 run : |
253353 set -euo pipefail
254354
# Store the trace in a directory named after RESULT_FILENAME, the value the
# "Launch + Profile" step appends to $GITHUB_ENV.
# Fix: "${RESULT_FILENAME }" (space before the closing brace) is a bash
# "bad substitution"; the stray space is removed here and in the lines below.
255- dest_dir="storage/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${{ matrix.config['exp-name'] }}_${{ matrix.config.precision }}_tp${{ matrix.config.tp }}_ep${{ matrix.config.ep || 1 }}_conc${{ matrix.config.conc }}"
356+ dest_dir="storage/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${RESULT_FILENAME}"
256356 mkdir -p "$dest_dir"
257357 cp "$TRACE_LOCAL" "$dest_dir/trace.json.gz"
258358
# Commit and push from inside the storage checkout; "Nothing to commit" keeps an
# unchanged tree from failing the step under `set -e`.
259359 pushd storage >/dev/null
260360 git config user.name "github-actions"
261361 git config user.email "github-actions@github.com"
262362 git add -A
263- git commit -m "Add profile: ${GITHUB_SHA} ${{ matrix.config['exp-name'] }} tp${{ matrix.config.tp }} ep${{ matrix.config.ep || 1 }} conc${{ matrix.config.conc }}" || echo "Nothing to commit"
363+ git commit -m "Add profile: ${GITHUB_SHA} ${RESULT_FILENAME}" || echo "Nothing to commit"
264364 git push
# Record the storage commit so the raw URL below is pinned to it.
265365 STORAGE_SHA="$(git rev-parse HEAD)"
266366 popd >/dev/null
267367
# RAW_URL and TITLE are exported because the Python one-liners below read them
# from os.environ and percent-encode them.
268- export RAW_URL="https://raw.githubusercontent.com/SemiAnalysisAI/InferenceX-trace-storage/${STORAGE_SHA}/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${{ matrix.config['exp-name'] }}_${{ matrix.config.precision }}_tp${{ matrix.config.tp }}_ep${{ matrix.config.ep || 1 }}_conc${{ matrix.config.conc }}/trace.json.gz"
269- export TITLE="${{ matrix.config['exp-name'] }}_${{ matrix.config.precision }}_tp${{ matrix.config.tp }}_ep${{ matrix.config.ep || 1 }}_conc${{ matrix.config.conc }}"
368+ export RAW_URL="https://raw.githubusercontent.com/SemiAnalysisAI/InferenceX-trace-storage/${STORAGE_SHA}/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${RESULT_FILENAME}/trace.json.gz"
369+ export TITLE="${RESULT_FILENAME}"
270370
271371 enc_src="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["RAW_URL"], safe=""))')"
272372 enc_title="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["TITLE"], safe=""))')"
0 commit comments