# Skip to content
#
# Profile
#
# Profile #111
#
# Workflow file for this run

# Manually dispatched workflow; the inputs below select what gets profiled.
name: Profile

on:
  workflow_dispatch:
    inputs:
      config-key:
        description: 'Config key from config yaml'
        type: string
        required: true
      config-file:
        description: 'Config file to use'
        type: string
        required: false
        default: '.github/configs/nvidia-master.yaml'
      conc:
        description: "Concurrency value (must exist in config's conc-range/list)"
        type: string
        required: false
        default: '64'
      moe-debug:
        description: 'Enable MoE debug patch and log (MOE_DEBUG_LOG)'
        type: boolean
        required: false
        default: false
      # No default: the checkout steps fall back to github.sha when empty.
      ref:
        description: 'Ref (branch/sha) to checkout'
        type: string
        required: false

permissions:
  contents: read

# Workflow-level environment, inherited by every job and step.
env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
  HF_HUB_CACHE: '/mnt/hf_hub_cache/'
  RANDOM_RANGE_RATIO: '0.8'
  PERFETTO_RELAY_URL: 'https://semianalysisai.github.io/InferenceX-trace-storage'
  PYTHONDONTWRITEBYTECODE: '1'
  PYTHONPYCACHEPREFIX: '/tmp/inferencex-pycache'
jobs:
  # Builds the matrix for the profile job: runs the sweep-config generator for
  # the requested config key / concurrency and keeps only the first entry.
  get-jobs:
    runs-on: ubuntu-latest
    outputs:
      filtered-matrix: ${{ steps.filter.outputs.filtered }}
      count: ${{ steps.filter.outputs.count }}
    steps:
      - name: Checkout code
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
        with:
          ref: ${{ inputs.ref || github.sha }}

      - id: gen
        name: Generate matrix via script
        shell: bash
        env:
          # Inputs travel through the environment instead of being expanded
          # into the script text, so shell metacharacters in them stay inert.
          CFG_FILE: ${{ inputs.config-file }}
          CFG_KEY: ${{ inputs.config-key }}
          CFG_CONC: ${{ inputs.conc }}
        run: |
          pip install pydantic
          # The three values are expanded unquoted on purpose: this keeps the
          # word splitting the previous CLI_ARGS string relied on, so several
          # space-separated values still arrive as separate arguments.
          # shellcheck disable=SC2086
          CONFIG_JSON="$(python3 "${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py" \
            test-config \
            --config-files ${CFG_FILE} \
            --config-keys ${CFG_KEY} \
            --conc ${CFG_CONC})"
          # Use the delimiter form of GITHUB_OUTPUT so generator output that
          # spans several lines is still recorded as one value.
          delimiter="matrix_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')"
          printf 'raw<<%s\n%s\n%s\n' "$delimiter" "$CONFIG_JSON" "$delimiter" >> "$GITHUB_OUTPUT"

      - id: filter
        name: Take first generated job
        shell: python
        env:
          # Read from the environment rather than pasting the JSON into a
          # Python string literal, where quotes/backslashes would corrupt it.
          RAW_MATRIX: ${{ steps.gen.outputs.raw }}
        run: |
          import json
          import os
          import sys


          def emit(filtered, count):
              """Append the `filtered` and `count` step outputs."""
              with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
                  f.write(f"filtered={filtered}\ncount={count}\n")


          raw = os.environ.get('RAW_MATRIX', '')
          try:
              data = json.loads(raw)
          except Exception as e:
              print('Invalid generator output:', e, file=sys.stderr)
              emit('[]', 0)
              raise
          if not isinstance(data, list):
              print('Generator output is not a list.', file=sys.stderr)
              emit('[]', 0)
              raise SystemExit(1)
          # Only the first generated entry is profiled.
          filt = data[:1]
          out = json.dumps(filt)
          print(out)
          emit(out, len(filt))

      - name: Fail if no matching entries
        if: ${{ steps.filter.outputs.count == '0' }}
        env:
          CFG_KEY: ${{ inputs.config-key }}
          CFG_CONC: ${{ inputs.conc }}
        run: |
          echo "No entries produced for config-key=${CFG_KEY}, conc=${CFG_CONC}." >&2
          exit 1
# Runs on the runner named by the matrix entry produced by get-jobs and
# exposes that entry's fields to every step as environment variables.
profile:
  needs: get-jobs
  runs-on: ${{ matrix.config.runner }}
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.get-jobs.outputs.filtered-matrix) }}
  env:
    # Experiment identity and model settings.
    EXP_NAME: ${{ matrix.config.exp-name }}
    MODEL: ${{ matrix.config.model }}
    MODEL_PREFIX: ${{ matrix.config.model-prefix }}
    ISL: ${{ matrix.config.isl }}
    OSL: ${{ matrix.config.osl }}
    MAX_MODEL_LEN: ${{ matrix.config.max-model-len }}
    IMAGE: ${{ matrix.config.image }}
    FRAMEWORK: ${{ matrix.config.framework }}
    PRECISION: ${{ matrix.config.precision }}

    # Top-level parallelism fields of the matrix entry.
    TP: ${{ matrix.config.tp }}
    EP_SIZE: ${{ matrix.config.ep }}
    DP_ATTENTION: ${{ matrix.config['dp-attn'] }}

    # CONC and CONC_JSON carry the same JSON-encoded value.
    CONC: ${{ toJson(matrix.config.conc) }}
    CONC_JSON: ${{ toJson(matrix.config.conc) }}

    # Fields read from the entry's `prefill` object.
    PREFILL_NUM_WORKERS: ${{ matrix.config.prefill['num-worker'] }}
    PREFILL_TP: ${{ matrix.config.prefill.tp }}
    PREFILL_EP: ${{ matrix.config.prefill.ep }}
    PREFILL_DP_ATTN: ${{ matrix.config.prefill['dp-attn'] }}
    PREFILL_ADDITIONAL_SETTINGS_JSON: ${{ toJson(matrix.config.prefill['additional-settings']) }}

    # Fields read from the entry's `decode` object.
    DECODE_NUM_WORKERS: ${{ matrix.config.decode['num-worker'] }}
    DECODE_TP: ${{ matrix.config.decode.tp }}
    DECODE_EP: ${{ matrix.config.decode.ep }}
    DECODE_DP_ATTN: ${{ matrix.config.decode['dp-attn'] }}
    DECODE_ADDITIONAL_SETTINGS_JSON: ${{ toJson(matrix.config.decode['additional-settings']) }}

    SPEC_DECODING: ${{ matrix.config.spec-decoding }}
    DISAGG: ${{ matrix.config.disagg }}

    # MOE_DEBUG is pinned to '0' and is not driven by inputs.moe-debug; only
    # MOE_DEBUG_LOG follows the input.
    # NOTE(review): confirm that pinning MOE_DEBUG is intentional.
    MOE_DEBUG: "0"
    MOE_DEBUG_LOG: ${{ (inputs.moe-debug) && '/workspace/moe_debug.tp0.log' || '' }}
  steps:
# Removes leftover Docker containers/networks and cancels Slurm jobs named
# after this runner, waiting until each tool reports nothing left.
- name: Resource cleanup
  env:
    # Passed via the environment so the name is quoted the same way at every
    # use instead of being spliced into the script text.
    RUNNER_NAME: ${{ runner.name }}
  run: |
    # Cleanup Docker resources
    if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
      echo "[Docker] Cleaning up resources ..."
      docker ps -aq | xargs -r docker rm -f
      docker network prune -f
      # Poll until no containers (running or stopped) remain.
      while [ -n "$(docker ps -aq)" ]; do
        docker ps -a
        sleep 5
      done
    fi
    # Cleanup SLURM resources
    if command -v squeue >/dev/null 2>&1; then
      echo "[Slurm] Cleaning up jobs with name: ${RUNNER_NAME} ..."
      # scancel failing is tolerated; the loop below still waits on the queue.
      scancel --name="${RUNNER_NAME}" || true
      while [ -n "$(squeue --name="${RUNNER_NAME}" --noheader --format='%i')" ]; do
        squeue --name="${RUNNER_NAME}"
        sleep 5
      done
    fi
# Full-history checkout of the requested ref (or the triggering commit), with
# the checkout action's `clean` option turned off.
- name: Checkout code
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
  with:
    ref: ${{ inputs.ref || github.sha }}
    fetch-depth: 0
    clean: false
# Derives the result name, runs the runner-specific launch script, then
# locates and sanity-checks the profile trace.
# Step outputs: trace, and for sglang optionally tp0_decode / tp0_extend.
- name: Launch + Profile
  id: run
  env:
    RUNNER_NAME: ${{ runner.name }}
    PROFILE: '1'
    SGLANG_TORCH_PROFILER_DIR: /workspace/
    VLLM_TORCH_PROFILER_DIR: /workspace/
    VLLM_RPC_TIMEOUT: '1800000'
  shell: bash
  run: |
    set -euo pipefail

    # Print each element of a JSON array argument on its own line.
    # An empty argument or the literal "null" prints nothing.
    export_additional_settings() {
      local settings_json="$1"
      python3 - "$settings_json" <<'PY'
    import json
    import sys
    raw = sys.argv[1]
    if not raw or raw == "null":
        raise SystemExit(0)
    for item in json.loads(raw) or []:
        print(item)
    PY
    }

    # Print the concurrency from CONC_JSON (falling back to CONC): a JSON list
    # is joined with "x", anything else is printed as-is.
    normalize_conc() {
      python3 - <<'PY'
    import json
    import os
    raw = os.environ.get("CONC_JSON") or os.environ.get("CONC") or "[]"
    try:
        value = json.loads(raw)
    except json.JSONDecodeError:
        value = raw
    if isinstance(value, list):
        print("x".join(str(v) for v in value))
    else:
        print(str(value))
    PY
    }

    # Both worker counts being set selects the prefill/decode naming scheme.
    if [ -n "${PREFILL_NUM_WORKERS:-}" ] && [ -n "${DECODE_NUM_WORKERS:-}" ]; then
      conc_val="$(normalize_conc)"
      res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_prefill-tp${PREFILL_TP}-ep${PREFILL_EP}-dp${PREFILL_DP_ATTN}-nw${PREFILL_NUM_WORKERS}_decode-tp${DECODE_TP}-ep${DECODE_EP}-dp${DECODE_DP_ATTN}-nw${DECODE_NUM_WORKERS}_disagg-${DISAGG}_spec-${SPEC_DECODING}_conc${conc_val}_${RUNNER_NAME}"
      # These three are appended to GITHUB_ENV only; they are not exported
      # into this shell.
      echo "IS_MULTINODE=true" >> "$GITHUB_ENV"
      echo "PREFILL_GPUS=$((PREFILL_NUM_WORKERS * PREFILL_TP))" >> "$GITHUB_ENV"
      echo "DECODE_GPUS=$((DECODE_NUM_WORKERS * DECODE_TP))" >> "$GITHUB_ENV"
      # Each non-empty line is handed to `export` (presumably NAME=VALUE
      # entries; confirm against the config schema).
      while IFS= read -r setting; do
        if [ -n "$setting" ]; then
          export "$setting"
        fi
      done < <(export_additional_settings "${PREFILL_ADDITIONAL_SETTINGS_JSON:-null}")
      while IFS= read -r setting; do
        if [ -n "$setting" ]; then
          export "$setting"
        fi
      done < <(export_additional_settings "${DECODE_ADDITIONAL_SETTINGS_JSON:-null}")
    else
      ep_val="${EP_SIZE:-1}"
      res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_tp${TP}_ep${ep_val}_dpa_${DP_ATTENTION}_conc${CONC}_${RUNNER_NAME}"
    fi

    # Exported for the launch script below and persisted for later steps.
    export RESULT_FILENAME="${res_name}"
    echo "RESULT_FILENAME=${res_name}" >> "$GITHUB_ENV"

    echo "Removing stale profile artifacts from previous runs"
    rm -rf LOGS
    rm -f profile_*.trace.json.gz multinode_server_logs.tar.gz

    # The launch script is chosen by the runner-name prefix before the first
    # "_"; the path is quoted so an unusual runner name cannot split or glob.
    bash "./runners/launch_${RUNNER_NAME%%_*}.sh"

    # Accept "<res_name>_*.json" as a fallback for the expected result file.
    if [ ! -f "${res_name}.json" ]; then
      result_candidate="$(find . -maxdepth 1 -type f -name "${res_name}_*.json" | sort | head -n1 || true)"
      if [ -n "$result_candidate" ] && [ -f "$result_candidate" ]; then
        cp "$result_candidate" "${res_name}.json"
      else
        echo "Run failed: Benchmark result ${res_name}.json not found." >&2
        exit 1
      fi
    fi

    trace_path="profile_${res_name}.trace.json.gz"
    # If the trace is not at the expected path, search LOGS for the newest
    # (then largest) trace-like file and copy/compress it into place.
    if [ ! -f "$trace_path" ] && [ -d LOGS ]; then
      trace_candidate="$(python3 - <<'PY'
    from pathlib import Path
    root = Path("LOGS")
    def is_trace_candidate(path: Path) -> bool:
        name = path.name
        if name.startswith("results_") or "profile_export" in name:
            return False
        if name.endswith((".trace.json", ".trace.json.gz", ".pt.trace.json", ".pt.trace.json.gz")):
            return True
        return "trace" in name and name.endswith((".json", ".json.gz"))
    candidates = [p for p in root.rglob("*") if p.is_file() and is_trace_candidate(p)]
    if candidates:
        print(max(candidates, key=lambda p: (p.stat().st_mtime_ns, p.stat().st_size)))
    PY
    )"
      if [ -n "$trace_candidate" ] && [ -f "$trace_candidate" ]; then
        echo "Selected profile trace candidate: $trace_candidate"
        if [[ "$trace_candidate" == *.gz ]]; then
          cp "$trace_candidate" "$trace_path"
        else
          gzip -c "$trace_candidate" > "$trace_path"
        fi
      fi
    fi

    if [ -f "$trace_path" ]; then
      echo "Profile trace prepared: $trace_path"
      ls -lh "$trace_path"
      sha256sum "$trace_path"
      # Sanity check on the first 1 MiB of the trace: it must contain a
      # "traceEvents" key, and any "world_size" found must equal the prefill
      # GPU count, the decode GPU count, or their sum.
      python3 - "$trace_path" <<'PY'
    import gzip
    import os
    import re
    import sys
    trace_path = sys.argv[1]
    expected = set()
    worker_gpus = []
    for workers_key, tp_key in (
        ("PREFILL_NUM_WORKERS", "PREFILL_TP"),
        ("DECODE_NUM_WORKERS", "DECODE_TP"),
    ):
        workers = os.environ.get(workers_key)
        tp = os.environ.get(tp_key)
        if workers and workers.isdigit() and tp and tp.isdigit():
            gpus = int(workers) * int(tp)
            if gpus:
                expected.add(gpus)
                worker_gpus.append(gpus)
    if len(worker_gpus) > 1:
        expected.add(sum(worker_gpus))
    opener = gzip.open if trace_path.endswith(".gz") else open
    with opener(trace_path, "rt", errors="replace") as f:
        prefix = f.read(1024 * 1024)
    if '"traceEvents"' not in prefix:
        raise SystemExit(f"{trace_path} does not look like a Perfetto trace: traceEvents key not found near start")
    match = re.search(r'"world_size"\s*:\s*(\d+)', prefix)
    if expected and match:
        world_size = int(match.group(1))
        if world_size not in expected:
            allowed = ", ".join(str(v) for v in sorted(expected))
            raise SystemExit(
                f"{trace_path} has distributed world_size={world_size}, expected one of: {allowed}"
            )
    PY
      echo "trace=$trace_path" >> "$GITHUB_OUTPUT"
      if [ "${FRAMEWORK}" = "sglang" ]; then
        # Try to locate corresponding TP-0 traces produced by SGLang profiler
        merged_latest=$(ls -t profiles/merged-*.trace.json.gz 2>/dev/null | head -n1 || true)
        if [ -n "${merged_latest}" ] && [ -f "${merged_latest}" ]; then
          # Strip directory, "merged-" prefix and extension to get the stem
          # shared by the per-rank trace files.
          ts_name="${merged_latest##*/}"
          ts_name="${ts_name#merged-}"
          ts_name="${ts_name%.trace.json.gz}"
          tp0_decode="profiles/${ts_name}-TP-0-DECODE.trace.json.gz"
          tp0_extend="profiles/${ts_name}-TP-0-EXTEND.trace.json.gz"
          if [ -f "${tp0_decode}" ]; then
            echo "tp0_decode=${tp0_decode}" >> "$GITHUB_OUTPUT"
          fi
          if [ -f "${tp0_extend}" ]; then
            echo "tp0_extend=${tp0_extend}" >> "$GITHUB_OUTPUT"
          fi
        fi
      fi
    else
      echo "Profile trace not found: $trace_path" >&2
      if [ -d LOGS ]; then
        # List anything trace/profile-like under LOGS to help diagnose.
        echo "LOGS profile candidates:" >&2
        find LOGS -maxdepth 8 -type f \( -path "*/profiles/*" -o -name "*trace*" -o -name "*profile*" \) -printf "%p %s bytes\n" 2>/dev/null | sort >&2 || true
      fi
      exit 1
    fi
# Runs the repository's result post-processing script with the matrix
# entry's runner label exposed as RUNNER_TYPE.
- name: Process result (json -> agg)
  env:
    RUNNER_TYPE: ${{ matrix.config.runner }}
  run: python3 utils/process_result.py
# Uploads the trace recorded in the run step's `trace` output; skipped when
# that output is empty.
- name: Upload profile as artifact
  if: steps.run.outputs.trace != ''
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    name: profile_${{ env.RESULT_FILENAME }}
    path: ${{ steps.run.outputs.trace }}
    if-no-files-found: ignore
# Uploads the TP-0 DECODE trace when the run step reported one.
- name: Upload TP-0-DECODE trace as artifact
  if: steps.run.outputs.tp0_decode != ''
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    name: profile_${{ env.RESULT_FILENAME }}_TP0_DECODE
    path: ${{ steps.run.outputs.tp0_decode }}
    if-no-files-found: ignore
# Uploads the TP-0 EXTEND trace when the run step reported one.
- name: Upload TP-0-EXTEND trace as artifact
  if: steps.run.outputs.tp0_extend != ''
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    name: profile_${{ env.RESULT_FILENAME }}_TP0_EXTEND
    path: ${{ steps.run.outputs.tp0_extend }}
    if-no-files-found: ignore
# Uploads the MoE debug log when MoE debugging was requested.
# The job pins MOE_DEBUG to '0', so the original MOE_DEBUG-only condition
# could never fire; the moe-debug input (which is what sets MOE_DEBUG_LOG)
# now enables the upload as well. A missing log file is ignored.
- name: Upload MoE debug log as artifact
  if: ${{ env.MOE_DEBUG == '1' || inputs.moe-debug }}
  uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a  # v7.0.1
  with:
    name: moe_debug_${{ env.RESULT_FILENAME }}
    path: "moe_debug.tp0.log"
    if-no-files-found: ignore
# Checks out the trace-storage repository (branch master, full history) into
# ./storage using the REPO_PAT secret; skipped when no trace was produced.
- name: Checkout storage repo
  if: steps.run.outputs.trace != ''
  uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
  with:
    repository: SemiAnalysisAI/InferenceX-trace-storage
    ref: master
    path: storage
    fetch-depth: 0
    token: ${{ secrets.REPO_PAT }}
# Copies the trace into the storage checkout, commits and pushes it, then
# publishes the raw file URL and the Perfetto relay URL as step outputs
# (raw_url, relay_url).
- name: Push profile to storage repo
  if: ${{ steps.run.outputs.trace != '' }}
  id: push
  env:
    TRACE_LOCAL: ${{ steps.run.outputs.trace }}
    REPO_PAT: ${{ secrets.REPO_PAT }}
    # Matrix values are passed through the environment instead of being
    # expanded into the script text.
    RUNNER_TYPE: ${{ matrix.config.runner }}
    FRAMEWORK: ${{ matrix.config.framework }}
    STORAGE_REPO: SemiAnalysisAI/InferenceX-trace-storage
  shell: bash
  run: |
    set -euo pipefail

    # Single definition of the in-repo path, shared by the copy destination
    # and the raw URL so the two cannot drift apart.
    rel_dir="profiles/${GITHUB_SHA}/${RUNNER_TYPE}/${FRAMEWORK}/${RESULT_FILENAME}"
    dest_dir="storage/${rel_dir}"
    mkdir -p "$dest_dir"
    cp "$TRACE_LOCAL" "$dest_dir/trace.json.gz"

    # The token-bearing remote URL set below is written to
    # storage/.git/config; restore the plain URL on exit so the token does
    # not stay on the runner's disk.
    storage_root="$(pwd)/storage"
    trap 'git -C "$storage_root" remote set-url origin "https://github.com/${STORAGE_REPO}.git" || true' EXIT

    pushd storage >/dev/null
    git config user.name "github-actions"
    git config user.email "github-actions@github.com"
    git remote set-url origin "https://x-access-token:${REPO_PAT}@github.com/${STORAGE_REPO}.git"
    git add -A
    git commit -m "Add profile: ${GITHUB_SHA} ${RESULT_FILENAME}" || echo "Nothing to commit"

    # Another run may have pushed to master since the checkout; on a rejected
    # push, rebase onto the remote branch and retry a bounded number of times.
    pushed=false
    for attempt in 1 2 3 4 5; do
      if git push origin HEAD:master; then
        pushed=true
        break
      fi
      echo "Push attempt ${attempt} failed; rebasing onto origin/master and retrying." >&2
      if ! git pull --rebase origin master; then
        git rebase --abort >/dev/null 2>&1 || true
        echo "Rebase onto origin/master failed." >&2
        exit 1
      fi
      sleep $((attempt * 2))
    done
    if [ "$pushed" != true ]; then
      echo "Failed to push profile to storage repo after retries." >&2
      exit 1
    fi

    # Read HEAD only after a successful push so the URL pins the commit that
    # actually landed on the remote.
    STORAGE_SHA="$(git rev-parse HEAD)"
    popd >/dev/null

    export RAW_URL="https://raw.githubusercontent.com/${STORAGE_REPO}/${STORAGE_SHA}/${rel_dir}/trace.json.gz"
    export TITLE="${RESULT_FILENAME}"
    # Percent-encode everything (safe="") so both values survive as query
    # parameters of the relay URL.
    enc_src="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["RAW_URL"], safe=""))')"
    enc_title="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["TITLE"], safe=""))')"
    relay="${PERFETTO_RELAY_URL%/}"
    RELAY_URL="${relay}/?src=${enc_src}&title=${enc_title}"
    echo "raw_url=$RAW_URL" >> "$GITHUB_OUTPUT"
    echo "relay_url=$RELAY_URL" >> "$GITHUB_OUTPUT"
# Logs the raw and relay URLs from the push step and appends the relay link
# to the job summary; skipped when no relay URL was produced.
- name: Print Perfetto link (relay)
  if: steps.push.outputs.relay_url != ''
  shell: bash
  env:
    RAW_URL: ${{ steps.push.outputs.raw_url }}
    RELAY_URL: ${{ steps.push.outputs.relay_url }}
  run: |
    set -euo pipefail
    echo "RAW trace URL: ${RAW_URL}"
    echo "Perfetto Relay URL: ${RELAY_URL}"
    printf "\n**Perfetto (Relay):** %s\n" "${RELAY_URL}" >> "${GITHUB_STEP_SUMMARY}"