Profile #111
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: Profile | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| config-key: | |
| description: "Config key from config yaml" | |
| required: true | |
| type: string | |
| config-file: | |
| description: "Config file to use" | |
| required: false | |
| type: string | |
| default: '.github/configs/nvidia-master.yaml' | |
| conc: | |
| description: "Concurrency value (must exist in config's conc-range/list)" | |
| required: false | |
| type: string | |
| default: '64' | |
| moe-debug: | |
| description: "Enable MoE debug patch and log (MOE_DEBUG_LOG)" | |
| required: false | |
| type: boolean | |
| default: false | |
| ref: | |
| description: "Ref (branch/sha) to checkout" | |
| required: false | |
| type: string | |
| permissions: | |
| contents: read | |
| env: | |
| HF_TOKEN: ${{ secrets.HF_TOKEN }} | |
| HF_HUB_CACHE: '/mnt/hf_hub_cache/' | |
| RANDOM_RANGE_RATIO: '0.8' | |
| PERFETTO_RELAY_URL: https://semianalysisai.github.io/InferenceX-trace-storage | |
| PYTHONDONTWRITEBYTECODE: '1' | |
| PYTHONPYCACHEPREFIX: /tmp/inferencex-pycache | |
| jobs: | |
| get-jobs: | |
| runs-on: ubuntu-latest | |
| outputs: | |
| filtered-matrix: ${{ steps.filter.outputs.filtered }} | |
| count: ${{ steps.filter.outputs.count }} | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| ref: ${{ inputs.ref || github.sha }} | |
| - id: gen | |
| name: Generate matrix via script | |
| run: | | |
| pip install pydantic | |
| CLI_ARGS="test-config --config-files ${{ inputs.config-file }} --config-keys ${{ inputs.config-key }} --conc ${{ inputs.conc }}" | |
| CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py $CLI_ARGS) | |
| echo "raw=$CONFIG_JSON" >> $GITHUB_OUTPUT | |
| - id: filter | |
| name: Take first generated job | |
| shell: python | |
| run: | | |
| import json, os, sys | |
| raw = '${{ steps.gen.outputs.raw }}' | |
| try: | |
| data = json.loads(raw) | |
| except Exception as e: | |
| print('Invalid generator output:', e, file=sys.stderr) | |
| with open(os.environ['GITHUB_OUTPUT'], 'a') as f: | |
| f.write("filtered=[]\ncount=0\n") | |
| raise | |
| if not isinstance(data, list): | |
| print('Generator output is not a list.', file=sys.stderr) | |
| with open(os.environ['GITHUB_OUTPUT'], 'a') as f: | |
| f.write("filtered=[]\ncount=0\n") | |
| raise SystemExit(1) | |
| filt = data[:1] | |
| out = json.dumps(filt) | |
| print(out) | |
| with open(os.environ['GITHUB_OUTPUT'], 'a') as f: | |
| f.write(f"filtered={out}\n") | |
| f.write(f"count={len(filt)}\n") | |
| - name: Fail if no matching entries | |
| if: ${{ steps.filter.outputs.count == '0' }} | |
| run: | | |
| echo "No entries produced for config-key=${{ inputs.config-key }}, conc=${{ inputs.conc }}." >&2 | |
| exit 1 | |
| profile: | |
| needs: get-jobs | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| config: ${{ fromJson(needs.get-jobs.outputs.filtered-matrix) }} | |
| runs-on: ${{ matrix.config.runner }} | |
| env: | |
| EXP_NAME: ${{ matrix.config.exp-name }} | |
| MODEL: ${{ matrix.config.model }} | |
| MODEL_PREFIX: ${{ matrix.config.model-prefix }} | |
| ISL: ${{ matrix.config.isl }} | |
| OSL: ${{ matrix.config.osl }} | |
| MAX_MODEL_LEN: ${{ matrix.config.max-model-len }} | |
| IMAGE: ${{ matrix.config.image }} | |
| FRAMEWORK: ${{ matrix.config.framework }} | |
| PRECISION: ${{ matrix.config.precision }} | |
| TP: ${{ matrix.config.tp }} | |
| EP_SIZE: ${{ matrix.config.ep }} | |
| DP_ATTENTION: ${{ matrix.config['dp-attn'] }} | |
| CONC: ${{ toJson(matrix.config.conc) }} | |
| CONC_JSON: ${{ toJson(matrix.config.conc) }} | |
| PREFILL_NUM_WORKERS: ${{ matrix.config.prefill['num-worker'] }} | |
| PREFILL_TP: ${{ matrix.config.prefill.tp }} | |
| PREFILL_EP: ${{ matrix.config.prefill.ep }} | |
| PREFILL_DP_ATTN: ${{ matrix.config.prefill['dp-attn'] }} | |
| PREFILL_ADDITIONAL_SETTINGS_JSON: ${{ toJson(matrix.config.prefill['additional-settings']) }} | |
| DECODE_NUM_WORKERS: ${{ matrix.config.decode['num-worker'] }} | |
| DECODE_TP: ${{ matrix.config.decode.tp }} | |
| DECODE_EP: ${{ matrix.config.decode.ep }} | |
| DECODE_DP_ATTN: ${{ matrix.config.decode['dp-attn'] }} | |
| DECODE_ADDITIONAL_SETTINGS_JSON: ${{ toJson(matrix.config.decode['additional-settings']) }} | |
| SPEC_DECODING: ${{ matrix.config.spec-decoding }} | |
| DISAGG: ${{ matrix.config.disagg }} | |
| MOE_DEBUG: '0' | |
| MOE_DEBUG_LOG: ${{ (inputs.moe-debug) && '/workspace/moe_debug.tp0.log' || '' }} | |
| steps: | |
| - name: Resource cleanup | |
| run: | | |
| # Cleanup Docker resources | |
| if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then | |
| echo "[Docker] Cleaning up resources ..." | |
| docker ps -aq | xargs -r docker rm -f | |
| docker network prune -f | |
| while [ -n "$(docker ps -aq)" ]; do | |
| docker ps -a | |
| sleep 5 | |
| done | |
| fi | |
| # Cleanup SLURM resources | |
| if command -v squeue >/dev/null 2>&1; then | |
| echo "[Slurm] Cleaning up jobs with name: ${{ runner.name }} ..." | |
| scancel --name="${{ runner.name }}" || true | |
| while [ -n "$(squeue --name='${{ runner.name }}' --noheader --format='%i')" ]; do | |
| squeue --name="${{ runner.name }}" | |
| sleep 5 | |
| done | |
| fi | |
| - name: Checkout code | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| fetch-depth: 0 | |
| ref: ${{ inputs.ref || github.sha }} | |
| clean: false | |
| - name: Launch + Profile | |
| id: run | |
| env: | |
| RUNNER_NAME: ${{ runner.name }} | |
| PROFILE: '1' | |
| SGLANG_TORCH_PROFILER_DIR: /workspace/ | |
| VLLM_TORCH_PROFILER_DIR: /workspace/ | |
| VLLM_RPC_TIMEOUT: '1800000' | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| export_additional_settings() { | |
| local settings_json="$1" | |
| python3 - "$settings_json" <<'PY' | |
| import json | |
| import sys | |
| raw = sys.argv[1] | |
| if not raw or raw == "null": | |
| raise SystemExit(0) | |
| for item in json.loads(raw) or []: | |
| print(item) | |
| PY | |
| } | |
| normalize_conc() { | |
| python3 - <<'PY' | |
| import json | |
| import os | |
| raw = os.environ.get("CONC_JSON") or os.environ.get("CONC") or "[]" | |
| try: | |
| value = json.loads(raw) | |
| except json.JSONDecodeError: | |
| value = raw | |
| if isinstance(value, list): | |
| print("x".join(str(v) for v in value)) | |
| else: | |
| print(str(value)) | |
| PY | |
| } | |
| if [ -n "${PREFILL_NUM_WORKERS:-}" ] && [ -n "${DECODE_NUM_WORKERS:-}" ]; then | |
| conc_val="$(normalize_conc)" | |
| res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_prefill-tp${PREFILL_TP}-ep${PREFILL_EP}-dp${PREFILL_DP_ATTN}-nw${PREFILL_NUM_WORKERS}_decode-tp${DECODE_TP}-ep${DECODE_EP}-dp${DECODE_DP_ATTN}-nw${DECODE_NUM_WORKERS}_disagg-${DISAGG}_spec-${SPEC_DECODING}_conc${conc_val}_${RUNNER_NAME}" | |
| echo "IS_MULTINODE=true" >> "$GITHUB_ENV" | |
| echo "PREFILL_GPUS=$((PREFILL_NUM_WORKERS * PREFILL_TP))" >> "$GITHUB_ENV" | |
| echo "DECODE_GPUS=$((DECODE_NUM_WORKERS * DECODE_TP))" >> "$GITHUB_ENV" | |
| while IFS= read -r setting; do | |
| if [ -n "$setting" ]; then | |
| export "$setting" | |
| fi | |
| done < <(export_additional_settings "${PREFILL_ADDITIONAL_SETTINGS_JSON:-null}") | |
| while IFS= read -r setting; do | |
| if [ -n "$setting" ]; then | |
| export "$setting" | |
| fi | |
| done < <(export_additional_settings "${DECODE_ADDITIONAL_SETTINGS_JSON:-null}") | |
| else | |
| ep_val="${EP_SIZE:-1}" | |
| res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_tp${TP}_ep${ep_val}_dpa_${DP_ATTENTION}_conc${CONC}_${RUNNER_NAME}" | |
| fi | |
| export RESULT_FILENAME="${res_name}" | |
| echo "RESULT_FILENAME=${res_name}" >> "$GITHUB_ENV" | |
| echo "Removing stale profile artifacts from previous runs" | |
| rm -rf LOGS | |
| rm -f profile_*.trace.json.gz multinode_server_logs.tar.gz | |
| bash ./runners/launch_${RUNNER_NAME%%_*}.sh | |
| if [ ! -f "${res_name}.json" ]; then | |
| result_candidate="$(find . -maxdepth 1 -type f -name "${res_name}_*.json" | sort | head -n1 || true)" | |
| if [ -n "$result_candidate" ] && [ -f "$result_candidate" ]; then | |
| cp "$result_candidate" "${res_name}.json" | |
| else | |
| echo "Run failed: Benchmark result ${res_name}.json not found." >&2 | |
| exit 1 | |
| fi | |
| fi | |
| trace_path="profile_${res_name}.trace.json.gz" | |
| if [ ! -f "$trace_path" ] && [ -d LOGS ]; then | |
| trace_candidate="$(python3 - <<'PY' | |
| from pathlib import Path | |
| root = Path("LOGS") | |
| def is_trace_candidate(path: Path) -> bool: | |
| name = path.name | |
| if name.startswith("results_") or "profile_export" in name: | |
| return False | |
| if name.endswith((".trace.json", ".trace.json.gz", ".pt.trace.json", ".pt.trace.json.gz")): | |
| return True | |
| return "trace" in name and name.endswith((".json", ".json.gz")) | |
| candidates = [p for p in root.rglob("*") if p.is_file() and is_trace_candidate(p)] | |
| if candidates: | |
| print(max(candidates, key=lambda p: (p.stat().st_mtime_ns, p.stat().st_size))) | |
| PY | |
| )" | |
| if [ -n "$trace_candidate" ] && [ -f "$trace_candidate" ]; then | |
| echo "Selected profile trace candidate: $trace_candidate" | |
| if [[ "$trace_candidate" == *.gz ]]; then | |
| cp "$trace_candidate" "$trace_path" | |
| else | |
| gzip -c "$trace_candidate" > "$trace_path" | |
| fi | |
| fi | |
| fi | |
| if [ -f "$trace_path" ]; then | |
| echo "Profile trace prepared: $trace_path" | |
| ls -lh "$trace_path" | |
| sha256sum "$trace_path" | |
| python3 - "$trace_path" <<'PY' | |
| import gzip | |
| import os | |
| import re | |
| import sys | |
| trace_path = sys.argv[1] | |
| expected = set() | |
| worker_gpus = [] | |
| for workers_key, tp_key in ( | |
| ("PREFILL_NUM_WORKERS", "PREFILL_TP"), | |
| ("DECODE_NUM_WORKERS", "DECODE_TP"), | |
| ): | |
| workers = os.environ.get(workers_key) | |
| tp = os.environ.get(tp_key) | |
| if workers and workers.isdigit() and tp and tp.isdigit(): | |
| gpus = int(workers) * int(tp) | |
| if gpus: | |
| expected.add(gpus) | |
| worker_gpus.append(gpus) | |
| if len(worker_gpus) > 1: | |
| expected.add(sum(worker_gpus)) | |
| opener = gzip.open if trace_path.endswith(".gz") else open | |
| with opener(trace_path, "rt", errors="replace") as f: | |
| prefix = f.read(1024 * 1024) | |
| if '"traceEvents"' not in prefix: | |
| raise SystemExit(f"{trace_path} does not look like a Perfetto trace: traceEvents key not found near start") | |
| match = re.search(r'"world_size"\s*:\s*(\d+)', prefix) | |
| if expected and match: | |
| world_size = int(match.group(1)) | |
| if world_size not in expected: | |
| allowed = ", ".join(str(v) for v in sorted(expected)) | |
| raise SystemExit( | |
| f"{trace_path} has distributed world_size={world_size}, expected one of: {allowed}" | |
| ) | |
| PY | |
| echo "trace=$trace_path" >> "$GITHUB_OUTPUT" | |
| if [ "${FRAMEWORK}" = "sglang" ]; then | |
| # Try to locate corresponding TP-0 traces produced by SGLang profiler | |
| merged_latest=$(ls -t profiles/merged-*.trace.json.gz 2>/dev/null | head -n1 || true) | |
| if [ -n "${merged_latest}" ] && [ -f "${merged_latest}" ]; then | |
| ts_name="${merged_latest##*/}" | |
| ts_name="${ts_name#merged-}" | |
| ts_name="${ts_name%.trace.json.gz}" | |
| tp0_decode="profiles/${ts_name}-TP-0-DECODE.trace.json.gz" | |
| tp0_extend="profiles/${ts_name}-TP-0-EXTEND.trace.json.gz" | |
| if [ -f "${tp0_decode}" ]; then | |
| echo "tp0_decode=${tp0_decode}" >> "$GITHUB_OUTPUT" | |
| fi | |
| if [ -f "${tp0_extend}" ]; then | |
| echo "tp0_extend=${tp0_extend}" >> "$GITHUB_OUTPUT" | |
| fi | |
| fi | |
| fi | |
| else | |
| echo "Profile trace not found: $trace_path" >&2 | |
| if [ -d LOGS ]; then | |
| echo "LOGS profile candidates:" >&2 | |
| find LOGS -maxdepth 8 -type f \( -path "*/profiles/*" -o -name "*trace*" -o -name "*profile*" \) -printf "%p %s bytes\n" 2>/dev/null | sort >&2 || true | |
| fi | |
| exit 1 | |
| fi | |
| - name: Process result (json -> agg) | |
| env: | |
| RUNNER_TYPE: ${{ matrix.config.runner }} | |
| run: | | |
| python3 utils/process_result.py | |
| - name: Upload profile as artifact | |
| if: ${{ steps.run.outputs.trace != '' }} | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: profile_${{ env.RESULT_FILENAME }} | |
| path: ${{ steps.run.outputs.trace }} | |
| if-no-files-found: ignore | |
| - name: Upload TP-0-DECODE trace as artifact | |
| if: ${{ steps.run.outputs.tp0_decode != '' }} | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: profile_${{ env.RESULT_FILENAME }}_TP0_DECODE | |
| path: ${{ steps.run.outputs.tp0_decode }} | |
| if-no-files-found: ignore | |
| - name: Upload TP-0-EXTEND trace as artifact | |
| if: ${{ steps.run.outputs.tp0_extend != '' }} | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: profile_${{ env.RESULT_FILENAME }}_TP0_EXTEND | |
| path: ${{ steps.run.outputs.tp0_extend }} | |
| if-no-files-found: ignore | |
| - name: Upload MoE debug log as artifact | |
| if: ${{ env.MOE_DEBUG == '1' }} | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 | |
| with: | |
| name: moe_debug_${{ env.RESULT_FILENAME }} | |
| path: "moe_debug.tp0.log" | |
| if-no-files-found: ignore | |
| - name: Checkout storage repo | |
| if: ${{ steps.run.outputs.trace != '' }} | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 | |
| with: | |
| repository: SemiAnalysisAI/InferenceX-trace-storage | |
| path: storage | |
| ref: master | |
| token: ${{ secrets.REPO_PAT }} | |
| fetch-depth: 0 | |
| - name: Push profile to storage repo | |
| if: ${{ steps.run.outputs.trace != '' }} | |
| id: push | |
| env: | |
| TRACE_LOCAL: ${{ steps.run.outputs.trace }} | |
| REPO_PAT: ${{ secrets.REPO_PAT }} | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| dest_dir="storage/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${RESULT_FILENAME}" | |
| mkdir -p "$dest_dir" | |
| cp "$TRACE_LOCAL" "$dest_dir/trace.json.gz" | |
| pushd storage >/dev/null | |
| git config user.name "github-actions" | |
| git config user.email "github-actions@github.com" | |
| git remote set-url origin "https://x-access-token:${REPO_PAT}@github.com/SemiAnalysisAI/InferenceX-trace-storage.git" | |
| git add -A | |
| git commit -m "Add profile: ${GITHUB_SHA} ${RESULT_FILENAME}" || echo "Nothing to commit" | |
| git push origin HEAD:master | |
| STORAGE_SHA="$(git rev-parse HEAD)" | |
| popd >/dev/null | |
| export RAW_URL="https://raw.githubusercontent.com/SemiAnalysisAI/InferenceX-trace-storage/${STORAGE_SHA}/profiles/${GITHUB_SHA}/${{ matrix.config.runner }}/${{ matrix.config.framework }}/${RESULT_FILENAME}/trace.json.gz" | |
| export TITLE="${RESULT_FILENAME}" | |
| enc_src="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["RAW_URL"], safe=""))')" | |
| enc_title="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["TITLE"], safe=""))')" | |
| relay="${PERFETTO_RELAY_URL%/}" | |
| RELAY_URL="${relay}/?src=${enc_src}&title=${enc_title}" | |
| echo "raw_url=$RAW_URL" >> "$GITHUB_OUTPUT" | |
| echo "relay_url=$RELAY_URL" >> "$GITHUB_OUTPUT" | |
| - name: Print Perfetto link (relay) | |
| if: ${{ steps.push.outputs.relay_url != '' }} | |
| env: | |
| RELAY_URL: ${{ steps.push.outputs.relay_url }} | |
| RAW_URL: ${{ steps.push.outputs.raw_url }} | |
| shell: bash | |
| run: | | |
| set -euo pipefail | |
| echo "RAW trace URL: $RAW_URL" | |
| echo "Perfetto Relay URL: $RELAY_URL" | |
| printf "\n**Perfetto (Relay):** %s\n" "$RELAY_URL" >> "$GITHUB_STEP_SUMMARY" |