SemiAnalysisAI
diff --git a/‎.claude/mcp/mcp_utils.py‎
Lines changed: 17 additions & 12 deletions b/‎.claude/mcp/mcp_utils.py‎
Lines changed: 17 additions & 12 deletions
diff --git a/‎.github/workflows/claude.yml‎
Lines changed: 44 additions & 0 deletions b/‎.github/workflows/claude.yml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎.github/workflows/profile.yml‎
Lines changed: 296 additions & 0 deletions b/‎.github/workflows/profile.yml‎
Lines changed: 296 additions & 0 deletions
@@ -163,20 +163,25 @@ def initialize_repo(name: str, url: str, path: Path) -> git.Repo:
         GitPython Repo object
     """
     if path.exists():
-        # Repository exists, update it
+        # Repository exists, try to open and update it
         logger.info(f"Updating {name} repository at {path}")
-        repo = git.Repo(path)
         try:
-            origin = repo.remotes.origin
-            origin.fetch()
-        except Exception as e:
-            logger.warning(f"Failed to fetch updates for {name}: {e}")
-    else:
-        # Clone repository
-        logger.info(f"Cloning {name} from {url}")
-        path.parent.mkdir(parents=True, exist_ok=True)
-        repo = git.Repo.clone_from(url, path)
-
+            repo = git.Repo(path)
+            try:
+                origin = repo.remotes.origin
+                origin.fetch()
+            except Exception as e:
+                logger.warning(f"Failed to fetch updates for {name}: {e}")
+            return repo
+        except git.exc.InvalidGitRepositoryError:
+            logger.warning(f"Corrupt/invalid git repo at {path}, removing and re-cloning")
+            import shutil
+            shutil.rmtree(path)
+
+    # Clone repository
+    logger.info(f"Cloning {name} from {url}")
+    path.parent.mkdir(parents=True, exist_ok=True)
+    repo = git.Repo.clone_from(url, path)
     return repo
 
 
 
@@ -161,6 +161,50 @@ jobs:
             - If jobs cannot be run, say exactly what you could not run and why
             - **Important** Modify perf-changelog.yaml for any config changes affecting performance
 
+            ## Profiling (SGLang only)
+            When asked to profile a config, dispatch the `profile.yml` workflow. **Only SGLang configs can be profiled** — the profiler uses SGLang's `/start_profile` and `/stop_profile` HTTP endpoints. Reject profiling requests for vLLM, TRT, or other frameworks.
+
+            **Syntax:**
+            ```
+            mcp__github__run_workflow(
+                owner="SemiAnalysisAI",
+                repo="InferenceX",
+                workflow_id="profile.yml",
+                ref="main",
+                inputs={
+                    "config-key": "<config-key-ending-in-sglang>",
+                    "config-file": "<.github/configs/nvidia-master.yaml or amd-master.yaml>",
+                    "conc": "<concurrency>"
+                }
+            )
+            ```
+
+            **How to map a natural-language request to inputs:**
+            The user will say something like "profile sglang b200 deepseek fp4 conc=4". Parse it as:
+            - Model: "deepseek" / "dsr1" → model-prefix `dsr1`; "gptoss" → `gptoss`; "qwen" → `qwen3.5`
+            - Precision: "fp4" / "fp8" / "bf16"
+            - Runner/hardware: "b200", "h200", "h100", "mi300x", "mi325x", "mi355x", etc.
+            - Framework: must be "sglang" (reject if not)
+            - Concurrency: "conc=N" → `"conc": "N"`. Default to `"64"` if not specified.
+
+            Construct the config-key as: `{model-prefix}-{precision}-{runner}-sglang`
+            Choose config-file (always pass the full path): NVIDIA runners (b200, h200, h100, gb200, gb300) → `.github/configs/nvidia-master.yaml`; AMD runners (mi300x, mi325x, mi355x) → `.github/configs/amd-master.yaml`
+
+            **Available SGLang config keys:**
+            NVIDIA: `dsr1-fp4-b200-sglang`, `dsr1-fp8-b200-sglang`, `dsr1-fp8-h200-sglang`, `qwen3.5-bf16-b200-sglang`
+            AMD: `dsr1-fp4-mi355x-sglang`, `dsr1-fp8-mi300x-sglang`, `dsr1-fp8-mi325x-sglang`, `dsr1-fp8-mi355x-sglang`, `qwen3.5-bf16-mi355x-sglang`, `qwen3.5-fp8-mi355x-sglang`
+
+            **Examples:**
+            - "profile sglang b200 deepseek fp4 conc=4" → `config-key: dsr1-fp4-b200-sglang`, `config-file: .github/configs/nvidia-master.yaml`, `conc: 4`
+            - "profile sglang mi355x dsr1 fp8" → `config-key: dsr1-fp8-mi355x-sglang`, `config-file: .github/configs/amd-master.yaml`, `conc: 64`
+
+            **After dispatch:**
+            Monitor with `mcp__github__get_workflow_run`. The profile workflow takes ~15-30 minutes. When complete, the **Perfetto relay link** is written to the workflow run's step summary and is also printed in the job log. Retrieve it from the log with:
+            ```bash
+            gh run view <RUN_ID> --repo SemiAnalysisAI/InferenceX --log | grep "Perfetto Relay URL:"
+            ```
+            Post the Perfetto relay link back to the user in the comment.
+
             ## vLLM and SGLang Source Code Access
 
             You have access to vLLM and SGLang source code via the inferencemax-repos MCP server:
 
@@ -0,0 +1,296 @@
+name: Profile
+
+on:
+  workflow_dispatch:
+    inputs:
+      config-key:
+        description: "Config key from config yaml"
+        required: true
+        type: string
+      config-file:
+        description: "Config file to use"
+        required: false
+        type: string
+        default: '.github/configs/nvidia-master.yaml'
+      conc:
+        description: "Concurrency value (must exist in config's conc-range/list)"
+        required: false
+        type: string
+        default: '64'
+      moe-debug:
+        description: "Enable MoE debug patch and log (MOE_DEBUG_LOG)"
+        required: false
+        type: boolean
+        default: false
+      ref:
+        description: "Ref (branch/sha) to checkout"
+        required: false
+        type: string
+
+permissions:
+  contents: read
+
+env:
+  HF_TOKEN: ${{ secrets.HF_TOKEN }}
+  HF_HUB_CACHE: '/mnt/hf_hub_cache/'
+  RANDOM_RANGE_RATIO: '0.8'
+  PERFETTO_RELAY_URL: https://semianalysisai.github.io/InferenceX-trace-storage
+
+jobs:
+  get-jobs:
+    runs-on: ubuntu-latest
+    outputs:
+      filtered-matrix: ${{ steps.filter.outputs.filtered }}
+      count: ${{ steps.filter.outputs.count }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+        with:
+          ref: ${{ inputs.ref || github.ref }}
+
+      - id: gen
+        name: Generate matrix via script
+        # Dispatch inputs are handed to the script through the environment
+        # instead of being expanded into the script text, so shell
+        # metacharacters in an input value are never parsed as commands.
+        env:
+          INPUT_CONFIG_FILE: ${{ inputs.config-file }}
+          INPUT_CONFIG_KEY: ${{ inputs.config-key }}
+          INPUT_CONC: ${{ inputs.conc }}
+        run: |
+          pip install pydantic
+          # $CLI_ARGS is expanded unquoted below, exactly as before, so
+          # space-separated input values still split into separate arguments.
+          CLI_ARGS="test-config --config-files $INPUT_CONFIG_FILE --config-keys $INPUT_CONFIG_KEY --conc $INPUT_CONC"
+          CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix_logic/generate_sweep_configs.py $CLI_ARGS)
+          echo "raw=$CONFIG_JSON" >> $GITHUB_OUTPUT
+
+      - id: filter
+        name: Take first generated job
+        shell: python
+        # The generator output is read from the environment. Pasting it into a
+        # Python string literal would let quotes or backslash escapes inside
+        # the JSON break the script or silently change the parsed text.
+        env:
+          RAW_MATRIX: ${{ steps.gen.outputs.raw }}
+        run: |
+          import json, os, sys
+          raw = os.environ.get('RAW_MATRIX', '')
+          try:
+            data = json.loads(raw)
+          except Exception as e:
+            # Publish an empty matrix before failing so the outputs are defined.
+            print('Invalid generator output:', e, file=sys.stderr)
+            with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+              f.write("filtered=[]\ncount=0\n")
+            raise
+
+          if not isinstance(data, list):
+            print('Generator output is not a list.', file=sys.stderr)
+            with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+              f.write("filtered=[]\ncount=0\n")
+            raise SystemExit(1)
+
+          # Only the first generated job is profiled.
+          filt = data[:1]
+
+          out = json.dumps(filt)
+          print(out)
+          with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
+            f.write(f"filtered={out}\n")
+            f.write(f"count={len(filt)}\n")
+
+      - name: Fail if no matching entries
+        if: ${{ steps.filter.outputs.count == '0' }}
+        # Report the inputs this workflow actually declares (there is no
+        # seq-lens input), read from the environment so they are printed
+        # verbatim rather than parsed as part of the shell command.
+        env:
+          INPUT_CONFIG_KEY: ${{ inputs.config-key }}
+          INPUT_CONFIG_FILE: ${{ inputs.config-file }}
+          INPUT_CONC: ${{ inputs.conc }}
+        run: |
+          echo "No entries produced for config-key=${INPUT_CONFIG_KEY}, config-file=${INPUT_CONFIG_FILE}, conc=${INPUT_CONC}." >&2
+          exit 1
+
+  profile:
+    needs: get-jobs
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.get-jobs.outputs.filtered-matrix) }}
+    runs-on: ${{ matrix.config.runner }}
+    env:
+      EXP_NAME: ${{ matrix.config.exp-name }}
+      MODEL: ${{ matrix.config.model }}
+      MODEL_PREFIX: ${{ matrix.config.model-prefix }}
+      ISL: ${{ matrix.config.isl }}
+      OSL: ${{ matrix.config.osl }}
+      MAX_MODEL_LEN: ${{ matrix.config.max-model-len }}
+      IMAGE: ${{ matrix.config.image }}
+      FRAMEWORK: ${{ matrix.config.framework }}
+      PRECISION: ${{ matrix.config.precision }}
+      TP: ${{ matrix.config.tp }}
+      EP_SIZE: ${{ matrix.config.ep }}
+      DP_ATTENTION: ${{ matrix.config['dp-attn'] }}
+      CONC: ${{ matrix.config.conc }}
+      SPEC_DECODING: ${{ matrix.config.spec-decoding }}
+      DISAGG: ${{ matrix.config.disagg }}
+      MOE_DEBUG: '0'
+      MOE_DEBUG_LOG: ${{ (inputs.moe-debug) && '/workspace/moe_debug.tp0.log' || '' }}
+    steps:
+      - name: Resource cleanup
+        run: |
+          # Cleanup Docker resources
+          if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
+            echo "[Docker] Cleaning up resources ..."
+            docker ps -aq | xargs -r docker rm -f
+            docker network prune -f
+            while [ -n "$(docker ps -aq)" ]; do
+              docker ps -a
+              sleep 5
+            done
+          fi
+
+          # Cleanup SLURM resources
+          if command -v squeue >/dev/null 2>&1; then
+            if [[ "${{ runner.name }}" == mi355x-amds* || "${{ runner.name }}" == gb200-nv* || "${{ runner.name }}" == gb300-nv* || "${{ runner.name }}" == h100-dgxc-slurm* || "${{ runner.name }}" == h200-dgxc-slurm* || "${{ runner.name }}" == b200-dgxc-slurm* ]]; then
+              echo "[Slurm] Cleaning up jobs with name: ${{ runner.name }} ..."
+              scancel --name="${{ runner.name }}" || true
+              while [ -n "$(squeue --name='${{ runner.name }}' --noheader --format='%i')" ]; do
+                squeue --name="${{ runner.name }}"
+                sleep 5
+              done
+            else
+              echo "[Slurm] Cleaning up jobs for user: $USER ..."
+              scancel -u "$USER" || true
+              while [ -n "$(squeue -u "$USER" --noheader --format='%i')" ]; do
+                squeue -u "$USER"
+                sleep 5
+              done
+            fi
+          fi
+
+      - name: Checkout code
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+        with:
+          fetch-depth: 0
+          ref: ${{ inputs.ref || github.ref }}
+
+      - name: Launch + Profile (single-node sglang/vllm)
+        id: run
+        env:
+          RUNNER_NAME: ${{ runner.name }}
+          PROFILE: '1'
+          SGLANG_TORCH_PROFILER_DIR: /workspace/
+          VLLM_TORCH_PROFILER_DIR: /workspace/
+          VLLM_RPC_TIMEOUT: '1800000'
+        shell: bash
+        run: |
+          set -euo pipefail
+          ep_val="${EP_SIZE:-1}"
+          res_name="${EXP_NAME}_${PRECISION}_${FRAMEWORK}_tp${TP}_ep${ep_val}_dpa_${DP_ATTENTION}_conc${CONC}_${RUNNER_NAME}"
+          export RESULT_FILENAME="${res_name}"
+          echo "RESULT_FILENAME=${res_name}" >> "$GITHUB_ENV"
+
+          bash ./runners/launch_${RUNNER_NAME%%_*}.sh
+
+          if [ ! -f "${res_name}.json" ]; then
+            echo "Run failed: Benchmark result ${res_name}.json not found." >&2
+            exit 1
+          fi
+
+          trace_path="profile_${res_name}.trace.json.gz"
+          if [ -f "$trace_path" ]; then
+            echo "trace=$trace_path" >> "$GITHUB_OUTPUT"
+            if [ "${FRAMEWORK}" = "sglang" ]; then
+              # Try to locate corresponding TP-0 traces produced by SGLang profiler
+              merged_latest=$(ls -t profiles/merged-*.trace.json.gz 2>/dev/null | head -n1 || true)
+              if [ -n "${merged_latest}" ] && [ -f "${merged_latest}" ]; then
+                ts_name="${merged_latest##*/}"
+                ts_name="${ts_name#merged-}"
+                ts_name="${ts_name%.trace.json.gz}"
+                tp0_decode="profiles/${ts_name}-TP-0-DECODE.trace.json.gz"
+                tp0_extend="profiles/${ts_name}-TP-0-EXTEND.trace.json.gz"
+                if [ -f "${tp0_decode}" ]; then
+                  echo "tp0_decode=${tp0_decode}" >> "$GITHUB_OUTPUT"
+                fi
+                if [ -f "${tp0_extend}" ]; then
+                  echo "tp0_extend=${tp0_extend}" >> "$GITHUB_OUTPUT"
+                fi
+              fi
+            fi
+          else
+            echo "Profile trace not found: $trace_path" >&2
+          fi
+
+      - name: Process result (json -> agg)
+        env:
+          RUNNER_TYPE: ${{ matrix.config.runner }}
+        run: |
+          python3 utils/process_result.py
+
+      - name: Upload profile as artifact
+        if: ${{ steps.run.outputs.trace != '' }}
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: profile_${{ env.RESULT_FILENAME }}
+          path: profile_${{ env.RESULT_FILENAME }}.trace.json.gz
+          if-no-files-found: ignore
+
+      - name: Upload TP-0-DECODE trace as artifact
+        if: ${{ steps.run.outputs.tp0_decode != '' }}
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: profile_${{ env.RESULT_FILENAME }}_TP0_DECODE
+          path: ${{ steps.run.outputs.tp0_decode }}
+          if-no-files-found: ignore
+
+      - name: Upload TP-0-EXTEND trace as artifact
+        if: ${{ steps.run.outputs.tp0_extend != '' }}
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: profile_${{ env.RESULT_FILENAME }}_TP0_EXTEND
+          path: ${{ steps.run.outputs.tp0_extend }}
+          if-no-files-found: ignore
+
+      - name: Upload MoE debug log as artifact
+        # Gate on the dispatch input that requests the debug log. The job-level
+        # MOE_DEBUG variable is fixed to '0', so a condition on it never fires.
+        # A missing log file is tolerated via if-no-files-found below.
+        if: ${{ inputs.moe-debug }}
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: moe_debug_${{ env.RESULT_FILENAME }}
+          path: "moe_debug.tp0.log"
+          if-no-files-found: ignore
+
+      - name: Checkout storage repo
+        if: ${{ steps.run.outputs.trace != '' }}
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+        with:
+          repository: SemiAnalysisAI/InferenceX-trace-storage
+          path: storage
+          ref: master
+          ssh-key: ${{ secrets.PROFILER_STORAGE_DEPLOY_KEY }}
+          fetch-depth: 0
+
+      - name: Push profile to storage repo
+        if: ${{ steps.run.outputs.trace != '' }}
+        id: push
+        # Matrix values are exposed as environment variables so the script
+        # builds every path and label from one set of shell variables.
+        env:
+          TRACE_LOCAL: ${{ steps.run.outputs.trace }}
+          CFG_RUNNER: ${{ matrix.config.runner }}
+          CFG_FRAMEWORK: ${{ matrix.config.framework }}
+          CFG_EXP_NAME: ${{ matrix.config['exp-name'] }}
+          CFG_PRECISION: ${{ matrix.config.precision }}
+          CFG_TP: ${{ matrix.config.tp }}
+          CFG_EP: ${{ matrix.config.ep || 1 }}
+          CFG_CONC: ${{ matrix.config.conc }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          # Build the title and the storage-relative directory once, so the
+          # copy destination and the published raw URL cannot drift apart.
+          TITLE="${CFG_EXP_NAME}_${CFG_PRECISION}_tp${CFG_TP}_ep${CFG_EP}_conc${CFG_CONC}"
+          rel_dir="profiles/${GITHUB_SHA}/${CFG_RUNNER}/${CFG_FRAMEWORK}/${TITLE}"
+          dest_dir="storage/${rel_dir}"
+          mkdir -p "$dest_dir"
+          cp "$TRACE_LOCAL" "$dest_dir/trace.json.gz"
+
+          pushd storage >/dev/null
+          git config user.name "github-actions"
+          git config user.email "github-actions@github.com"
+          git add -A
+          git commit -m "Add profile: ${GITHUB_SHA} ${CFG_EXP_NAME} tp${CFG_TP} ep${CFG_EP} conc${CFG_CONC}" || echo "Nothing to commit"
+          # Another run may have pushed since the checkout; rebase onto the
+          # remote branch and retry instead of failing on a non-fast-forward.
+          pushed=0
+          for attempt in 1 2 3; do
+            if git push; then
+              pushed=1
+              break
+            fi
+            echo "git push failed (attempt ${attempt}); rebasing and retrying" >&2
+            git pull --rebase
+          done
+          if [ "$pushed" != "1" ]; then
+            echo "Unable to push profile to storage repo" >&2
+            exit 1
+          fi
+          # Resolve the commit only after the push: a rebase rewrites HEAD.
+          STORAGE_SHA="$(git rev-parse HEAD)"
+          popd >/dev/null
+
+          export TITLE
+          export RAW_URL="https://raw.githubusercontent.com/SemiAnalysisAI/InferenceX-trace-storage/${STORAGE_SHA}/${rel_dir}/trace.json.gz"
+
+          # Percent-encode both values so they survive as query parameters.
+          enc_src="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["RAW_URL"], safe=""))')"
+          enc_title="$(python3 -c 'import os,urllib.parse; print(urllib.parse.quote(os.environ["TITLE"], safe=""))')"
+
+          relay="${PERFETTO_RELAY_URL%/}"
+          RELAY_URL="${relay}/?src=${enc_src}&title=${enc_title}"
+
+          echo "raw_url=$RAW_URL" >> "$GITHUB_OUTPUT"
+          echo "relay_url=$RELAY_URL" >> "$GITHUB_OUTPUT"
+
+      - name: Print Perfetto link (relay)
+        if: ${{ steps.push.outputs.relay_url != '' }}
+        env:
+          RELAY_URL: ${{ steps.push.outputs.relay_url }}
+          RAW_URL: ${{ steps.push.outputs.raw_url }}
+        shell: bash
+        run: |
+          set -euo pipefail
+          echo "RAW trace URL: $RAW_URL"
+          echo "Perfetto Relay URL: $RELAY_URL"
+          printf "\n**Perfetto (Relay):** %s\n" "$RELAY_URL" >> "$GITHUB_STEP_SUMMARY"