SemiAnalysisAI
diff --git a/.github/workflows/benchmark-tmpl.yml b/.github/workflows/benchmark-tmpl.yml
Lines changed: 1 addition & 2 deletions
diff --git a/.github/workflows/collect-evals.yml b/.github/workflows/collect-evals.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/drain-b200-nvd2.yml b/.github/workflows/drain-b200-nvd2.yml
Lines changed: 0 additions & 32 deletions
diff --git a/.github/workflows/full-sweep-1k1k-scheduler.yml b/.github/workflows/full-sweep-1k1k-scheduler.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/full-sweep-1k8k-scheduler.yml b/.github/workflows/full-sweep-1k8k-scheduler.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/full-sweep-8k1k-scheduler.yml b/.github/workflows/full-sweep-8k1k-scheduler.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.github/workflows/full-sweep-test.yml b/.github/workflows/full-sweep-test.yml
Lines changed: 6 additions & 6 deletions
diff --git a/benchmarks/benchmark_lib.sh b/benchmarks/benchmark_lib.sh
Lines changed: 25 additions & 26 deletions
diff --git a/benchmarks/gptoss_fp4_b200_trt_slurm.sh b/benchmarks/gptoss_fp4_b200_trt_slurm.sh
Lines changed: 7 additions & 0 deletions
@@ -173,14 +173,13 @@ jobs:
         with:
           name: eval_${{ env.EXP_NAME }}_${{ env.RESULT_FILENAME }}
           path: |
-            SUMMARY.md
             meta_env.json
             results*.json
           if-no-files-found: ignore
 
       - name: Cleanup eval outputs (post-upload)
         if: ${{ env.RUN_EVAL == 'true' }}
         run: |
-          rm -f SUMMARY.md meta_env.json || true
+          rm -f meta_env.json || true
           # Remove any eval results JSONs that were moved into workspace
           rm -f results*.json || true
@@ -29,7 +29,7 @@ jobs:
 
       - name: Summarize evals
         run: |
-          echo "## 📋 Eval Summary - ${{ inputs.exp-name || 'all' }}" >> $GITHUB_STEP_SUMMARY
+          echo "## Eval Summary - ${{ inputs.exp-name || 'all' }}" >> $GITHUB_STEP_SUMMARY
           echo "" >> $GITHUB_STEP_SUMMARY
           python3 utils/collect_eval_results.py eval_results/ ${{ inputs.exp-name || 'all' }} >> $GITHUB_STEP_SUMMARY
 
 
@@ -17,7 +17,7 @@ jobs:
             - id: get-dsr1-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
             - id: get-gptoss-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     benchmark-dsr1:
 
@@ -17,7 +17,7 @@ jobs:
             - id: get-dsr1-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
             - id: get-gptoss-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     benchmark-dsr1:
 
@@ -17,7 +17,7 @@ jobs:
             - id: get-dsr1-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
             - id: get-gptoss-configs
               run: |
                   pip install pydantic
-                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss)
+                  CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --run-evals)
                   echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
 
     benchmark-dsr1:
 
@@ -63,43 +63,43 @@ jobs:
 
                   # Generate dsr1 configs (only if we have valid runner types for DSR1)
                   if [ "${{ inputs.run_1k1k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
-                      DSR1_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      DSR1_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "dsr1-1k1k=$DSR1_1K1K" >> $GITHUB_OUTPUT
                   else
                       echo "dsr1-1k1k=[]" >> $GITHUB_OUTPUT
                   fi
 
                   if [ "${{ inputs.run_1k8k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
-                      DSR1_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      DSR1_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "dsr1-1k8k=$DSR1_1K8K" >> $GITHUB_OUTPUT
                   else
                       echo "dsr1-1k8k=[]" >> $GITHUB_OUTPUT
                   fi
 
                   if [ "${{ inputs.run_8k1k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
-                      DSR1_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      DSR1_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "dsr1-8k1k=$DSR1_8K1K" >> $GITHUB_OUTPUT
                   else
                       echo "dsr1-8k1k=[]" >> $GITHUB_OUTPUT
                   fi
 
                   # Generate gptoss configs (only if we have runner types selected)
                   if [ "${{ inputs.run_1k1k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
-                      GPTOSS_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      GPTOSS_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "gptoss-1k1k=$GPTOSS_1K1K" >> $GITHUB_OUTPUT
                   else
                       echo "gptoss-1k1k=[]" >> $GITHUB_OUTPUT
                   fi
 
                   if [ "${{ inputs.run_1k8k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
-                      GPTOSS_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      GPTOSS_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "gptoss-1k8k=$GPTOSS_1K8K" >> $GITHUB_OUTPUT
                   else
                       echo "gptoss-1k8k=[]" >> $GITHUB_OUTPUT
                   fi
 
                   if [ "${{ inputs.run_8k1k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
-                      GPTOSS_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
+                      GPTOSS_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
                       echo "gptoss-8k1k=$GPTOSS_8K1K" >> $GITHUB_OUTPUT
                   else
                       echo "gptoss-8k1k=[]" >> $GITHUB_OUTPUT
 
@@ -288,55 +288,54 @@ append_lm_eval_summary() {
     local results_dir="${EVAL_RESULT_DIR}"
     local task="${EVAL_TASK:-gsm8k}"
     local out_dir="${results_dir}"
-    local summary_md="${out_dir}/SUMMARY.md"
     mkdir -p "$out_dir" || true
 
     # Write minimal meta for collectors that expect it
     local meta_json="${out_dir}/meta_env.json"
     local model_name="${MODEL_NAME:-$MODEL}"
     local dp_json="false"
     if [ "${DP_ATTENTION}" = "true" ]; then dp_json="true"; fi
+
+    # Derive framework/precision from env, falling back to parsing RESULT_FILENAME
+    # RESULT_FILENAME format (from workflow):
+    #   <exp_name>_<precision>_<framework>_tp<...>_ep<...>_dpa_<...>_conc<...>_<runner>
+    local fw="${FRAMEWORK:-}"
+    local prec="${PRECISION:-}"
+    if [[ -z "$fw" || -z "$prec" ]]; then
+        if [[ -n "${RESULT_FILENAME}" ]]; then
+            # Extract the two fields immediately before "_tp"
+            # Handles arbitrary underscores in exp_name by matching from the end
+            local parsed
+            parsed=$(echo "${RESULT_FILENAME}" | sed -n 's/.*_\([^_][^_]*\)_\([^_][^_]*\)_tp.*/\1 \2/p')
+            local p1="${parsed%% *}"
+            local p2="${parsed#* }"
+            if [[ -z "$prec" && -n "$p1" && "$p1" != "$parsed" ]]; then
+                prec="$p1"
+            fi
+            if [[ -z "$fw" && -n "$p2" && "$p2" != "$parsed" ]]; then
+                fw="$p2"
+            fi
+        fi
+    fi
     cat > "${meta_json}" <<META
 {
-  "framework": "${FRAMEWORK:-unknown}",
-  "precision": "${PRECISION:-unknown}",
+  "framework": "${fw:-unknown}",
+  "precision": "${prec:-unknown}",
   "tp": ${TP:-1},
   "ep": ${EP_SIZE:-1},
   "dp_attention": ${dp_json},
   "model": "${model_name:-}"
 }
 META
 
-    PYTHONNOUSERSITE=1 PYTHONPATH="" python3 -S utils/lm_eval_to_md.py \
-        --results-dir "$out_dir" \
-        --task "${task}" \
-        --framework "${FRAMEWORK}" \
-        --precision "${PRECISION}" \
-        --tp "${TP:-1}" \
-        --ep "${EP_SIZE:-1}" \
-        --dp-attention "${DP_ATTENTION:-false}" \
-        > "$summary_md" || true
-
-    # If running inside a GitHub Actions step on this same machine, append there too
-    if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
-        local GH_SUM_DIR
-        GH_SUM_DIR="$(dirname "$GITHUB_STEP_SUMMARY")"
-        if [ -d "$GH_SUM_DIR" ] && [ -w "$GH_SUM_DIR" ]; then
-            cat "$summary_md" >> "$GITHUB_STEP_SUMMARY" || true
-        fi
-    fi
-
     # Move eval artifacts into PWD (no new directories in workspace)
-    if [ -f "${summary_md}" ]; then
-        mv -f "${summary_md}" ./ || true
-    fi
     if [ -f "${meta_json}" ]; then
         mv -f "${meta_json}" ./ || true
     fi
     if [ -d "${out_dir}" ]; then
         while IFS= read -r -d '' jf; do
             base=$(basename "$jf")
-            if [ "$base" != "meta_env.json" ] && [ "$base" != "SUMMARY.md" ]; then
+            if [ "$base" != "meta_env.json" ]; then
                 mv -f "$jf" ./ || true
             fi
         done < <(find "${out_dir}" -type f -name "*.json" -print0 2>/dev/null)
 
@@ -94,3 +94,10 @@ run_benchmark_serving \
     --max-concurrency "$CONC" \
     --result-filename "$RESULT_FILENAME" \
     --result-dir /workspace/
+
+# After the throughput benchmark, run the evaluation only if RUN_EVAL is "true"
+if [ "${RUN_EVAL}" = "true" ]; then
+    run_eval --framework lm-eval --port "$PORT" --concurrent-requests $(( $CONC * 2 ))
+    append_lm_eval_summary
+fi
+set +x