Skip to content

Commit 544e698

Browse files
committed
Resolve issues
1 parent 1390c52 commit 544e698

14 files changed

Lines changed: 158 additions & 389 deletions

.github/workflows/benchmark-tmpl.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,14 +173,13 @@ jobs:
173173
with:
174174
name: eval_${{ env.EXP_NAME }}_${{ env.RESULT_FILENAME }}
175175
path: |
176-
SUMMARY.md
177176
meta_env.json
178177
results*.json
179178
if-no-files-found: ignore
180179

181180
- name: Cleanup eval outputs (post-upload)
182181
if: ${{ env.RUN_EVAL == 'true' }}
183182
run: |
184-
rm -f SUMMARY.md meta_env.json || true
183+
rm -f meta_env.json || true
185184
# Remove any eval results JSONs that were moved into workspace
186185
rm -f results*.json || true

.github/workflows/collect-evals.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ jobs:
2929

3030
- name: Summarize evals
3131
run: |
32-
echo "## 📋 Eval Summary - ${{ inputs.exp-name || 'all' }}" >> $GITHUB_STEP_SUMMARY
32+
echo "## Eval Summary - ${{ inputs.exp-name || 'all' }}" >> $GITHUB_STEP_SUMMARY
3333
echo "" >> $GITHUB_STEP_SUMMARY
3434
python3 utils/collect_eval_results.py eval_results/ ${{ inputs.exp-name || 'all' }} >> $GITHUB_STEP_SUMMARY
3535

.github/workflows/drain-b200-nvd2.yml

Lines changed: 0 additions & 32 deletions
This file was deleted.

.github/workflows/full-sweep-1k1k-scheduler.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- id: get-dsr1-configs
1818
run: |
1919
pip install pydantic
20-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1)
20+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --run-evals)
2121
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
2222
2323
get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
3131
- id: get-gptoss-configs
3232
run: |
3333
pip install pydantic
34-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss)
34+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --run-evals)
3535
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
3636
3737
benchmark-dsr1:

.github/workflows/full-sweep-1k8k-scheduler.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- id: get-dsr1-configs
1818
run: |
1919
pip install pydantic
20-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1)
20+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --run-evals)
2121
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
2222
2323
get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
3131
- id: get-gptoss-configs
3232
run: |
3333
pip install pydantic
34-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss)
34+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --run-evals)
3535
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
3636
3737
benchmark-dsr1:

.github/workflows/full-sweep-8k1k-scheduler.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
- id: get-dsr1-configs
1818
run: |
1919
pip install pydantic
20-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1)
20+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --run-evals)
2121
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
2222
2323
get-gptoss-configs:
@@ -31,7 +31,7 @@ jobs:
3131
- id: get-gptoss-configs
3232
run: |
3333
pip install pydantic
34-
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss)
34+
CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --run-evals)
3535
echo "search-space-config=$CONFIG_JSON" >> $GITHUB_OUTPUT
3636
3737
benchmark-dsr1:

.github/workflows/full-sweep-test.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,43 +63,43 @@ jobs:
6363
6464
# Generate dsr1 configs (only if we have valid runner types for DSR1)
6565
if [ "${{ inputs.run_1k1k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
66-
DSR1_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
66+
DSR1_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
6767
echo "dsr1-1k1k=$DSR1_1K1K" >> $GITHUB_OUTPUT
6868
else
6969
echo "dsr1-1k1k=[]" >> $GITHUB_OUTPUT
7070
fi
7171
7272
if [ "${{ inputs.run_1k8k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
73-
DSR1_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
73+
DSR1_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
7474
echo "dsr1-1k8k=$DSR1_1K8K" >> $GITHUB_OUTPUT
7575
else
7676
echo "dsr1-1k8k=[]" >> $GITHUB_OUTPUT
7777
fi
7878
7979
if [ "${{ inputs.run_8k1k }}" = "true" ] && [ -n "$DSR1_RUNNER_TYPES" ]; then
80-
DSR1_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
80+
DSR1_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix dsr1 --runner-type $DSR1_RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
8181
echo "dsr1-8k1k=$DSR1_8K1K" >> $GITHUB_OUTPUT
8282
else
8383
echo "dsr1-8k1k=[]" >> $GITHUB_OUTPUT
8484
fi
8585
8686
# Generate gptoss configs (only if we have runner types selected)
8787
if [ "${{ inputs.run_1k1k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
88-
GPTOSS_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
88+
GPTOSS_1K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
8989
echo "gptoss-1k1k=$GPTOSS_1K1K" >> $GITHUB_OUTPUT
9090
else
9191
echo "gptoss-1k1k=[]" >> $GITHUB_OUTPUT
9292
fi
9393
9494
if [ "${{ inputs.run_1k8k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
95-
GPTOSS_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
95+
GPTOSS_1K8K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 1k8k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
9696
echo "gptoss-1k8k=$GPTOSS_1K8K" >> $GITHUB_OUTPUT
9797
else
9898
echo "gptoss-1k8k=[]" >> $GITHUB_OUTPUT
9999
fi
100100
101101
if [ "${{ inputs.run_8k1k }}" = "true" ] && [ -n "$RUNNER_TYPES" ]; then
102-
GPTOSS_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml)
102+
GPTOSS_8K1K=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py full-sweep --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml --seq-lens 8k1k --model-prefix gptoss --runner-type $RUNNER_TYPES --runner-config ${GITHUB_WORKSPACE}/.github/configs/runners.yaml --run-evals)
103103
echo "gptoss-8k1k=$GPTOSS_8K1K" >> $GITHUB_OUTPUT
104104
else
105105
echo "gptoss-8k1k=[]" >> $GITHUB_OUTPUT

benchmarks/benchmark_lib.sh

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -288,55 +288,54 @@ append_lm_eval_summary() {
288288
local results_dir="${EVAL_RESULT_DIR}"
289289
local task="${EVAL_TASK:-gsm8k}"
290290
local out_dir="${results_dir}"
291-
local summary_md="${out_dir}/SUMMARY.md"
292291
mkdir -p "$out_dir" || true
293292

294293
# Write minimal meta for collectors that expect it
295294
local meta_json="${out_dir}/meta_env.json"
296295
local model_name="${MODEL_NAME:-$MODEL}"
297296
local dp_json="false"
298297
if [ "${DP_ATTENTION}" = "true" ]; then dp_json="true"; fi
298+
299+
# Derive framework/precision from env, fallback to parsing RESULT_FILENAME
300+
# RESULT_FILENAME format (from workflow):
301+
# <exp_name>_<precision>_<framework>_tp<...>_ep<...>_dpa_<...>_conc<...>_<runner>
302+
local fw="${FRAMEWORK:-}"
303+
local prec="${PRECISION:-}"
304+
if [[ -z "$fw" || -z "$prec" ]]; then
305+
if [[ -n "${RESULT_FILENAME}" ]]; then
306+
# Extract the two fields immediately before "_tp"
307+
# Handles arbitrary underscores in exp_name by matching from the end
308+
local parsed
309+
parsed=$(echo "${RESULT_FILENAME}" | sed -n 's/.*_\([^_][^_]*\)_\([^_][^_]*\)_tp.*/\1 \2/p')
310+
local p1="${parsed%% *}"
311+
local p2="${parsed#* }"
312+
if [[ -z "$prec" && -n "$p1" && "$p1" != "$parsed" ]]; then
313+
prec="$p1"
314+
fi
315+
if [[ -z "$fw" && -n "$p2" && "$p2" != "$parsed" ]]; then
316+
fw="$p2"
317+
fi
318+
fi
319+
fi
299320
cat > "${meta_json}" <<META
300321
{
301-
"framework": "${FRAMEWORK:-unknown}",
302-
"precision": "${PRECISION:-unknown}",
322+
"framework": "${fw:-unknown}",
323+
"precision": "${prec:-unknown}",
303324
"tp": ${TP:-1},
304325
"ep": ${EP_SIZE:-1},
305326
"dp_attention": ${dp_json},
306327
"model": "${model_name:-}"
307328
}
308329
META
309330

310-
PYTHONNOUSERSITE=1 PYTHONPATH="" python3 -S utils/lm_eval_to_md.py \
311-
--results-dir "$out_dir" \
312-
--task "${task}" \
313-
--framework "${FRAMEWORK}" \
314-
--precision "${PRECISION}" \
315-
--tp "${TP:-1}" \
316-
--ep "${EP_SIZE:-1}" \
317-
--dp-attention "${DP_ATTENTION:-false}" \
318-
> "$summary_md" || true
319-
320-
# If running inside a GitHub Actions step on this same machine, append there too
321-
if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then
322-
local GH_SUM_DIR
323-
GH_SUM_DIR="$(dirname "$GITHUB_STEP_SUMMARY")"
324-
if [ -d "$GH_SUM_DIR" ] && [ -w "$GH_SUM_DIR" ]; then
325-
cat "$summary_md" >> "$GITHUB_STEP_SUMMARY" || true
326-
fi
327-
fi
328-
329331
# Move eval artifacts into PWD (no new directories in workspace)
330-
if [ -f "${summary_md}" ]; then
331-
mv -f "${summary_md}" ./ || true
332-
fi
333332
if [ -f "${meta_json}" ]; then
334333
mv -f "${meta_json}" ./ || true
335334
fi
336335
if [ -d "${out_dir}" ]; then
337336
while IFS= read -r -d '' jf; do
338337
base=$(basename "$jf")
339-
if [ "$base" != "meta_env.json" ] && [ "$base" != "SUMMARY.md" ]; then
338+
if [ "$base" != "meta_env.json" ]; then
340339
mv -f "$jf" ./ || true
341340
fi
342341
done < <(find "${out_dir}" -type f -name "*.json" -print0 2>/dev/null)

benchmarks/gptoss_fp4_b200_trt_slurm.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,10 @@ run_benchmark_serving \
9494
--max-concurrency "$CONC" \
9595
--result-filename "$RESULT_FILENAME" \
9696
--result-dir /workspace/
97+
98+
# After throughput, run evaluation only if RUN_EVAL is true
99+
if [ "${RUN_EVAL}" = "true" ]; then
100+
run_eval --framework lm-eval --port "$PORT" --concurrent-requests $(( $CONC * 2 ))
101+
append_lm_eval_summary
102+
fi
103+
set +x

0 commit comments

Comments
 (0)