Skip to content

Commit 6c29d32

Browse files
committed
Merge remote-tracking branch 'origin/main' into feat/m3-mi300x-blockfp8-clean
# Conflicts: # perf-changelog.yaml
2 parents 7521394 + 60bf726 commit 6c29d32

4 files changed

Lines changed: 217 additions & 17 deletions

File tree

.github/configs/amd-master.yaml

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2261,15 +2261,8 @@ dsv4-fp4-mi355x-vllm-mtp:
22612261
search-space:
22622262
- { tp: 8, conc-start: 4, conc-end: 512, spec-decoding: mtp }
22632263

2264-
# Day-0 single-sequence marker for DeepSeek-V4 on ATOM (ROCm/ATOM#650).
2265-
# PR1 of the ATOM DSv4 series still uses torch sparse-attention fallbacks
2266-
# that OOM once warmup/prefill batches multiple requests; keep CONC=1 until
2267-
# the AITER sparse-attention kernel / multi-request path lands upstream.
2268-
# --enforce-eager and ATOM_USE_TRITON_MOE=1 are required on gfx950. Image is
2269-
# the standard atom0.1.2.post MI355X base (matching qwen3.5-fp8-mi355x-atom);
2270-
# the DSv4 PR is overlaid at runtime by dsv4_fp4_mi355x_atom.sh at a pinned SHA.
22712264
dsv4-fp4-mi355x-atom:
2272-
image: rocm/atom:rocm7.2.4_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.3
2265+
image: rocm/atom-dev:nightly_202606161823
22732266
model: deepseek-ai/DeepSeek-V4-Pro
22742267
model-prefix: dsv4
22752268
runner: mi355x
@@ -2281,13 +2274,20 @@ dsv4-fp4-mi355x-atom:
22812274
- isl: 1024
22822275
osl: 1024
22832276
search-space:
2277+
# conc4-64, TP8
2278+
# conc128-512, DPA
2279+
# conc1024-2048, DPA TBO
22842280
- { tp: 8, ep: 1, conc-start: 1, conc-end: 64 }
2285-
- { tp: 8, ep: 1, dp-attn: true, conc-start: 64, conc-end: 1024 }
2281+
- { tp: 8, ep: 1, dp-attn: true, conc-start: 64, conc-end: 2048 }
22862282
- isl: 8192
22872283
osl: 1024
22882284
search-space:
2289-
- { tp: 8, ep: 1, conc-start: 1, conc-end: 64 }
2290-
- { tp: 8, ep: 1, dp-attn: true, conc-start: 64, conc-end: 512 }
2285+
# conc4-64, TP8
2286+
# conc128, DPA
2287+
# conc256-2048, DPA TBO
2288+
- { tp: 4, ep: 1, conc-list: [8, 16, 32, 64] }
2289+
- { tp: 8, ep: 1, conc-list: [1, 2, 4, 8, 16, 32, 64] }
2290+
- { tp: 8, ep: 1, dp-attn: true, conc-start: 128, conc-end: 2048 }
22912291

22922292
dsv4-fp4-mi355x-atom-mtp:
22932293
image: rocm/atom:rocm7.2.4_ubuntu24.04_py3.12_pytorch_release_2.10.0_atom0.1.3
Lines changed: 172 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,172 @@
1+
name: Recover PR 1767 ingest
2+
run-name: "Recover PR #1767 ingest from run 27595478969"
3+
4+
on:
5+
workflow_dispatch:
6+
inputs:
7+
confirm:
8+
description: "Enter recover-pr-1767 to run the artifact-only recovery"
9+
required: true
10+
type: string
11+
12+
permissions:
13+
actions: read
14+
contents: read
15+
16+
jobs:
17+
recover-ingest:
18+
if: ${{ inputs.confirm == 'recover-pr-1767' }}
19+
runs-on: ubuntu-latest
20+
env:
21+
SOURCE_REPO: SemiAnalysisAI/InferenceX
22+
SOURCE_RUN_ID: "27595478969"
23+
SOURCE_PR_NUMBER: "1767"
24+
SOURCE_HEAD_SHA: 728eb321dd4b1decd81b2d460cb39aa369a0c9c8
25+
ORIGINAL_BASE_SHA: d99c824b1c4f0b1b007631191657e458ef2a332c
26+
ORIGINAL_MERGE_SHA: 7b9843d3a6e1fe7a2d92d327e25aae57ed3506c5
27+
steps:
28+
- name: Checkout recovery code
29+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
30+
with:
31+
fetch-depth: 0
32+
33+
- name: Validate reusable source run
34+
env:
35+
GH_TOKEN: ${{ secrets.REPO_PAT || github.token }}
36+
run: |
37+
run_json=$(gh api "repos/${SOURCE_REPO}/actions/runs/${SOURCE_RUN_ID}")
38+
jq -e \
39+
--arg expected_head "$SOURCE_HEAD_SHA" \
40+
'.event == "pull_request" and
41+
.status == "completed" and
42+
.conclusion == "success" and
43+
.path == ".github/workflows/run-sweep.yml" and
44+
.head_sha == $expected_head' \
45+
<<<"$run_json" >/dev/null
46+
47+
gh api "repos/${SOURCE_REPO}/pulls/${SOURCE_PR_NUMBER}/commits" \
48+
--paginate --jq '.[].sha' \
49+
| grep -Fxq "$SOURCE_HEAD_SHA"
50+
51+
artifacts_json=$(gh api \
52+
"repos/${SOURCE_REPO}/actions/runs/${SOURCE_RUN_ID}/artifacts?per_page=100")
53+
for required in results_bmk eval_results_all run-stats; do
54+
jq -e --arg name "$required" \
55+
'.artifacts[] | select(.name == $name and (.expired | not))' \
56+
<<<"$artifacts_json" >/dev/null
57+
done
58+
59+
- name: Reconstruct corrected merge configuration
60+
run: |
61+
git checkout --detach "$ORIGINAL_MERGE_SHA"
62+
test "$(git rev-parse "${ORIGINAL_MERGE_SHA}^")" = "$ORIGINAL_BASE_SHA"
63+
64+
perl -0pi -e \
65+
's/^ - config-keys:\n( - dsr1-fp8-gb300-dynamo-trt\n)/- config-keys:\n$1/m' \
66+
perf-changelog.yaml
67+
grep -A1 '^- config-keys:$' perf-changelog.yaml \
68+
| grep -q 'dsr1-fp8-gb300-dynamo-trt'
69+
if grep -A1 '^ - config-keys:$' perf-changelog.yaml \
70+
| grep -q 'dsr1-fp8-gb300-dynamo-trt'; then
71+
echo "PR #1767 changelog indentation is still malformed" >&2
72+
exit 1
73+
fi
74+
75+
git add perf-changelog.yaml
76+
fixed_tree=$(git write-tree)
77+
fixed_sha=$(printf '%s\n' 'Synthetic corrected PR #1767 merge tree' \
78+
| git -c user.name='InferenceX Recovery' \
79+
-c user.email='actions@users.noreply.github.com' \
80+
commit-tree "$fixed_tree" -p "$ORIGINAL_BASE_SHA")
81+
82+
pip install pydantic
83+
python3 utils/process_changelog.py \
84+
--changelog-file perf-changelog.yaml \
85+
--base-ref "$ORIGINAL_BASE_SHA" \
86+
--head-ref "$fixed_sha" \
87+
> "$RUNNER_TEMP/full-config.json"
88+
jq empty "$RUNNER_TEMP/full-config.json"
89+
90+
mkdir -p "$RUNNER_TEMP/changelog-metadata"
91+
jq \
92+
--arg base "$ORIGINAL_BASE_SHA" \
93+
--arg head "$ORIGINAL_MERGE_SHA" \
94+
'.changelog_metadata | .base_ref = $base | .head_ref = $head' \
95+
"$RUNNER_TEMP/full-config.json" \
96+
> "$RUNNER_TEMP/changelog-metadata/changelog_metadata.json"
97+
98+
- name: Download reusable benchmark artifacts
99+
env:
100+
GH_TOKEN: ${{ secrets.REPO_PAT || github.token }}
101+
run: |
102+
artifacts_dir="$RUNNER_TEMP/source-artifacts"
103+
gh run download "$SOURCE_RUN_ID" \
104+
--repo "$SOURCE_REPO" \
105+
-D "$artifacts_dir"
106+
107+
rm -rf "$artifacts_dir/changelog-metadata"
108+
for artifact_dir in "$artifacts_dir"/*; do
109+
[ -e "$artifact_dir" ] || continue
110+
name=$(basename "$artifact_dir")
111+
case "$name" in
112+
results_bmk|eval_results_all|run-stats|bmk_*|eval_*|server_logs_*|multinode_server_logs_*|agentic_aggregated)
113+
;;
114+
*)
115+
rm -rf "$artifact_dir"
116+
;;
117+
esac
118+
done
119+
120+
mkdir -p "$artifacts_dir/reused-ingest-metadata"
121+
jq -n \
122+
--arg source_run_id "$SOURCE_RUN_ID" \
123+
--arg source_run_attempt "1" \
124+
--arg source_run_url "https://github.com/${SOURCE_REPO}/actions/runs/${SOURCE_RUN_ID}" \
125+
--arg source_pr_number "$SOURCE_PR_NUMBER" \
126+
--arg source_head_sha "$SOURCE_HEAD_SHA" \
127+
--arg ingest_run_id "$GITHUB_RUN_ID" \
128+
--arg ingest_run_attempt "$GITHUB_RUN_ATTEMPT" \
129+
--arg ingest_run_url "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" \
130+
'{
131+
source_run_id: $source_run_id,
132+
source_run_attempt: $source_run_attempt,
133+
source_run_url: $source_run_url,
134+
source_pr_number: $source_pr_number,
135+
source_head_sha: $source_head_sha,
136+
ingest_run_id: $ingest_run_id,
137+
ingest_run_attempt: $ingest_run_attempt,
138+
ingest_run_url: $ingest_run_url
139+
}' \
140+
> "$artifacts_dir/reused-ingest-metadata/reuse_source_run.json"
141+
142+
- name: Validate reusable artifacts
143+
run: |
144+
python3 utils/validate_reusable_sweep_artifacts.py \
145+
--config-json "$RUNNER_TEMP/full-config.json" \
146+
--artifacts-dir "$RUNNER_TEMP/source-artifacts"
147+
148+
- name: Upload reusable ingest artifacts
149+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
150+
with:
151+
name: reused-ingest-artifacts
152+
path: ${{ runner.temp }}/source-artifacts/*
153+
154+
- name: Upload corrected changelog metadata
155+
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
156+
with:
157+
name: changelog-metadata
158+
path: ${{ runner.temp }}/changelog-metadata/changelog_metadata.json
159+
160+
- name: Trigger database ingest
161+
run: |
162+
curl -sSf -X POST \
163+
-H "Authorization: Bearer ${{ secrets.INFX_FRONTEND_PAT }}" \
164+
-H "Accept: application/vnd.github+v3+json" \
165+
https://api.github.com/repos/SemiAnalysisAI/InferenceX-app/dispatches \
166+
-d '{
167+
"event_type": "ingest-results",
168+
"client_payload": {
169+
"run-id": "${{ github.run_id }}",
170+
"run-attempt": "${{ github.run_attempt }}"
171+
}
172+
}'

benchmarks/single_node/fixed_seq_len/dsv4_fp4_mi355x_atom.sh

Lines changed: 26 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,31 +22,51 @@ echo "TP: $TP, CONC: $CONC, ISL: $ISL, OSL: $OSL, EP_SIZE: $EP_SIZE, DP_ATTENTIO
2222
SERVER_LOG=/workspace/server.log
2323

2424
PARALLEL_ARGS=(-tp "$TP") #TP
25+
CUDAGRAPH_SIZES='[1, 2, 4, 8, 16, 32, 48, 64, 128, 256, 512]'
2526
if [ "$DP_ATTENTION" = "true" ]; then
2627
if [ "$EP_SIZE" -gt 1 ]; then #DP+EP
2728
PARALLEL_ARGS=(-tp "$TP" --enable-expert-parallel --enable-dp-attention )
28-
else #DP+TP
29-
PARALLEL_ARGS=(-tp "$TP" --enable-dp-attention )
29+
else #DPA+TP
30+
#DPA+TP+TBO
31+
if [ "$ISL" -eq 1024 ] && [ "$OSL" -eq 1024 ] && [ "$CONC" -ge 1024 ]; then
32+
PARALLEL_ARGS=(-tp "$TP" --enable-dp-attention --enable-tbo)
33+
export GPU_MAX_HW_QUEUES=5
34+
elif [ "$ISL" -eq 8192 ] && [ "$OSL" -eq 1024 ] && [ "$CONC" -ge 256 ]; then
35+
PARALLEL_ARGS=(-tp "$TP" --enable-dp-attention --enable-tbo)
36+
export GPU_MAX_HW_QUEUES=5
37+
else
38+
PARALLEL_ARGS=(-tp "$TP" --enable-dp-attention )
39+
fi
3040
fi
3141
fi
3242

43+
BENCHMARK_MAX_MODEL_LEN="$MAX_MODEL_LEN"
44+
45+
if [ "${EVAL_ONLY}" = "true" ]; then
46+
EVAL_MAX_MODEL_LEN=$(compute_eval_context_length "$MODEL" "$BENCHMARK_MAX_MODEL_LEN")
47+
export EVAL_MAX_MODEL_LEN
48+
fi
3349
# Start GPU monitoring (power, temperature, clocks every second)
3450
start_gpu_monitor
3551

3652
set -x
3753
export ATOM_DISABLE_MMAP=true
3854
export AITER_BF16_FP8_MOE_BOUND=0
3955
export ATOM_MOE_GU_ITLV=1
40-
# TODO: add --no-enable_chunked_prefill, when dsv4 prefix caching is supported
41-
#https://github.com/ROCm/ATOM/commit/7df93a181da4d3c3250c2441c7d5e2745a03d0cd#diff-61b1ba0b8b74523530d2d5cdc739d4f3a23a43bedf69015a5235844d46e9373bL1127
56+
MEM_FRAC_STATIC=0.9
57+
OPT_ARGS=(--hf-overrides '{"use_index_cache": true, "index_topk_freq": 4}')
58+
4259
python3 -m atom.entrypoints.openai_server \
4360
--model $MODEL \
4461
--server-port $PORT \
4562
"${PARALLEL_ARGS[@]}" \
4663
--kv_cache_dtype fp8 \
4764
--trust-remote-code \
48-
--gpu-memory-utilization 0.85 \
49-
> $SERVER_LOG 2>&1 &
65+
--gpu-memory-utilization $MEM_FRAC_STATIC \
66+
--no-enable_prefix_caching \
67+
--cudagraph-capture-sizes "${CUDAGRAPH_SIZES}" \
68+
"${OPT_ARGS[@]}" \
69+
> "$SERVER_LOG" 2>&1 &
5070

5171
SERVER_PID=$!
5272

perf-changelog.yaml

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3928,6 +3928,14 @@
39283928
- "Runner script launch_gb300-nv.sh: added dynamo-trt-specific glm5-fp4 case with SERVED_MODEL_NAME and SRT_SLURM_MODEL_PREFIX=nvidia/GLM-5-NVFP4"
39293929
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1798
39303930

3931+
- config-keys:
3932+
- dsv4-fp4-mi355x-atom
3933+
description:
3934+
- "Update image to rocm/atom-dev:nightly_202606161823"
3935+
- "Update search-space: ISL=1024 DPA conc-end raised from 1024 to 2048; ISL=8192 now TP4 at conc=8-64, TP8 at conc=1-64, DPA at conc=128-2048 (previously TP8 conc=1-64 and DPA conc=64-512)"
3936+
- "Enable TBO (--enable-tbo) with DP attention at high concurrency: conc>=1024 for ISL=1024, conc>=256 for ISL=8192"
3937+
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1717
3938+
39313939
- config-keys:
39323940
- minimaxm3-fp8-mi300x-vllm
39333941
description:

0 commit comments

Comments
 (0)