Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
433 changes: 433 additions & 0 deletions .claude/commands/recover-failed-ingest.md

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions .github/workflows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,12 @@ must still contain complete artifacts for the merge run's expected matrix.

The comment is the reuse authorization, so adding it does not trigger or cancel
a PR sweep. Once the comment is present, later commits pushed to a PR with a
full-sweep label do not start another benchmark sweep. GitHub still creates a
lightweight `pull_request` workflow run so it can inspect the PR comments, but
the sweep setup and benchmark jobs are skipped. Removing and re-adding a sweep
label explicitly starts a new sweep.
full-sweep label do not start another benchmark sweep. GitHub still runs the
CPU-only `check-changelog` job on the new commit before inspecting the reuse
authorization. That job validates the complete YAML/schema, append-only entry
ordering, changed-line whitespace, PR links, and the generated sweep config.
Only after it passes can the reuse gate skip sweep setup and benchmark jobs.
Removing and re-adding a sweep label explicitly starts a new sweep.

On the push-to-main run, `run-sweep.yml` resolves the merged PR from the merge
commit, verifies the source run is an eligible `pull_request` `run-sweep.yml`
Expand Down
84 changes: 43 additions & 41 deletions .github/workflows/run-sweep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,47 @@
- "perf-changelog.yaml"

jobs:
check-changelog:
  # Validates perf-changelog.yaml for the commit range under test. Downstream
  # jobs in this workflow gate on this job's result.
  runs-on: ubuntu-latest
  # Least privilege: this job only checks out the repository and runs a local
  # validation script, so a read-only contents scope is sufficient.
  permissions:
    contents: read
  # Skip draft pull requests; every non-pull_request event always runs.
  if: >-
    github.event_name != 'pull_request' ||
    !github.event.pull_request.draft
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        # Full history: the validator below is handed explicit base/head refs.
        fetch-depth: 0

    - name: Validate perf-changelog.yaml
      # Event data is passed through the environment instead of being
      # interpolated into the script text, so values such as a branch name
      # can never be parsed as shell syntax.
      env:
        EVENT_NAME: ${{ github.event_name }}
        BASE_BRANCH: ${{ github.base_ref }}
        PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
        PR_NUMBER: ${{ github.event.pull_request.number }}
        PUSH_BEFORE: ${{ github.event.before }}
        PUSH_AFTER: ${{ github.event.after }}
      run: |
        pip install pydantic pyyaml

        # --pr-number is only supplied on pull_request events.
        PR_ARGS=()
        if [ "$EVENT_NAME" = "pull_request" ]; then
          BASE_REF="origin/${BASE_BRANCH}"
          HEAD_REF="$PR_HEAD_SHA"
          PR_ARGS=(--pr-number "$PR_NUMBER")
        else
          BASE_REF="$PUSH_BEFORE"
          HEAD_REF="$PUSH_AFTER"
        fi

        python3 utils/validate_perf_changelog.py \
          --changelog-file perf-changelog.yaml \
          --base-ref "$BASE_REF" \
          --head-ref "$HEAD_REF" \
          "${PR_ARGS[@]}"

reuse-sweep-gate:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}
needs: check-changelog
runs-on: ubuntu-latest
permissions:
contents: read
issues: read
pull-requests: read
if: >-
always() &&
needs.check-changelog.result == 'success' &&
github.event_name == 'pull_request' &&
github.event.action == 'synchronize' &&
!github.event.pull_request.draft &&
Expand Down Expand Up @@ -70,150 +104,117 @@
--ref "${{ github.ref }}" \
--workflow-id "run-sweep.yml"

check-newline:
  # Fails the run when perf-changelog.yaml does not end with a newline.
  runs-on: ubuntu-latest
  needs: reuse-sweep-gate
  # Runs only for non-draft pull requests, and only when the reuse gate was
  # skipped or succeeded without requesting a skip. Label events count only
  # when the label is one of the sweep labels.
  if: >-
    always() &&
    github.event_name == 'pull_request' &&
    !github.event.pull_request.draft &&
    (
      needs.reuse-sweep-gate.result == 'skipped' ||
      (
        needs.reuse-sweep-gate.result == 'success' &&
        needs.reuse-sweep-gate.outputs.skip-pr-sweep != 'true'
      )
    ) &&
    (
      (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
      github.event.label.name == 'sweep-enabled' ||
      github.event.label.name == 'full-sweep-enabled' ||
      github.event.label.name == 'non-canary-full-sweep-enabled' ||
      github.event.label.name == 'full-sweep-fail-fast' ||
      github.event.label.name == 'full-sweep-fail-fast-no-canary'
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Check perf-changelog.yaml ends with newline
      run: |
        # Command substitution strips a trailing newline, so an empty result
        # means the last byte of the file is a newline.
        if [ -z "$(tail -c 1 perf-changelog.yaml)" ]; then
          exit 0
        fi
        echo "::error::perf-changelog.yaml must end with a newline character"
        echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs."
        exit 1

setup:
  # Builds the sweep search-space config from perf-changelog.yaml and looks up
  # a reusable sweep run.
  #
  # A mapping may declare `needs` only once. The single declaration below lists
  # both upstream jobs, matching the `needs.check-changelog.result` and
  # `needs.reuse-sweep-gate.*` references in the condition.
  needs: [check-changelog, reuse-sweep-gate]
  runs-on: ubuntu-latest
  # always() keeps this job evaluable when reuse-sweep-gate is skipped, so the
  # upstream results are checked explicitly:
  #   - check-changelog must have succeeded;
  #   - the reuse gate was skipped, or succeeded without asking to skip;
  #   - pull_request: non-draft, carries a sweep label, and (for label events)
  #     the label that changed is a sweep label;
  #   - any other event: head commit message does not contain '[skip-sweep]'.
  if: >-
    always() &&
    needs.check-changelog.result == 'success' &&
    (
      needs.reuse-sweep-gate.result == 'skipped' ||
      (
        needs.reuse-sweep-gate.result == 'success' &&
        needs.reuse-sweep-gate.outputs.skip-pr-sweep != 'true'
      )
    ) &&
    (
      (
        github.event_name == 'pull_request' &&
        !github.event.pull_request.draft &&
        (
          contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
          contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
          contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') ||
          contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast') ||
          contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast-no-canary')
        ) &&
        (
          (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
          github.event.label.name == 'sweep-enabled' ||
          github.event.label.name == 'full-sweep-enabled' ||
          github.event.label.name == 'non-canary-full-sweep-enabled' ||
          github.event.label.name == 'full-sweep-fail-fast' ||
          github.event.label.name == 'full-sweep-fail-fast-no-canary'
        )
      ) ||
      (
        github.event_name != 'pull_request' &&
        !contains(github.event.head_commit.message, '[skip-sweep]')
      )
    )
  outputs:
    search-space-config: ${{ steps.setup.outputs.search-space-config }}
    reuse-enabled: ${{ steps.setup.outputs.reuse-enabled }}
    reuse-source-run-id: ${{ steps.setup.outputs.reuse-source-run-id }}
    reuse-source-run-attempt: ${{ steps.setup.outputs.reuse-source-run-attempt }}
    reuse-source-run-url: ${{ steps.setup.outputs.reuse-source-run-url }}
    reuse-source-pr-number: ${{ steps.setup.outputs.reuse-source-pr-number }}
    reuse-source-head-sha: ${{ steps.setup.outputs.reuse-source-head-sha }}
  steps:
    # Every pairwise combination of the five sweep labels is rejected, so a PR
    # can carry at most one of them.
    - name: Reject conflicting sweep labels
      if: >-
        github.event_name == 'pull_request' &&
        (
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) ||
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) ||
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast')) ||
          (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) ||
          (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast')) ||
          (contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast')) ||
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast-no-canary')) ||
          (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast-no-canary')) ||
          (contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast-no-canary')) ||
          (contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast') && contains(github.event.pull_request.labels.*.name, 'full-sweep-fail-fast-no-canary'))
        )
      run: |
        echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to min(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), 'non-canary-full-sweep-enabled' (full sweep, no canary gate), 'full-sweep-fail-fast' (full sweep behind the canary gate, first failure in a matrix cancels that matrix's remaining jobs), or 'full-sweep-fail-fast-no-canary' (the same, without the canary gate)."
        exit 1

    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 0

    - id: setup
      env:
        GH_TOKEN: ${{ github.token }}
        # "true" only for pull requests labelled 'sweep-enabled'; it switches
        # on --trim-conc below.
        TRIM_CONC: >-
          ${{
            github.event_name == 'pull_request' &&
            contains(github.event.pull_request.labels.*.name, 'sweep-enabled')
          }}
      run: |
        pip install pydantic

        if [ "${{ github.event_name }}" == "pull_request" ]; then
          BASE_REF="origin/${{ github.base_ref }}"
          HEAD_REF="${{ github.event.pull_request.head.sha }}"
        else
          BASE_REF="${{ github.event.before }}"
          HEAD_REF="${{ github.event.after }}"
        fi

        CMD=(
          python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py"
          --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml"
          --base-ref "$BASE_REF"
          --head-ref "$HEAD_REF"
        )
        if [ "$TRIM_CONC" = "true" ]; then
          CMD+=(--trim-conc)
        fi

        CONFIG_JSON=$("${CMD[@]}")

        echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
        python3 "${GITHUB_WORKSPACE}/utils/find_reusable_sweep_run.py" \
          --repo "${{ github.repository }}" \
          --commit-sha "${{ github.sha }}" \
          --event-name "${{ github.event_name }}" \
          --ref "${{ github.ref }}" \
          --workflow-id "run-sweep.yml"

canary-select:

Check warning

Code scanning / CodeQL

Workflow does not contain permissions Medium

Actions job or workflow does not limit the permissions of the GITHUB_TOKEN. Consider setting an explicit permissions block, using the following as a minimal starting point: {contents: read}
needs: setup
if: >-
needs.setup.outputs.reuse-enabled != 'true' &&
Expand Down Expand Up @@ -614,12 +615,13 @@

reuse-ingest-artifacts:
needs: setup
# `setup` runs via always() and depends on `reuse-sweep-gate`, which is
# skipped on every push-to-main. Without always() here, that skipped
# gate poisons this job's implicit success() check and it is skipped
# even when setup succeeded with reuse-enabled=true — silently breaking
# the merge-time ingest. Guard explicitly on setup success instead
# (same pattern as comment-unofficial-run-visualizer).
# `setup` runs via always() and depends on `check-changelog` plus
# `reuse-sweep-gate`, which is skipped on every push-to-main. Without
# always() here, that skipped gate poisons this job's implicit
# success() check and it is skipped even when setup succeeded with
# reuse-enabled=true — silently breaking the merge-time ingest. Guard
# explicitly on setup success instead (same pattern as
# comment-unofficial-run-visualizer).
if: >-
always() &&
needs.setup.result == 'success' &&
Expand Down
5 changes: 5 additions & 0 deletions KLAUD_DEBUG.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ python3 -c "import yaml; yaml.safe_load(open('perf-changelog.yaml'))"

Do **not** try a 3-way merge of `perf-changelog.yaml` — whitespace edits will silently re-trigger the deletion check.

After committing and pushing the resolution, wait for `check-changelog` on the
new head SHA. `/reuse-sweep-run` skips setup and GPU jobs only after this
CPU-only validation succeeds. `utils/merge_with_reuse.sh <PR>` performs this
wait automatically.

---

## 2. vLLM v0.21.x / v0.20.x: GPU OOM at model-load
Expand Down
6 changes: 3 additions & 3 deletions perf-changelog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2528,7 +2528,7 @@
- minimaxm2.5-fp8-mi355x-vllm
description:
- "Update vLLM ROCm image from v0.19.0 to v0.21.0"
pr-link: XXX
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1410

- config-keys:
- gptoss-fp4-h200-vllm
Expand Down Expand Up @@ -2582,7 +2582,7 @@
- dsr1-fp4-b300-sglang
description:
- "Update SGLang image from v0.5.11-cu130 to v0.5.12-cu130"
pr-link: XXX
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1418

- config-keys:
- kimik2.5-int4-h200-vllm
Expand Down Expand Up @@ -3699,7 +3699,7 @@
description:
- "Add MiniMax-M2.5 NVFP4 B200 TensorRT-LLM single-node benchmark (1k1k and 8k1k)"
- "Image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc18"
pr-link: https://github.com/NVIDIA/InferenceMAX/pull/1722
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1722

- config-keys:
- minimaxm3-fp8-h200-vllm-mtp
Expand Down
45 changes: 41 additions & 4 deletions utils/merge_with_reuse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@
# 2. Merge origin/main into the PR branch. Any `perf-changelog.yaml`
# conflict is auto-resolved by accepting main's entries and re-appending
# the PR's entry at the bottom with `XXX` -> the PR number.
# 3. Push the merge commit. The PR synchronize run observes the reuse
# authorization and skips sweep setup and benchmark jobs.
# 4. Squash-merge the PR to main (--admin).
# 3. Push the merge commit and wait for `check-changelog` on that exact SHA.
# The PR synchronize run then observes the reuse authorization and skips
# sweep setup and benchmark jobs.
# 4. Squash-merge the PR to main (--admin) only after validation succeeds.
#
# Usage: utils/merge_with_reuse.sh <pr-number>
# Env: REPO (default SemiAnalysisAI/InferenceX)
# CHECK_TIMEOUT_SECONDS (default 900)

set -euo pipefail

REPO="${REPO:-SemiAnalysisAI/InferenceX}"
CHANGELOG="perf-changelog.yaml"
CHECK_TIMEOUT_SECONDS="${CHECK_TIMEOUT_SECONDS:-900}"

if [ $# -ne 1 ] || ! [[ "$1" =~ ^[0-9]+$ ]]; then
echo "Usage: $0 <pr-number>" >&2
Expand All @@ -28,6 +31,38 @@ log() { printf '\033[1;36m→\033[0m %s\n' "$*"; }
ok() { printf '\033[1;32m✓\033[0m %s\n' "$*"; }
die() { printf '\033[1;31m✗\033[0m %s\n' "$*" >&2; exit 1; }

# wait_for_check <sha> <check-name>
#
# Polls the check runs reported for commit <sha> every 5 seconds until the most
# recently started run named <check-name> has completed. Returns 0 when that
# run concluded "success". Calls die on any other conclusion, or once
# CHECK_TIMEOUT_SECONDS have elapsed without a completed run.
wait_for_check() {
  local commit_sha="$1"
  local wanted="$2"
  local stop_at=$((SECONDS + CHECK_TIMEOUT_SECONDS))
  local response latest state result url

  log "Waiting for ${wanted} on ${commit_sha:0:8}"
  until ((SECONDS >= stop_at)); do
    response="$(gh api "repos/${REPO}/commits/${commit_sha}/check-runs?per_page=100")"
    # Keep only runs with the requested name and take the latest by start
    # time; fall back to an empty object when there is none yet.
    latest="$(jq -c --arg name "$wanted" '
      [.check_runs[] | select(.name == $name)]
      | sort_by(.started_at)
      | last // {}
    ' <<<"$response")"
    state="$(jq -r '.status // ""' <<<"$latest")"
    result="$(jq -r '.conclusion // ""' <<<"$latest")"
    url="$(jq -r '.details_url // ""' <<<"$latest")"

    # Not finished (or not created yet): poll again.
    if [ "$state" != "completed" ]; then
      sleep 5
      continue
    fi

    if [ "$result" != "success" ]; then
      die "${wanted} concluded ${result:-unknown}${url:+ — ${url}}"
    fi
    ok "${wanted} passed${url:+ — ${url}}"
    return 0
  done

  die "Timed out after ${CHECK_TIMEOUT_SECONDS}s waiting for ${wanted} on ${commit_sha}"
}

ORIGINAL_BRANCH="$(git symbolic-ref --quiet --short HEAD || git rev-parse HEAD)"
cleanup() { git checkout --quiet "$ORIGINAL_BRANCH" 2>/dev/null || true; }
trap cleanup EXIT
Expand Down Expand Up @@ -192,9 +227,11 @@ if [ "$PRE_MERGE" = "$POST_MERGE" ]; then
else
log "Pushing merge commit ${POST_MERGE:0:8}"
git push origin "${LOCAL_BRANCH}:${HEAD_BRANCH}"
ok "Push complete; the reuse authorization will suppress the synchronize sweep"
ok "Push complete; changelog validation will run before the reuse gate"
fi

wait_for_check "$POST_MERGE" "check-changelog"
Comment thread
cursor[bot] marked this conversation as resolved.

# --- step 4: squash-merge to main -------------------------------------------
log "Squash-merging PR #${PR} into main"
gh pr merge "$PR" --repo "$REPO" --squash --admin >/dev/null
Expand Down
126 changes: 126 additions & 0 deletions utils/test_validate_perf_changelog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from __future__ import annotations

import pytest
import yaml

from validate_perf_changelog import (
ChangelogValidationError,
compare_entries,
parse_changelog,
)


def entry(
    key: str,
    link: str = "https://github.com/SemiAnalysisAI/InferenceX/pull/1",
) -> dict[str, object]:
    """Build a minimal changelog entry fixture.

    Args:
        key: The single config key placed in ``config-keys``; also used in the
            generated description.
        link: Value stored under ``pr-link``.

    Returns:
        A dict with ``config-keys``, ``description`` and ``pr-link``, inserted
        in that order.
    """
    record = {}
    record["config-keys"] = [key]
    record["description"] = [f"Update {key}"]
    record["pr-link"] = link
    return record


def render(entries: list[dict[str, object]]) -> bytes:
    """Serialize entries to YAML bytes, keeping each mapping's key order."""
    text = yaml.safe_dump(entries, sort_keys=False)
    return text.encode()


def test_parse_changelog_validates_complete_file() -> None:
    """A rendered single-entry changelog parses back to the same entry list."""
    raw = render([entry("config-a")])

    result = parse_changelog(raw, "test changelog")

    assert result == [entry("config-a")]


def test_parse_changelog_rejects_missing_final_newline() -> None:
    """Input stripped of its trailing newline raises an 'end with a newline' error."""
    rendered = render([entry("config-a")])
    truncated = rendered.rstrip(b"\n")

    with pytest.raises(ChangelogValidationError, match="end with a newline"):
        parse_changelog(truncated, "test changelog")


def test_parse_changelog_rejects_malformed_nested_entry() -> None:
    # Expects parse_changelog to raise ChangelogValidationError matching
    # "not valid YAML" for the two-entry fixture below.
    # NOTE(review): the fixture's internal indentation is reproduced exactly as
    # it appears in this view; the committed fixture presumably mis-indents one
    # nested entry on purpose. Confirm against the repository file before
    # relying on this text.
    raw = b"""- config-keys:
- config-a
description:
- Update config-a
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1
- config-keys:
- config-b
description:
- Update config-b
pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/2
"""

    with pytest.raises(ChangelogValidationError, match="not valid YAML"):
        parse_changelog(raw, "test changelog")


def test_compare_entries_allows_appended_pr_entry() -> None:
    """Appending one ``XXX``-linked entry for PR 42 yields that addition and no corrections."""
    existing = [entry("config-a")]
    appended = entry("config-b", "XXX")
    head = existing + [appended]

    additions, corrections = compare_entries(existing, head, 42)

    assert additions == [appended]
    assert corrections == 0


def test_compare_entries_rejects_wrong_pr_link_on_append() -> None:
    """An appended entry linking to pull/41 while validating PR 42 raises a 'new PR entry' error."""
    existing = [entry("config-a")]
    wrong_link = "https://github.com/SemiAnalysisAI/InferenceX/pull/41"
    head = existing + [entry("config-b", wrong_link)]

    with pytest.raises(ChangelogValidationError, match="new PR entry"):
        compare_entries(existing, head, 42)


def test_compare_entries_requires_canonical_link_on_main() -> None:
    """With no PR number, an appended ``XXX`` link raises a 'main-branch entry' error."""
    existing = [entry("config-a")]
    head = existing + [entry("config-b", "XXX")]

    with pytest.raises(ChangelogValidationError, match="main-branch entry"):
        compare_entries(existing, head, None)


def test_compare_entries_allows_pr_link_only_correction() -> None:
    """Replacing ``XXX`` with a pull/42 link on an existing entry counts as one correction."""
    corrected_link = "https://github.com/SemiAnalysisAI/InferenceX/pull/42"
    before = [entry("config-a", "XXX")]
    after = [entry("config-a", corrected_link)]

    additions, corrections = compare_entries(before, after, 99)

    assert additions == []
    assert corrections == 1


def test_compare_entries_rejects_existing_content_change() -> None:
    """Changing an existing entry's description raises an 'entry 1 changed' error."""
    original = [entry("config-a")]
    edited = entry("config-a")
    edited["description"] = ["Different description"]

    with pytest.raises(ChangelogValidationError, match="entry 1 changed"):
        compare_entries(original, [edited], 42)


def test_compare_entries_rejects_deleted_entry() -> None:
    """An empty head against a one-entry base raises an 'entries were deleted' error."""
    base = [entry("config-a")]
    head = []

    with pytest.raises(ChangelogValidationError, match="entries were deleted"):
        compare_entries(base, head, 42)


def test_compare_entries_rejects_correction_mixed_with_append() -> None:
    """Correcting an existing link and appending an entry together raises a 'do not mix' error."""
    corrected_link = "https://github.com/SemiAnalysisAI/InferenceX/pull/42"
    before = [entry("config-a", "XXX")]
    after = [
        entry("config-a", corrected_link),
        entry("config-b", "XXX"),
    ]

    with pytest.raises(ChangelogValidationError, match="do not mix"):
        compare_entries(before, after, 42)
Loading