diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml index c5ece9804..77c99c974 100644 --- a/.github/workflows/run-sweep.yml +++ b/.github/workflows/run-sweep.yml @@ -88,7 +88,7 @@ jobs: contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') run: | - echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to max(conc) per parallelism config." + echo "::error::PR has both 'sweep-enabled' and 'full-sweep-enabled' labels. Remove one — 'full-sweep-enabled' runs the full intermediate concurrency sweep; 'sweep-enabled' trims to min(conc) per parallelism config." exit 1 - name: Checkout code diff --git a/AGENTS.md b/AGENTS.md index cd057f4d3..173353b3d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -61,12 +61,12 @@ Git: conventional commit messages. `[skip-sweep]` in commit message skips benchm PRs do not run the sweep automatically - `run-sweep.yml` is gated on a label. Pick exactly one; setting both is rejected by the workflow's `setup` job. -- `sweep-enabled` - runs the sweep with `--trim-conc` (each parallelism config reduced to its single highest concurrency). Default for most PRs. +- `sweep-enabled` - runs the sweep with `--trim-conc` (each parallelism config reduced to its single lowest concurrency). Default for most PRs. - `full-sweep-enabled` - runs the full intermediate concurrency sweep, identical to push-to-main. Use when intermediate points matter (e.g. a recipe change shifts the throughput/latency curve, not just its endpoints). **The sweep does not trigger while the PR has merge conflicts.** Even with `sweep-enabled` / `full-sweep-enabled` applied, the `run-sweep.yml` workflow will not start until the PR cleanly merges into main — a stale claude/* or update-* branch with a `perf-changelog.yaml` conflict (the common case) will sit in NO_SWEEP / NO_SUCCESS until rebased. Resolution recipe is documented in `KLAUD_DEBUG.md §1.1`: `git merge origin/main`, then `git checkout origin/main -- perf-changelog.yaml`, then re-append the PR's own changelog entry at the tail. Don't 3-way merge `perf-changelog.yaml`; whitespace edits silently re-trigger the deletion check. -Push-to-main always runs the full untrimmed sweep unless `[skip-sweep]` is in the commit message. Trim logic lives in `trim_conc()` in `utils/process_changelog.py`: single-node entries are grouped by every non-`conc` field and only the highest-`conc` entry per group is kept; multi-node entries have their `conc` list collapsed to `[max(conc)]`. +Push-to-main always runs the full untrimmed sweep unless `[skip-sweep]` is in the commit message. Trim logic lives in `trim_conc()` in `utils/process_changelog.py`: single-node entries are grouped by every non-`conc` field and only the lowest-`conc` entry per group is kept; multi-node entries have their `conc` list collapsed to `[min(conc)]`. ## Common Tasks diff --git a/utils/process_changelog.py b/utils/process_changelog.py index 4c8c07864..1514f8d36 100644 --- a/utils/process_changelog.py +++ b/utils/process_changelog.py @@ -42,13 +42,13 @@ def get_added_lines(base_ref: str, head_ref: str, filepath: str) -> str: def trim_conc(entries: list[dict]) -> list[dict]: - """Trim each parallelism config's concurrency sweep to its highest point. + """Trim each parallelism config's concurrency sweep to its lowest point. Non-full-sweep PRs only need a single concurrency point per parallelism config to validate a change runs end-to-end, so the shared cluster stays clear. Push-to-main and ``full-sweep-enabled`` PRs skip this reduction. - The retained value is the maximum configured concurrency — independent of + The retained value is the minimum configured concurrency — independent of the source ordering of ``conc-list`` / ``conc-start``. Input comes from ``json.loads(subprocess.stdout)`` so ``conc`` is always @@ -56,8 +56,8 @@ def trim_conc(entries: list[dict]) -> list[dict]: are hashable scalars. - Single-node entries: group by every other field and keep only the entry - with the highest ``conc`` per group. - - Multi-node entries: trim the ``conc`` list in place to ``[max(conc)]``. + with the lowest ``conc`` per group. + - Multi-node entries: trim the ``conc`` list in place to ``[min(conc)]``. """ groups: dict[tuple, list[int]] = {} out: list[dict] = [] @@ -66,7 +66,7 @@ def trim_conc(entries: list[dict]) -> list[dict]: if entry.get("prefill") is not None: conc = entry.get("conc") if isinstance(conc, list) and len(conc) > 1: - entry = {**entry, "conc": [max(conc)]} + entry = {**entry, "conc": [min(conc)]} out.append(entry) continue @@ -77,7 +77,7 @@ def trim_conc(entries: list[dict]) -> list[dict]: drop: set[int] = set() for idxs in groups.values(): if len(idxs) > 1: - keep = max(idxs, key=lambda i: out[i]["conc"]) + keep = min(idxs, key=lambda i: out[i]["conc"]) drop.update(i for i in idxs if i != keep) return [e for i, e in enumerate(out) if i not in drop]