Skip to content

Run Sweep - chore(sweep): re-run MiniMax-M2.5 vLLM sweeps to capture power telemetry #4664

Run Sweep - chore(sweep): re-run MiniMax-M2.5 vLLM sweeps to capture power telemetry

Run Sweep - chore(sweep): re-run MiniMax-M2.5 vLLM sweeps to capture power telemetry #4664

Workflow file for this run

# Run Sweep: launches benchmark/eval sweeps whenever perf-changelog.yaml
# changes on main (push) or in a pull request that targets main.
name: "Run Sweep"
run-name: Run Sweep - ${{ github.event.pull_request.title || github.event.head_commit.message }}

# Concurrency key is "sweep-<PR number or commit SHA>-<suffix>".
# The suffix is the unique run id for labeled/unlabeled events whose label is
# NOT one of the three sweep labels, so those runs never cancel anything else;
# every other event uses the shared suffix 'active', so a newer run for the
# same PR/commit cancels the one in progress.
concurrency:
  group: >-
    sweep-${{ github.event.pull_request.number || github.sha }}-${{
      github.event_name == 'pull_request' &&
      (github.event.action == 'labeled' || github.event.action == 'unlabeled') &&
      github.event.label.name != 'sweep-enabled' &&
      github.event.label.name != 'full-sweep-enabled' &&
      github.event.label.name != 'non-canary-full-sweep-enabled' &&
      github.run_id ||
      'active'
    }}
  cancel-in-progress: true

on:
  push:
    branches: [main]
    paths: ["perf-changelog.yaml"]
  pull_request:
    branches: [main]
    types:
      - ready_for_review
      - synchronize
      - labeled
      - unlabeled
    paths: ["perf-changelog.yaml"]

jobs:
# Lint job: fails a PR when perf-changelog.yaml does not end with a newline.
check-newline:
  runs-on: ubuntu-latest
  # Non-draft pull requests only. For labeled/unlabeled events the job runs
  # only when the toggled label is one of the three sweep labels.
  if: >-
    github.event_name == 'pull_request' &&
    !github.event.pull_request.draft &&
    (
      (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
      github.event.label.name == 'sweep-enabled' ||
      github.event.label.name == 'full-sweep-enabled' ||
      github.event.label.name == 'non-canary-full-sweep-enabled'
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    # Command substitution drops a trailing newline, so a non-empty result
    # from `tail -c 1` means the final byte of the file is not "\n".
    - name: Check perf-changelog.yaml ends with newline
      run: |
        if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then
          echo "::error::perf-changelog.yaml must end with a newline character"
          echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs."
          exit 1
        fi
# Computes the search-space config for this run from perf-changelog.yaml and
# looks for an earlier sweep run whose artifacts can be reused.
setup:
  runs-on: ubuntu-latest
  # Runs when either:
  #   * the event is a non-draft PR that carries at least one sweep label and,
  #     for labeled/unlabeled events, the toggled label is a sweep label; or
  #   * the event is not a PR and the head commit message does not contain
  #     '[skip-sweep]'.
  if: >-
    (
      github.event_name == 'pull_request' &&
      !github.event.pull_request.draft &&
      (
        contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
        contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
        contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
      ) &&
      (
        (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
        github.event.label.name == 'sweep-enabled' ||
        github.event.label.name == 'full-sweep-enabled' ||
        github.event.label.name == 'non-canary-full-sweep-enabled'
      )
    ) ||
    (
      github.event_name != 'pull_request' &&
      !contains(github.event.head_commit.message, '[skip-sweep]')
    )
  outputs:
    search-space-config: ${{ steps.setup.outputs.search-space-config }}
    reuse-enabled: ${{ steps.setup.outputs.reuse-enabled }}
    reuse-source-run-id: ${{ steps.setup.outputs.reuse-source-run-id }}
    reuse-source-run-attempt: ${{ steps.setup.outputs.reuse-source-run-attempt }}
    reuse-source-run-url: ${{ steps.setup.outputs.reuse-source-run-url }}
    reuse-source-pr-number: ${{ steps.setup.outputs.reuse-source-pr-number }}
    reuse-source-head-sha: ${{ steps.setup.outputs.reuse-source-head-sha }}
  steps:
    # Fail fast when a PR carries two or more of the three sweep labels.
    - name: Reject conflicting sweep labels
      if: >-
        github.event_name == 'pull_request' &&
        (
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')) ||
          (contains(github.event.pull_request.labels.*.name, 'sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')) ||
          (contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') && contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled'))
        )
      run: |
        echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to min(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)."
        exit 1

    # Full history is fetched; the changelog script below is given a base and
    # a head ref to compare.
    - name: Checkout code
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        fetch-depth: 0

    # Writes `search-space-config` to $GITHUB_OUTPUT, then runs the reuse
    # lookup script.
    # NOTE(review): no visible line writes the reuse-* outputs declared above;
    # presumably find_reusable_sweep_run.py appends them to $GITHUB_OUTPUT
    # itself — confirm against that script.
    - id: setup
      env:
        GH_TOKEN: ${{ github.token }}
        # "true" only for PRs labeled 'sweep-enabled'; adds --trim-conc below.
        TRIM_CONC: >-
          ${{
            github.event_name == 'pull_request' &&
            contains(github.event.pull_request.labels.*.name, 'sweep-enabled')
          }}
      run: |
        pip install pydantic

        if [ "${{ github.event_name }}" == "pull_request" ]; then
          BASE_REF="origin/${{ github.base_ref }}"
          HEAD_REF="${{ github.event.pull_request.head.sha }}"
        else
          BASE_REF="${{ github.event.before }}"
          HEAD_REF="${{ github.event.after }}"
        fi

        CMD=(
          python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py"
          --changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml"
          --base-ref "$BASE_REF"
          --head-ref "$HEAD_REF"
        )
        if [ "$TRIM_CONC" = "true" ]; then
          CMD+=(--trim-conc)
        fi

        CONFIG_JSON=$("${CMD[@]}")
        echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"

        python3 "${GITHUB_WORKSPACE}/utils/find_reusable_sweep_run.py" \
          --repo "${{ github.repository }}" \
          --commit-sha "${{ github.sha }}" \
          --event-name "${{ github.event_name }}" \
          --ref "${{ github.ref }}" \
          --workflow-id "run-sweep.yml"
# Picks a single "canary" config out of the single-node 1k1k/8k1k search space
# and emits the search space with that entry removed. Only runs for PRs
# labeled 'full-sweep-enabled' when artifact reuse is not enabled.
canary-select:
  needs: setup
  if: >-
    needs.setup.outputs.reuse-enabled != 'true' &&
    github.event_name == 'pull_request' &&
    contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
  runs-on: ubuntu-latest
  outputs:
    canary-config: ${{ steps.pick.outputs.canary-config }}
    remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }}
  steps:
    # The canary is the lowest-`conc` entry among 1k1k + 8k1k entries whose
    # `run-eval` is not true. `canary` is a 0- or 1-element array; `remaining`
    # is the input config with the first matching entry deleted from each list.
    - id: pick
      env:
        SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }}
      run: |
        selection=$(jq -c '
          def remove_one($needle):
            if $needle == null then .
            else
              (index($needle)) as $idx
              | if $idx == null then . else del(.[$idx]) end
            end;
          # Canary is a benchmark-only smoke test — exclude entries
          # whose primary purpose is eval (run-eval == true) so the
          # picked canary never runs an eval pass.
          (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // []))
            | map(select(.["run-eval"] != true))) as $candidates
          | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary
          | {
              canary: (if $canary == null then [] else [$canary] end),
              remaining: (
                .
                | .single_node = (.single_node // {})
                | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary))
                | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary))
              )
            }
        ' <<<"$SEARCH_SPACE")
        echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT"
        echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT"
# Runs the selected canary config through the single-node benchmark template.
# Skipped when canary-select produced no canary (empty output or '[]').
canary-sweep:
  needs: canary-select
  if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  name: canary /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.canary-select.outputs.canary-config) }}
  secrets: inherit
  with:
    exp-name: ${{ matrix.config.exp-name }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Evals are always disabled for the canary, whatever the config says.
    run-eval: false
# Multi-node benchmark sweep over the `multi_node["1k1k"]` entries of the
# search space. Defines the `multi-node-inputs` anchor that the 8k1k
# multi-node job aliases.
sweep-multi-node-1k1k:
  needs:
    - setup
    - canary-select
    - canary-sweep
  # Runs when the run was not cancelled, setup succeeded, reuse is off, the
  # canary either passed or was skipped, and the config has a 1k1k list.
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
    }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: multi-node 1k1k /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
  secrets: inherit
  with: &multi-node-inputs
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    exp-name: ${{ matrix.config.exp-name }}
    # `conc` is serialized to a JSON string for the template's conc-list input.
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
    run-eval: false
# Multi-node benchmark sweep over the `multi_node["8k1k"]` entries of the
# search space. Same gating as the 1k1k multi-node job, keyed on the 8k1k list.
sweep-multi-node-8k1k:
  needs:
    - setup
    - canary-select
    - canary-sweep
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
    }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: multi-node 8k1k /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
  secrets: inherit
  # Alias of the input mapping anchored as `multi-node-inputs` in this file.
  with: *multi-node-inputs
# Single-node benchmark sweep over the `single_node["1k1k"]` entries.
# Config source: when the canary succeeded, canary-select's
# remaining-search-space-config (the canary entry already removed); otherwise
# setup's full search-space-config. Defines the `single-node-inputs` anchor.
sweep-single-node-1k1k:
  needs:
    - setup
    - canary-select
    - canary-sweep
  # The last two clauses skip the job when the chosen 1k1k list is missing
  # ('null') or empty ('[]').
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
      toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
    }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  name: single-node 1k1k /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }}
  secrets: inherit
  with: &single-node-inputs
    exp-name: ${{ matrix.config.exp-name }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Unlike the canary and multi-node sweeps, eval is taken from the config.
    run-eval: ${{ matrix.config.run-eval }}
# Single-node benchmark sweep over the `single_node["8k1k"]` entries.
# Uses canary-select's remaining-search-space-config when the canary
# succeeded, otherwise setup's full search-space-config.
sweep-single-node-8k1k:
  needs:
    - setup
    - canary-select
    - canary-sweep
  # The last two clauses skip the job when the chosen 8k1k list is missing
  # ('null') or empty ('[]').
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
      toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
    }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  name: single-node 8k1k /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }}
  secrets: inherit
  # Alias of the input mapping anchored as `single-node-inputs` in this file.
  with: *single-node-inputs
# Single-node agentic-coding sweep over the `single_node["agentic"]` entries.
# Always reads setup's full config; the canary-trimmed config is not used here.
sweep-agentic:
  needs:
    - setup
    - canary-select
    - canary-sweep
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
    }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  name: agentic /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['agentic'] }}
  secrets: inherit
  with:
    exp-name: ${{ matrix.config.exp-name }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    offloading: ${{ matrix.config.offloading }}
    duration: ${{ matrix.config.duration }}
    # Fixed values below are not read from the matrix entry for this scenario.
    isl: '0'
    osl: '0'
    max-model-len: '0'
    spec-decoding: 'none'
    # NOTE(review): written as an expression that yields the string 'false'
    # rather than a bare YAML boolean — presumably deliberate; confirm against
    # the input type declared by benchmark-tmpl.yml.
    disagg: ${{ 'false' }}
    run-eval: false
    scenario-type: agentic-coding
# Multi-node agentic-coding sweep over the `multi_node["agentic"]` entries of
# setup's full search-space config.
sweep-multi-node-agentic:
  needs:
    - setup
    - canary-select
    - canary-sweep
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
    }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: multi-node agentic /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['agentic'] }}
  secrets: inherit
  with:
    exp-name: ${{ matrix.config.exp-name }}
    # Sequence-length inputs are fixed to '0' for this scenario.
    isl: '0'
    osl: '0'
    max-model-len: '0'
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    # The single `conc` value is wrapped in brackets to form a one-element
    # list string; the same value is also passed on its own as `conc` below.
    conc-list: '[${{ matrix.config.conc }}]'
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
    conc: ${{ matrix.config.conc }}
    duration: ${{ matrix.config.duration }}
    run-eval: false
    scenario-type: agentic-coding
# Single-node eval-only runs over the top-level `evals` list of the search
# space. Skipped when that list is missing ('null') or empty ('[]').
sweep-evals:
  needs:
    - setup
    - canary-select
    - canary-sweep
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
      toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
    }}
  uses: ./.github/workflows/benchmark-tmpl.yml
  name: eval /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).evals }}
  secrets: inherit
  with:
    exp-name: ${{ matrix.config.exp-name }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    tp: ${{ matrix.config.tp }}
    ep: ${{ matrix.config.ep }}
    dp-attn: ${{ matrix.config.dp-attn }}
    conc: ${{ matrix.config.conc }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    # Both eval flags are hard-coded on for this job.
    run-eval: true
    eval-only: true
# Multi-node eval-only runs over the top-level `multinode_evals` list of the
# search space. Skipped when that list is missing ('null') or empty ('[]').
sweep-multi-node-evals:
  needs:
    - setup
    - canary-select
    - canary-sweep
  if: >-
    ${{
      !cancelled() &&
      needs.setup.result == 'success' &&
      needs.setup.outputs.reuse-enabled != 'true' &&
      (needs.canary-sweep.result == 'success' || needs.canary-sweep.result == 'skipped') &&
      toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
      toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
    }}
  uses: ./.github/workflows/benchmark-multinode-tmpl.yml
  name: multi-node eval /
  strategy:
    fail-fast: false
    matrix:
      config: ${{ fromJson(needs.setup.outputs.search-space-config).multinode_evals }}
  secrets: inherit
  with:
    exp-name: ${{ matrix.config.exp-name }}
    isl: ${{ matrix.config.isl }}
    osl: ${{ matrix.config.osl }}
    max-model-len: ${{ matrix.config.max-model-len }}
    runner: ${{ matrix.config.runner }}
    image: ${{ matrix.config.image }}
    model: ${{ matrix.config.model }}
    model-prefix: ${{ matrix.config.model-prefix }}
    framework: ${{ matrix.config.framework }}
    precision: ${{ matrix.config.precision }}
    conc-list: ${{ toJson(matrix.config.conc) }}
    spec-decoding: ${{ matrix.config.spec-decoding }}
    disagg: ${{ matrix.config.disagg }}
    prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
    prefill-tp: ${{ matrix.config.prefill.tp }}
    prefill-ep: ${{ matrix.config.prefill.ep }}
    prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
    prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
    decode-num-worker: ${{ matrix.config.decode.num-worker }}
    decode-tp: ${{ matrix.config.decode.tp }}
    decode-ep: ${{ matrix.config.decode.ep }}
    decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
    decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
    # Both eval flags are hard-coded on; eval concurrency comes from the entry.
    run-eval: true
    eval-only: true
    eval-conc: ${{ matrix.config.eval-conc }}
# Calls the collect-results reusable workflow with the "bmk" result prefix
# once all benchmark sweeps have finished (in any state).
collect-results:
  needs:
    - canary-sweep
    - sweep-single-node-1k1k
    - sweep-single-node-8k1k
    - sweep-agentic
    - sweep-multi-node-1k1k
    - sweep-multi-node-8k1k
    - sweep-multi-node-agentic
    - setup
  # Runs when setup succeeded and either the canary succeeded or at least one
  # of the four 1k1k/8k1k sweeps was not skipped.
  # NOTE(review): sweep-agentic and sweep-multi-node-agentic are listed in
  # `needs` (so this job waits for them) but are not part of the condition;
  # a run in which only agentic sweeps execute therefore skips this job.
  # Confirm that this is intended.
  if: >-
    ${{
      always() &&
      needs.setup.result == 'success' &&
      (
        needs.canary-sweep.result == 'success' ||
        needs.sweep-single-node-1k1k.result != 'skipped' ||
        needs.sweep-single-node-8k1k.result != 'skipped' ||
        needs.sweep-multi-node-1k1k.result != 'skipped' ||
        needs.sweep-multi-node-8k1k.result != 'skipped'
      )
    }}
  uses: ./.github/workflows/collect-results.yml
  secrets: inherit
  with:
    result-prefix: "bmk"
# Calls the collect-evals reusable workflow when setup was not skipped and at
# least one of the two eval sweeps was not skipped.
collect-evals:
  needs:
    - sweep-evals
    - sweep-multi-node-evals
    - setup
  if: ${{ always() && needs.setup.result != 'skipped' && (needs.sweep-evals.result != 'skipped' || needs.sweep-multi-node-evals.result != 'skipped') }}
  uses: ./.github/workflows/collect-evals.yml
  secrets: inherit
# Reuse path: instead of re-running sweeps, download the artifacts of an
# earlier run (identified by setup), keep only the ingestable ones, validate
# them against the current search space, and re-upload them from this run.
reuse-ingest-artifacts:
  needs: setup
  if: ${{ needs.setup.outputs.reuse-enabled == 'true' }}
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    # Downloads every artifact of the source run into source-artifacts/,
    # deletes the ones not matched by the allow-list in the `case` statement,
    # and writes a JSON file recording the source and ingest run identifiers.
    - name: Download reusable source artifacts
      env:
        # REPO_PAT is preferred; the workflow token is the fallback.
        GH_TOKEN: ${{ secrets.REPO_PAT || github.token }}
        SOURCE_RUN_ID: ${{ needs.setup.outputs.reuse-source-run-id }}
      run: |
        gh run download "$SOURCE_RUN_ID" \
          --repo "${{ github.repository }}" \
          -D source-artifacts

        # Keep only artifacts consumed by the official ingest path.
        # The merge run uploads its own changelog metadata; reusable
        # benchmark/eval rows are attributed to the source PR sweep.
        rm -rf source-artifacts/changelog-metadata
        for artifact_dir in source-artifacts/*; do
          [ -e "$artifact_dir" ] || continue
          name=$(basename "$artifact_dir")
          case "$name" in
            results_bmk|eval_results_all|run-stats|bmk_*|eval_*|server_logs_*|multinode_server_logs_*|agentic_aggregated)
              ;;
            *)
              rm -rf "$artifact_dir"
              ;;
          esac
        done

        mkdir -p source-artifacts/reused-ingest-metadata
        cat > source-artifacts/reused-ingest-metadata/reuse_source_run.json <<'JSON'
        {
          "source_run_id": "${{ needs.setup.outputs.reuse-source-run-id }}",
          "source_run_attempt": "${{ needs.setup.outputs.reuse-source-run-attempt }}",
          "source_run_url": "${{ needs.setup.outputs.reuse-source-run-url }}",
          "source_pr_number": "${{ needs.setup.outputs.reuse-source-pr-number }}",
          "source_head_sha": "${{ needs.setup.outputs.reuse-source-head-sha }}",
          "ingest_run_id": "${{ github.run_id }}",
          "ingest_run_attempt": "${{ github.run_attempt }}",
          "ingest_run_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
        }
        JSON

        echo "Reusing artifacts from $SOURCE_RUN_ID:"
        find source-artifacts -maxdepth 1 -mindepth 1 -type d -printf ' %f\n' | sort

    # The search-space config is written to a file through a quoted heredoc
    # (no shell expansion of its contents) and handed to the validator.
    - name: Validate reusable artifacts
      run: |
        cat <<'CONFIGEOF' > _full_config.json
        ${{ needs.setup.outputs.search-space-config }}
        CONFIGEOF
        python3 utils/validate_reusable_sweep_artifacts.py \
          --config-json _full_config.json \
          --artifacts-dir source-artifacts

    - name: Upload reusable ingest artifacts
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: reused-ingest-artifacts
        path: source-artifacts/*
# Extracts the `changelog_metadata` field from the search-space config and
# uploads it as the `changelog-metadata` artifact. Runs whenever setup
# succeeded, regardless of how collect-results ended.
upload-changelog-metadata:
  needs:
    - setup
    - collect-results
  if: ${{ always() && needs.setup.result == 'success' }}
  runs-on: ubuntu-latest
  steps:
    # The config is written through a quoted heredoc (its contents are not
    # shell-expanded), filtered with jq, and the temporary file is removed.
    - name: Extract and save changelog metadata
      run: |
        cat <<'CONFIGEOF' > _full_config.json
        ${{ needs.setup.outputs.search-space-config }}
        CONFIGEOF
        jq '.changelog_metadata' _full_config.json > changelog_metadata.json
        rm -f _full_config.json

    - name: Upload changelog artifact
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: changelog-metadata
        path: changelog_metadata.json
# Downloads the `results_*` artifacts, runs utils/calc_success_rate.py, and
# uploads the resulting JSON as the `run-stats` artifact. Runs whenever
# collect-results was not skipped.
calc-success-rate:
  needs: collect-results
  if: ${{ always() && needs.collect-results.result != 'skipped' }}
  runs-on: ubuntu-latest
  env:
    RESULTS_DIR: "results/"
    STATS_FILENAME: "run_stats"
    GITHUB_TOKEN: ${{ secrets.REPO_PAT }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      with:
        token: ${{ secrets.REPO_PAT }}
        fetch-depth: 0

    - name: Download results artifacts
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        path: ${{ env.RESULTS_DIR }}
        pattern: results_*

    - name: Install python dependencies
      run: pip install PyGithub

    # The script receives the stats file name without extension; the upload
    # step below expects "<STATS_FILENAME>.json".
    - name: Calculate success rate
      run: python3 utils/calc_success_rate.py "$STATS_FILENAME"

    - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: "run-stats"
        path: ${{ env.STATS_FILENAME }}.json
# PR-only job: after collect-results succeeds, runs utils/compare_results.py
# on the downloaded `results_bmk` artifact and appends its output to the job
# summary.
compare-results:
  needs:
    - collect-results
    - setup
  if: >-
    always() &&
    github.event_name == 'pull_request' &&
    needs.collect-results.result == 'success'
  runs-on: ubuntu-latest
  env:
    DATABASE_URL: ${{ secrets.NEON_PROD_RO_URL }}
  steps:
    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Download results artifacts
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        path: results/
        pattern: results_bmk

    - name: Install dependencies
      run: pip install psycopg2-binary tabulate

    - name: Compare results against main
      run: python3 utils/compare_results.py results/ >> "$GITHUB_STEP_SUMMARY"
# Sends a `repository_dispatch` event ("ingest-results") to
# SemiAnalysisAI/InferenceX-app carrying this run's id and attempt.
# Runs only for pushes to main, and only when results or evals were collected
# (not skipped) or reusable artifacts were ingested successfully.
trigger-ingest:
  needs:
    - collect-results
    - collect-evals
    - calc-success-rate
    - upload-changelog-metadata
    - reuse-ingest-artifacts
  if: >-
    always() &&
    github.event_name == 'push' &&
    github.ref == 'refs/heads/main' &&
    (
      needs.collect-results.result != 'skipped' ||
      needs.collect-evals.result != 'skipped' ||
      needs.reuse-ingest-artifacts.result == 'success'
    )
  runs-on: ubuntu-latest
  steps:
    - name: Trigger database ingest
      env:
        # The token is passed through the environment instead of being
        # spliced into the script text, so characters such as `$` or
        # backticks in the secret are never interpreted by the shell.
        INFX_FRONTEND_PAT: ${{ secrets.INFX_FRONTEND_PAT }}
      # `-f` makes curl exit non-zero on an HTTP error so the step fails.
      # The Accept header uses GitHub's documented JSON media type.
      run: |
        curl -sSf -X POST \
          -H "Authorization: Bearer ${INFX_FRONTEND_PAT}" \
          -H "Accept: application/vnd.github+json" \
          https://api.github.com/repos/SemiAnalysisAI/InferenceX-app/dispatches \
          -d '{
            "event_type": "ingest-results",
            "client_payload": {
              "run-id": "${{ github.run_id }}",
              "run-attempt": "${{ github.run_attempt }}"
            }
          }'
# Posts a PR comment with links to the unofficial-run visualizer pages
# (inference and evaluation) for this workflow run.
comment-unofficial-run-visualizer:
  needs:
    - collect-results
    - collect-evals
    - calc-success-rate
    - upload-changelog-metadata
  # Same PR/label gating as the setup job's pull-request branch. Because of
  # always() and the absence of any needs.*.result check, the comment is
  # posted whatever the outcome of the jobs listed in `needs`.
  if: >-
    always() &&
    github.event_name == 'pull_request' &&
    !github.event.pull_request.draft &&
    (
      contains(github.event.pull_request.labels.*.name, 'sweep-enabled') ||
      contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') ||
      contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
    ) &&
    (
      (github.event.action != 'labeled' && github.event.action != 'unlabeled') ||
      github.event.label.name == 'sweep-enabled' ||
      github.event.label.name == 'full-sweep-enabled' ||
      github.event.label.name == 'non-canary-full-sweep-enabled'
    )
  runs-on: ubuntu-latest
  permissions:
    pull-requests: write
  steps:
    - name: Comment unofficial run visualizer link on PR
      uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
      with:
        github-token: ${{ github.token }}
        script: |
          const inferenceUrl = `https://inferencex.semianalysis.com/inference?unofficialRun=${context.runId}`;
          const evaluationUrl = `https://inferencex.semianalysis.com/evaluation?unofficialRun=${context.runId}`;
          const body = [
            `see unofficial run visualizer at ${inferenceUrl}`,
            `see unofficial run visualizer at ${evaluationUrl}`,
          ].join('\n');
          await github.rest.issues.createComment({
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            body,
          });