Run Sweep - Throwaway: conc-64 gsm8k eval for DEP8+MTP3 dispatch token bug #4613

Workflow file for this run

.github/workflows/run-sweep.yml at 45f69f5

	name: "Run Sweep"
	run-name: Run Sweep - ${{ github.event.pull_request.title \|\| github.event.head_commit.message }}

	concurrency:
	group: >-
	sweep-${{ github.event.pull_request.number \|\| github.sha }}-${{
	github.event_name == 'pull_request' &&
	(github.event.action == 'labeled' \|\| github.event.action == 'unlabeled') &&
	github.event.label.name != 'sweep-enabled' &&
	github.event.label.name != 'full-sweep-enabled' &&
	github.event.label.name != 'non-canary-full-sweep-enabled' &&
	github.run_id \|\|
	'active'
	}}
	cancel-in-progress: true

	on:
	push:
	branches:
	- main
	paths:
	- "perf-changelog.yaml"
	pull_request:
	branches:
	- main
	types:
	- ready_for_review
	- synchronize
	- labeled
	- unlabeled
	paths:
	- "perf-changelog.yaml"

	jobs:
	check-newline:
	runs-on: ubuntu-latest
	if: >-
	github.event_name == 'pull_request' &&
	!github.event.pull_request.draft &&
	(
	(github.event.action != 'labeled' && github.event.action != 'unlabeled') \|\|
	github.event.label.name == 'sweep-enabled' \|\|
	github.event.label.name == 'full-sweep-enabled' \|\|
	github.event.label.name == 'non-canary-full-sweep-enabled'
	)
	steps:
	- name: Checkout code
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

	- name: Check perf-changelog.yaml ends with newline
	run: \|
	if [ -n "$(tail -c 1 perf-changelog.yaml)" ]; then
	echo "::error::perf-changelog.yaml must end with a newline character"
	echo "Please add a newline at the end of the file to avoid diff issues in subsequent PRs."
	exit 1
	fi

	setup:
	runs-on: ubuntu-latest
	if: >-
	(
	github.event_name == 'pull_request' &&
	!github.event.pull_request.draft &&
	(
	contains(github.event.pull_request.labels.*.name, 'sweep-enabled') \|\|
	contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') \|\|
	contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
	) &&
	(
	(github.event.action != 'labeled' && github.event.action != 'unlabeled') \|\|
	github.event.label.name == 'sweep-enabled' \|\|
	github.event.label.name == 'full-sweep-enabled' \|\|
	github.event.label.name == 'non-canary-full-sweep-enabled'
	)
	) \|\|
	(
	github.event_name != 'pull_request' &&
	!contains(github.event.head_commit.message, '[skip-sweep]')
	)
	outputs:
	search-space-config: ${{ steps.setup.outputs.search-space-config }}
	reuse-enabled: ${{ steps.setup.outputs.reuse-enabled }}
	reuse-source-run-id: ${{ steps.setup.outputs.reuse-source-run-id }}
	reuse-source-run-attempt: ${{ steps.setup.outputs.reuse-source-run-attempt }}
	reuse-source-run-url: ${{ steps.setup.outputs.reuse-source-run-url }}
	reuse-source-pr-number: ${{ steps.setup.outputs.reuse-source-pr-number }}
	reuse-source-head-sha: ${{ steps.setup.outputs.reuse-source-head-sha }}
	steps:
	- name: Reject conflicting sweep labels
	if: >-
	github.event_name == 'pull_request' &&
	(
	(contains(github.event.pull_request.labels..name, 'sweep-enabled') && contains(github.event.pull_request.labels..name, 'full-sweep-enabled')) \|\|
	(contains(github.event.pull_request.labels..name, 'sweep-enabled') && contains(github.event.pull_request.labels..name, 'non-canary-full-sweep-enabled')) \|\|
	(contains(github.event.pull_request.labels..name, 'full-sweep-enabled') && contains(github.event.pull_request.labels..name, 'non-canary-full-sweep-enabled'))
	)
	run: \|
	echo "::error::PR has multiple conflicting sweep labels. Pick exactly one of: 'sweep-enabled' (trims to min(conc) per parallelism config), 'full-sweep-enabled' (full intermediate concurrency sweep, with canary gate), or 'non-canary-full-sweep-enabled' (full sweep, no canary gate)."
	exit 1

	- name: Checkout code
	uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	fetch-depth: 0

	- id: setup
	env:
	GH_TOKEN: ${{ github.token }}
	TRIM_CONC: >-
	${{
	github.event_name == 'pull_request' &&
	contains(github.event.pull_request.labels.*.name, 'sweep-enabled')
	}}
	run: \|
	pip install pydantic

	if [ "${{ github.event_name }}" == "pull_request" ]; then
	BASE_REF="origin/${{ github.base_ref }}"
	HEAD_REF="${{ github.event.pull_request.head.sha }}"
	else
	BASE_REF="${{ github.event.before }}"
	HEAD_REF="${{ github.event.after }}"
	fi

	CMD=(
	python3 "${GITHUB_WORKSPACE}/utils/process_changelog.py"
	--changelog-file "${GITHUB_WORKSPACE}/perf-changelog.yaml"
	--base-ref "$BASE_REF"
	--head-ref "$HEAD_REF"
	)
	if [ "$TRIM_CONC" = "true" ]; then
	CMD+=(--trim-conc)
	fi

	CONFIG_JSON=$("${CMD[@]}")

	echo "search-space-config=$CONFIG_JSON" >> "$GITHUB_OUTPUT"
	python3 "${GITHUB_WORKSPACE}/utils/find_reusable_sweep_run.py" \
	--repo "${{ github.repository }}" \
	--commit-sha "${{ github.sha }}" \
	--event-name "${{ github.event_name }}" \
	--ref "${{ github.ref }}" \
	--workflow-id "run-sweep.yml"

	canary-select:
	needs: setup
	if: >-
	needs.setup.outputs.reuse-enabled != 'true' &&
	github.event_name == 'pull_request' &&
	contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled')
	runs-on: ubuntu-latest
	outputs:
	canary-config: ${{ steps.pick.outputs.canary-config }}
	remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }}
	steps:
	- id: pick
	env:
	SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }}
	run: \|
	selection=$(jq -c '
	def remove_one($needle):
	if $needle == null then .
	else
	(index($needle)) as $idx
	\| if $idx == null then . else del(.[$idx]) end
	end;

	# Canary is a benchmark-only smoke test — exclude entries
	# whose primary purpose is eval (run-eval == true) so the
	# picked canary never runs an eval pass.
	(((.single_node["1k1k"] // []) + (.single_node["8k1k"] // []))
	\| map(select(.["run-eval"] != true))) as $candidates
	\| (if ($candidates \| length) == 0 then null else ($candidates \| min_by(.conc)) end) as $canary
	\| {
	canary: (if $canary == null then [] else [$canary] end),
	remaining: (
	.
	\| .single_node = (.single_node // {})
	\| .single_node["1k1k"] = ((.single_node["1k1k"] // []) \| remove_one($canary))
	\| .single_node["8k1k"] = ((.single_node["8k1k"] // []) \| remove_one($canary))
	)
	}
	' <<<"$SEARCH_SPACE")
	echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT"
	echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT"

	canary-sweep:
	needs: canary-select
	if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }}
	uses: ./.github/workflows/benchmark-tmpl.yml
	name: canary /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.canary-select.outputs.canary-config) }}
	secrets: inherit
	with:
	exp-name: ${{ matrix.config.exp-name }}
	isl: ${{ matrix.config.isl }}
	osl: ${{ matrix.config.osl }}
	max-model-len: ${{ matrix.config.max-model-len }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	tp: ${{ matrix.config.tp }}
	ep: ${{ matrix.config.ep }}
	dp-attn: ${{ matrix.config.dp-attn }}
	conc: ${{ matrix.config.conc }}
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}
	run-eval: false

	sweep-multi-node-1k1k:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
	}}
	uses: ./.github/workflows/benchmark-multinode-tmpl.yml
	name: multi-node 1k1k /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k'] }}
	secrets: inherit
	with: &multi-node-inputs
	isl: ${{ matrix.config.isl }}
	osl: ${{ matrix.config.osl }}
	max-model-len: ${{ matrix.config.max-model-len }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	exp-name: ${{ matrix.config.exp-name }}
	conc-list: ${{ toJson(matrix.config.conc) }}
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}

	prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
	prefill-tp: ${{ matrix.config.prefill.tp }}
	prefill-ep: ${{ matrix.config.prefill.ep }}
	prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
	prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}

	decode-num-worker: ${{ matrix.config.decode.num-worker }}
	decode-tp: ${{ matrix.config.decode.tp }}
	decode-ep: ${{ matrix.config.decode.ep }}
	decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
	decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
	run-eval: false

	sweep-multi-node-8k1k:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
	}}
	uses: ./.github/workflows/benchmark-multinode-tmpl.yml
	name: multi-node 8k1k /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k'] }}
	secrets: inherit
	with: *multi-node-inputs

	sweep-single-node-1k1k:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
	toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
	}}
	uses: ./.github/workflows/benchmark-tmpl.yml
	name: single-node 1k1k /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['1k1k'] }}
	secrets: inherit
	with: &single-node-inputs
	exp-name: ${{ matrix.config.exp-name }}
	isl: ${{ matrix.config.isl }}
	osl: ${{ matrix.config.osl }}
	max-model-len: ${{ matrix.config.max-model-len }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	tp: ${{ matrix.config.tp }}
	ep: ${{ matrix.config.ep }}
	dp-attn: ${{ matrix.config.dp-attn }}
	conc: ${{ matrix.config.conc }}
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}
	run-eval: ${{ matrix.config.run-eval }}

	sweep-single-node-8k1k:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
	toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
	}}
	uses: ./.github/workflows/benchmark-tmpl.yml
	name: single-node 8k1k /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) \|\| needs.setup.outputs.search-space-config).single_node['8k1k'] }}
	secrets: inherit
	with: *single-node-inputs

	sweep-agentic:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
	}}
	uses: ./.github/workflows/benchmark-tmpl.yml
	name: agentic /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['agentic'] }}
	secrets: inherit
	with:
	exp-name: ${{ matrix.config.exp-name }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	tp: ${{ matrix.config.tp }}
	ep: ${{ matrix.config.ep }}
	dp-attn: ${{ matrix.config.dp-attn }}
	conc: ${{ matrix.config.conc }}
	offloading: ${{ matrix.config.offloading }}
	duration: ${{ matrix.config.duration }}
	isl: '0'
	osl: '0'
	max-model-len: '0'
	spec-decoding: 'none'
	disagg: ${{ 'false' }}
	run-eval: false
	scenario-type: agentic-coding

	sweep-multi-node-agentic:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
	}}
	uses: ./.github/workflows/benchmark-multinode-tmpl.yml
	name: multi-node agentic /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).multi_node['agentic'] }}
	secrets: inherit
	with:
	exp-name: ${{ matrix.config.exp-name }}
	isl: '0'
	osl: '0'
	max-model-len: '0'
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	conc-list: '[${{ matrix.config.conc }}]'
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}
	prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
	prefill-tp: ${{ matrix.config.prefill.tp }}
	prefill-ep: ${{ matrix.config.prefill.ep }}
	prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
	prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
	decode-num-worker: ${{ matrix.config.decode.num-worker }}
	decode-tp: ${{ matrix.config.decode.tp }}
	decode-ep: ${{ matrix.config.decode.ep }}
	decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
	decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
	conc: ${{ matrix.config.conc }}
	duration: ${{ matrix.config.duration }}
	run-eval: false
	scenario-type: agentic-coding

	sweep-evals:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
	toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
	}}
	uses: ./.github/workflows/benchmark-tmpl.yml
	name: eval /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).evals }}
	secrets: inherit
	with:
	exp-name: ${{ matrix.config.exp-name }}
	isl: ${{ matrix.config.isl }}
	osl: ${{ matrix.config.osl }}
	max-model-len: ${{ matrix.config.max-model-len }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	tp: ${{ matrix.config.tp }}
	ep: ${{ matrix.config.ep }}
	dp-attn: ${{ matrix.config.dp-attn }}
	conc: ${{ matrix.config.conc }}
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}
	run-eval: true
	eval-only: true

	sweep-multi-node-evals:
	needs: [setup, canary-select, canary-sweep]
	if: >-
	${{
	!cancelled() &&
	needs.setup.result == 'success' &&
	needs.setup.outputs.reuse-enabled != 'true' &&
	(needs.canary-sweep.result == 'success' \|\| needs.canary-sweep.result == 'skipped') &&
	toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
	toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
	}}
	uses: ./.github/workflows/benchmark-multinode-tmpl.yml
	name: multi-node eval /
	strategy:
	fail-fast: false
	matrix:
	config: ${{ fromJson(needs.setup.outputs.search-space-config).multinode_evals }}
	secrets: inherit
	with:
	exp-name: ${{ matrix.config.exp-name }}
	isl: ${{ matrix.config.isl }}
	osl: ${{ matrix.config.osl }}
	max-model-len: ${{ matrix.config.max-model-len }}
	runner: ${{ matrix.config.runner }}
	image: ${{ matrix.config.image }}
	model: ${{ matrix.config.model }}
	model-prefix: ${{ matrix.config.model-prefix }}
	framework: ${{ matrix.config.framework }}
	precision: ${{ matrix.config.precision }}
	conc-list: ${{ toJson(matrix.config.conc) }}
	spec-decoding: ${{ matrix.config.spec-decoding }}
	disagg: ${{ matrix.config.disagg }}
	prefill-num-worker: ${{ matrix.config.prefill.num-worker }}
	prefill-tp: ${{ matrix.config.prefill.tp }}
	prefill-ep: ${{ matrix.config.prefill.ep }}
	prefill-dp-attn: ${{ matrix.config.prefill.dp-attn }}
	prefill-additional-settings: ${{ toJson(matrix.config.prefill.additional-settings) }}
	decode-num-worker: ${{ matrix.config.decode.num-worker }}
	decode-tp: ${{ matrix.config.decode.tp }}
	decode-ep: ${{ matrix.config.decode.ep }}
	decode-dp-attn: ${{ matrix.config.decode.dp-attn }}
	decode-additional-settings: ${{ toJson(matrix.config.decode.additional-settings) }}
	run-eval: true
	eval-only: true
	eval-conc: ${{ matrix.config.eval-conc }}

	collect-results:
	needs:
	[
	canary-sweep,
	sweep-single-node-1k1k,
	sweep-single-node-8k1k,
	sweep-agentic,
	sweep-multi-node-1k1k,
	sweep-multi-node-8k1k,
	sweep-multi-node-agentic,
	setup,
	]
	if: >-
	${{
	always() &&
	needs.setup.result == 'success' &&
	(
	needs.canary-sweep.result == 'success' \|\|
	needs.sweep-single-node-1k1k.result != 'skipped' \|\|
	needs.sweep-single-node-8k1k.result != 'skipped' \|\|
	needs.sweep-multi-node-1k1k.result != 'skipped' \|\|
	needs.sweep-multi-node-8k1k.result != 'skipped'
	)
	}}
	uses: ./.github/workflows/collect-results.yml
	secrets: inherit
	with:
	result-prefix: "bmk"

	collect-evals:
	needs: [sweep-evals, sweep-multi-node-evals, setup]
	if: ${{ always() && needs.setup.result != 'skipped' && (needs.sweep-evals.result != 'skipped' \|\| needs.sweep-multi-node-evals.result != 'skipped') }}
	uses: ./.github/workflows/collect-evals.yml
	secrets: inherit

	reuse-ingest-artifacts:
	needs: setup
	if: ${{ needs.setup.outputs.reuse-enabled == 'true' }}
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

	- name: Download reusable source artifacts
	env:
	GH_TOKEN: ${{ secrets.REPO_PAT \|\| github.token }}
	SOURCE_RUN_ID: ${{ needs.setup.outputs.reuse-source-run-id }}
	run: \|
	gh run download "$SOURCE_RUN_ID" \
	--repo "${{ github.repository }}" \
	-D source-artifacts

	# Keep only artifacts consumed by the official ingest path.
	# The merge run uploads its own changelog metadata; reusable
	# benchmark/eval rows are attributed to the source PR sweep.
	rm -rf source-artifacts/changelog-metadata
	for artifact_dir in source-artifacts/*; do
	[ -e "$artifact_dir" ] \|\| continue
	name=$(basename "$artifact_dir")
	case "$name" in
	results_bmk\|eval_results_all\|run-stats\|bmk_\|eval_\|server_logs_\|multinode_server_logs_\|agentic_aggregated)
	;;
	*)
	rm -rf "$artifact_dir"
	;;
	esac
	done

	mkdir -p source-artifacts/reused-ingest-metadata
	cat > source-artifacts/reused-ingest-metadata/reuse_source_run.json <<'JSON'
	{
	"source_run_id": "${{ needs.setup.outputs.reuse-source-run-id }}",
	"source_run_attempt": "${{ needs.setup.outputs.reuse-source-run-attempt }}",
	"source_run_url": "${{ needs.setup.outputs.reuse-source-run-url }}",
	"source_pr_number": "${{ needs.setup.outputs.reuse-source-pr-number }}",
	"source_head_sha": "${{ needs.setup.outputs.reuse-source-head-sha }}",
	"ingest_run_id": "${{ github.run_id }}",
	"ingest_run_attempt": "${{ github.run_attempt }}",
	"ingest_run_url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
	}
	JSON

	echo "Reusing artifacts from $SOURCE_RUN_ID:"
	find source-artifacts -maxdepth 1 -mindepth 1 -type d -printf ' %f\n' \| sort

	- name: Validate reusable artifacts
	run: \|
	cat <<'CONFIGEOF' > _full_config.json
	${{ needs.setup.outputs.search-space-config }}
	CONFIGEOF
	python3 utils/validate_reusable_sweep_artifacts.py \
	--config-json _full_config.json \
	--artifacts-dir source-artifacts

	- name: Upload reusable ingest artifacts
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: reused-ingest-artifacts
	path: source-artifacts/*

	upload-changelog-metadata:
	needs: [setup, collect-results]
	if: ${{ always() && needs.setup.result == 'success' }}
	runs-on: ubuntu-latest
	steps:
	- name: Extract and save changelog metadata
	run: \|
	cat <<'CONFIGEOF' > _full_config.json
	${{ needs.setup.outputs.search-space-config }}
	CONFIGEOF
	jq '.changelog_metadata' _full_config.json > changelog_metadata.json
	rm -f _full_config.json

	- name: Upload changelog artifact
	uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: changelog-metadata
	path: changelog_metadata.json

	calc-success-rate:
	needs: collect-results
	if: ${{ always() && needs.collect-results.result != 'skipped'}}
	runs-on: ubuntu-latest

	env:
	RESULTS_DIR: "results/"
	STATS_FILENAME: "run_stats"
	GITHUB_TOKEN: ${{ secrets.REPO_PAT }}

	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
	with:
	token: ${{ secrets.REPO_PAT }}
	fetch-depth: 0

	- name: Download results artifacts
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	with:
	path: ${{ env.RESULTS_DIR }}
	pattern: results_*

	- name: Install python dependencies
	run: pip install PyGithub

	- name: Calculate success rate
	run: python3 utils/calc_success_rate.py "$STATS_FILENAME"

	- uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
	with:
	name: "run-stats"
	path: ${{ env.STATS_FILENAME }}.json

	compare-results:
	needs:
	[
	collect-results,
	setup,
	]
	if: >-
	always() &&
	github.event_name == 'pull_request' &&
	needs.collect-results.result == 'success'
	runs-on: ubuntu-latest

	env:
	DATABASE_URL: ${{ secrets.NEON_PROD_RO_URL }}

	steps:
	- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

	- name: Download results artifacts
	uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
	with:
	path: results/
	pattern: results_bmk

	- name: Install dependencies
	run: pip install psycopg2-binary tabulate

	- name: Compare results against main
	run: python3 utils/compare_results.py results/ >> "$GITHUB_STEP_SUMMARY"

	trigger-ingest:
	needs:
	[
	collect-results,
	collect-evals,
	calc-success-rate,
	upload-changelog-metadata,
	reuse-ingest-artifacts,
	]
	if: >-
	always() &&
	github.event_name == 'push' &&
	github.ref == 'refs/heads/main' &&
	(
	needs.collect-results.result != 'skipped' \|\|
	needs.collect-evals.result != 'skipped' \|\|
	needs.reuse-ingest-artifacts.result == 'success'
	)
	runs-on: ubuntu-latest
	steps:
	- name: Trigger database ingest
	run: \|
	curl -sSf -X POST \
	-H "Authorization: Bearer ${{ secrets.INFX_FRONTEND_PAT }}" \
	-H "Accept: application/vnd.github+v3+json" \
	https://api.github.com/repos/SemiAnalysisAI/InferenceX-app/dispatches \
	-d '{
	"event_type": "ingest-results",
	"client_payload": {
	"run-id": "${{ github.run_id }}",
	"run-attempt": "${{ github.run_attempt }}"
	}
	}'

	comment-unofficial-run-visualizer:
	needs:
	[
	collect-results,
	collect-evals,
	calc-success-rate,
	upload-changelog-metadata,
	]
	if: >-
	always() &&
	github.event_name == 'pull_request' &&
	!github.event.pull_request.draft &&
	(
	contains(github.event.pull_request.labels.*.name, 'sweep-enabled') \|\|
	contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') \|\|
	contains(github.event.pull_request.labels.*.name, 'non-canary-full-sweep-enabled')
	) &&
	(
	(github.event.action != 'labeled' && github.event.action != 'unlabeled') \|\|
	github.event.label.name == 'sweep-enabled' \|\|
	github.event.label.name == 'full-sweep-enabled' \|\|
	github.event.label.name == 'non-canary-full-sweep-enabled'
	)
	runs-on: ubuntu-latest
	permissions:
	pull-requests: write
	steps:
	- name: Comment unofficial run visualizer link on PR
	uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
	with:
	github-token: ${{ github.token }}
	script: \|
	const inferenceUrl = `https://inferencex.semianalysis.com/inference?unofficialRun=${context.runId}`;
	const evaluationUrl = `https://inferencex.semianalysis.com/evaluation?unofficialRun=${context.runId}`;
	const body = [
	`see unofficial run visualizer at ${inferenceUrl}`,
	`see unofficial run visualizer at ${evaluationUrl}`,
	].join('\n');
	await github.rest.issues.createComment({
	owner: context.repo.owner,
	repo: context.repo.repo,
	issue_number: context.issue.number,
	body,
	});

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Run Sweep - Throwaway: conc-64 gsm8k eval for DEP8+MTP3 dispatch token bug #4613

Workflow file

Run Sweep - Throwaway: conc-64 gsm8k eval for DEP8+MTP3 dispatch token bug #4613

Uh oh!

Workflow file for this run