Skip to content

Commit e33bb2f

Browse files
committed
feat(ci3): run uploadable benchmarks on a dedicated on-demand instance
Spot diversification means build instances land on variable EC2 types, so the in-build benchmark phase produced hardware-dependent numbers that tripped the 105% regression alert. Move the uploadable benchmark series onto a dedicated, fixed, on-demand m6a.16xlarge for stable single-run results.

- bootstrap.sh: drop the inline `bench` from ci-full/ci-full-no-test-cache. In build_and_test (full builds only), once the build completes: on uploadable runs (SHOULD_UPLOAD_BENCHMARKS=1) launch the dedicated box via `./ci.sh bench` as a backgrounded, colored, denoised job (waited on, non-fatally, before return); otherwise append bench_cmds to the test stream so benches run as ordinary tests under contention — a breakage check, no upload. New `ci-bench` mode = cache-hit `make full` + `bench` (no test engine), uploading the existing bench-<treehash> cache key.
- ci.sh: new `bench` launcher — AWS_INSTANCE=m6a.16xlarge NO_SPOT=1 pins a fixed on-demand type (CPUS unneeded; AWS_INSTANCE bypasses pool sizing).
- bench_engine: drop the 8-core OS isolation / HT-disable / pinning. The box is dedicated, so benches run against the full machine honouring per-bench CPUS via the strict scheduler (which already defaults to nproc/2 without BENCH_CPU_COUNT).
- ci3_labels_to_env.sh: scope SHOULD_UPLOAD_BENCHMARKS to merge-queue->next (it now also gates the dedicated box). bootstrap_ec2: pass it through to the instance.

Results reach the GA upload step unchanged via the bench-<treehash> cache key (ci3_success.sh `gh-bench`).

Requires the ci3-build-instance-role launch perms (separate iac PR) before enabling. Expect a one-time baseline shift in bench/next (different machine + no isolation).
1 parent 0193510 commit e33bb2f

5 files changed

Lines changed: 43 additions & 52 deletions

File tree

.github/ci3_labels_to_env.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,11 @@ function main {
149149
echo "CI_MODE=$ci_mode" >> $GITHUB_ENV
150150
echo "CI mode: $ci_mode"
151151

152-
# Determine if benchmarks should be uploaded (merge-queue, full, or full-no-test-cache modes)
153-
if [[ "$ci_mode" == "merge-queue" || "$ci_mode" == "merge-queue-heavy" || "$ci_mode" == "full" || "$ci_mode" == "full-no-test-cache" ]]; then
152+
# Only the canonical "about to land on next" series produces uploadable benchmark
153+
# numbers. This flag now also gates spinning up the dedicated on-demand bench box
154+
# (build_and_test reads it on the instance), so keep it scoped to merge-queue->next.
155+
# Other full/merge-queue runs run benches inline as a breakage check, no upload.
156+
if [[ ("$ci_mode" == "merge-queue" || "$ci_mode" == "merge-queue-heavy") && "$target_branch" == "next" ]]; then
154157
echo "SHOULD_UPLOAD_BENCHMARKS=1" >> $GITHUB_ENV
155158
fi
156159

bootstrap.sh

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,18 @@ function build_and_test {
435435
start_txes
436436
make noir-projects-txe-tests
437437

438+
# Benches (full builds only). For uploadable runs we want stable numbers, so
439+
# launch a dedicated fixed on-demand instance to run them (backgrounded and
440+
# logged like the test engine, waited on below). Otherwise the benches just
441+
# become tests, run by this engine under contention as a breakage check.
442+
if [ "$1" == full ]; then
443+
if [ "${SHOULD_UPLOAD_BENCHMARKS:-0}" == 1 ]; then
444+
setsid color_prefix "bench" "denoise './ci.sh bench'" & bench_pid=$!
445+
else
446+
bench_cmds >> $test_cmds_file
447+
fi
448+
fi
449+
438450
# Signal tests complete, handled by parallel -E STOP.
439451
echo STOP >> $test_cmds_file
440452
fi
@@ -447,6 +459,13 @@ function build_and_test {
447459

448460
stop_txes
449461

462+
# Wait for the dedicated bench instance, if one was launched. Non-fatal: bench
463+
# infra shouldn't block the run — a failure just means no fresh numbers to upload.
464+
if [ -n "${bench_pid:-}" ]; then
465+
echo "Waiting for dedicated bench run..."
466+
wait "$bench_pid" || echo_stderr "Dedicated bench run failed (non-fatal)."
467+
fi
468+
450469
return 0
451470
}
452471

@@ -750,13 +769,22 @@ case "$cmd" in
750769
export USE_TEST_CACHE=1
751770
export CI_FULL=1
752771
build_and_test full
753-
bench
754772
;;
755773
"ci-full-no-test-cache")
756774
export CI=1
757775
export USE_TEST_CACHE=0
758776
export CI_FULL=1
759777
build_and_test full
778+
;;
779+
"ci-bench")
780+
# Run on a dedicated, fixed, on-demand instance (launched by the build
781+
# instance via './ci.sh bench') for stable benchmark numbers. The build is a
782+
# near-instant cache pull, as the launching build instance already populated
783+
# the cache for this commit. No test engine; bench uploads bench-<treehash>.
784+
export CI=1
785+
export CI_FULL=1
786+
prep
787+
make full
760788
bench
761789
;;
762790
"ci-chonk-input-update")

ci.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,14 @@ case "$cmd" in
122122
# GitHub status check name is unchanged.
123123
multi_job_run "x-$cmd amd64 ci-$cmd"
124124
;;
125+
bench)
126+
# Launched by the build instance on uploadable runs to produce stable benchmark
127+
# numbers on a dedicated, fixed, on-demand instance. AWS_INSTANCE pins the exact
128+
# type (bypasses spot pool diversification); NO_SPOT forces on-demand. CI_DASHBOARD
129+
# and PARENT_LOG_ID are inherited from the launching run so it nests as a sibling job.
130+
AWS_INSTANCE=m6a.16xlarge NO_SPOT=1 JOB_ID=bench INSTANCE_POSTFIX=bench \
131+
bootstrap_ec2 "./bootstrap.sh ci-bench"
132+
;;
125133
socket-fix)
126134
export CI_DASHBOARD="prs"
127135
export JOB_ID="x-socket-fix"

ci3/bench_engine

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,57 +1,10 @@
11
#!/usr/bin/env bash
22
# Uses strict scheduling to run benchmarks in parallel on their own cpus.
33
# For benchmarks that can't be parallelized, runs them one at a time to avoid resource contention.
4-
# Isolates benchmark CPUs from OS and pins all other processes to non-bench CPUs to avoid interference.
54
NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source
65

76
bench_cmds_file=$1
87

9-
function isolate_bench_cpus {
10-
[ "$CI" -eq 0 ] && return
11-
12-
# CPU layout assumption: physical cores are 0..N/2-1, hyperthreads are N/2..N-1.
13-
local total_cpus=$(nproc)
14-
local total_physical=$((total_cpus / 2))
15-
local os_reserve=8
16-
local bench_count=$((total_physical - os_reserve))
17-
18-
# Disable hyperthread siblings of benchmark cores (N/2 .. N/2+bench_count-1).
19-
# OS cores' hyperthreads (N/2+bench_count .. N-1) stay on for extra OS capacity.
20-
for cpu in $(seq $total_physical $((total_physical + bench_count - 1))); do
21-
sudo sh -c "echo 0 > /sys/devices/system/cpu/cpu$cpu/online" 2>/dev/null || true
22-
done
23-
24-
# Pin all container processes to OS CPUs so they can't land on benchmark cores.
25-
# exec_test's taskset overrides this for each benchmark with its allocated CPUs.
26-
local os_cpu_list="$bench_count-$((total_physical - 1)),$((total_physical + bench_count))-$((total_cpus - 1))"
27-
echo "Pinning container processes to OS CPUs ($os_cpu_list)..."
28-
for pid in $(ps -eo pid= 2>/dev/null); do
29-
taskset -apc "$os_cpu_list" $pid &>/dev/null || true
30-
done
31-
32-
export BENCH_CPU_COUNT=$bench_count
33-
34-
echo "Benchmark CPU isolation: CPUs 0-$((bench_count - 1)) ($bench_count cores, hyperthreads off) for benchmarks."
35-
echo "OS CPUs: $os_cpu_list."
36-
}
37-
38-
function unisolate_bench_cpus {
39-
[ "$CI" -eq 0 ] && return
40-
41-
echo "Re-enabling all CPUs..."
42-
local total_cpus=$(nproc --all)
43-
for cpu in $(seq 1 $((total_cpus - 1))); do
44-
sudo sh -c "echo 1 > /sys/devices/system/cpu/cpu$cpu/online" 2>/dev/null || true
45-
done
46-
# Unpin all processes (were pinned to OS CPUs during bench).
47-
for pid in $(ps -eo pid= 2>/dev/null); do
48-
taskset -apc 0-$((total_cpus - 1)) $pid &>/dev/null || true
49-
done
50-
echo "All CPUs re-enabled. Online CPUs: $(nproc)"
51-
}
52-
53-
isolate_bench_cpus
54-
558
# Clean up old benchmark outputs to avoid confusion with new results.
569
find . -type d -iname bench-out | xargs rm -rf
5710

@@ -67,5 +20,3 @@ if [ -n "$serial_cmds" ]; then
6720
run_test_cmd "$cmd"
6821
done <<< "$serial_cmds"
6922
fi
70-
71-
unisolate_bench_cpus

ci3/bootstrap_ec2

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,7 @@ start_build() {
367367
-e GITHUB_ACTOR=${GITHUB_ACTOR:-} \
368368
-e EC2_INSTANCE_TYPE=${EC2_INSTANCE_TYPE:-unknown} \
369369
-e EC2_SPOT=${EC2_SPOT:-unknown} \
370+
-e SHOULD_UPLOAD_BENCHMARKS=${SHOULD_UPLOAD_BENCHMARKS:-} \
370371
-e AZTEC_TOOLCHAIN_DEFAULT_MAJOR_VERSION=${AZTEC_TOOLCHAIN_DEFAULT_MAJOR_VERSION:-} \
371372
-e DRY_RUN=${DRY_RUN:-} \
372373
--pids-limit=65536 \

0 commit comments

Comments
 (0)