diff --git a/.github/ci3_labels_to_env.sh b/.github/ci3_labels_to_env.sh index 73fd31c7d222..fa527b7d6b6f 100755 --- a/.github/ci3_labels_to_env.sh +++ b/.github/ci3_labels_to_env.sh @@ -149,9 +149,13 @@ function main { echo "CI_MODE=$ci_mode" >> $GITHUB_ENV echo "CI mode: $ci_mode" - # Determine if benchmarks should be uploaded (merge-queue, full, or full-no-test-cache modes) + # Benching modes run their benches on a dedicated, fixed-hardware box (stable numbers) + # and publish the result; ci-fast never benches. For grind runs (merge-queue-heavy fires + # ~10 instances) only the first instance keeps BENCH_UPLOAD=1 — multi_job_run forces the + # rest to 0 so they bench inline as a breakage check without racing the upload. The + # destination (bench/next vs bench/prs) is BENCH_BRANCH below. if [[ "$ci_mode" == "merge-queue" || "$ci_mode" == "merge-queue-heavy" || "$ci_mode" == "full" || "$ci_mode" == "full-no-test-cache" ]]; then - echo "SHOULD_UPLOAD_BENCHMARKS=1" >> $GITHUB_ENV + echo "BENCH_UPLOAD=1" >> $GITHUB_ENV fi # Determine the branch label for benchmark publishing. diff --git a/.github/ci3_success.sh b/.github/ci3_success.sh index 7684cdd71410..066571ee02a9 100755 --- a/.github/ci3_success.sh +++ b/.github/ci3_success.sh @@ -42,17 +42,17 @@ function handle_squash_merge { } function handle_benchmarks { - if [ "${SHOULD_UPLOAD_BENCHMARKS:-0}" -eq 0 ]; then + if [ "${BENCH_UPLOAD:-0}" -eq 0 ]; then return fi # Handle benchmarks download (internal only) echo "Downloading benchmarks..." 
if ./ci.sh gh-bench && [ -f "./bench-out/bench.json" ] && [ "$(cat ./bench-out/bench.json)" != "[]" ]; then echo "Benchmarks downloaded successfully" - echo "SHOULD_UPLOAD_BENCHMARKS=1" >> $GITHUB_ENV + echo "BENCH_UPLOAD=1" >> $GITHUB_ENV else echo "No benchmarks to upload" - echo "SHOULD_UPLOAD_BENCHMARKS=0" >> $GITHUB_ENV + echo "BENCH_UPLOAD=0" >> $GITHUB_ENV fi } diff --git a/.github/workflows/ci3-external.yml b/.github/workflows/ci3-external.yml index 32b130c6a7c5..a94701736765 100644 --- a/.github/workflows/ci3-external.yml +++ b/.github/workflows/ci3-external.yml @@ -128,7 +128,7 @@ jobs: if: always() env: SHOULD_SQUASH_MERGE: ${{ contains(github.event.pull_request.labels.*.name, 'ci-squash-and-merge') && '1' || '0' }} - SHOULD_UPLOAD_BENCHMARKS: "0" + BENCH_UPLOAD: "0" # For updating success cache. AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/ci3.yml b/.github/workflows/ci3.yml index 3234f4d9bf7e..24b7d9db8a26 100644 --- a/.github/workflows/ci3.yml +++ b/.github/workflows/ci3.yml @@ -158,7 +158,7 @@ jobs: fi - name: Upload benchmarks - if: env.SHOULD_UPLOAD_BENCHMARKS == '1' + if: env.BENCH_UPLOAD == '1' uses: benchmark-action/github-action-benchmark@52576c92bccf6ac60c8223ec7eb2565637cae9ba # v1.22.1 with: &ci_benchmark_args name: Aztec Benchmarks diff --git a/Makefile b/Makefile index f616809a0c05..242eee1e55cb 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,14 @@ fast: release-image barretenberg boxes playground docs aztec-up \ # Full bootstrap. full: fast bb-full-tests bb-cpp-full yarn-project-benches +# Everything required to run the full benchmark suite (see bootstrap.sh bench_cmds), +# and nothing more. 
yarn-project-benches transitively builds the bb native/wasm bench +# binaries (via bb-ts -> bb-cpp-native/wasm-threads), the e2e bench inputs, noir-projects +# and l1-contracts; bb-sol adds the Solidity gas benchmark's generated verifier; bb-acir +# builds barretenberg/acir_tests, whose headless-test harness (ts-node) the bb browser +# memory bench (ci_benchmark_browser_memory.sh) drives. +bench: yarn-project-benches bb-sol bb-acir + # Release. Everything plus copy bb cross compiles to ts projects. release: fast bb-cpp-release-dir bb-ts-cross-copy diff --git a/bootstrap.sh b/bootstrap.sh index 919c1a331448..6caddbfcabad 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -435,6 +435,18 @@ function build_and_test { start_txes make noir-projects-txe-tests + # Benches (full builds only). Uploadable runs (BENCH_UPLOAD=1 — the first instance of + # a run) bench on a dedicated fixed-hardware box for stable numbers: launched here, + # logged like the test engine, waited on below, and the sole uploader. Everything + # else benches inline as ordinary tests — a breakage check only, no upload. + if [ "$1" == full ]; then + if [ "${BENCH_UPLOAD:-0}" == 1 ]; then + setsid color_prefix "bench" "denoise './ci.sh bench'" & bench_pid=$! + else + bench_cmds >> $test_cmds_file + fi + fi + # Signal tests complete, handled by parallel -E STOP. echo STOP >> $test_cmds_file fi @@ -447,6 +459,14 @@ function build_and_test { stop_txes + # Benches (full builds only). Inline benches above are a breakage check only — the + # dedicated box is the sole uploader. Wait on it here: fatal, matching the old inline + # `bench`, since a benchmark that fails to build/run is a real breakage. + if [ "$1" == full ] && [ -n "${bench_pid:-}" ]; then + echo "Waiting for dedicated bench run..." 
+ wait "$bench_pid" + fi + return 0 } @@ -468,6 +488,16 @@ function bench_merge { } +# Merge all component bench-out/*.bench.json into one and upload it to the +# bench-{tree-hash} cache key, which the GA "Upload benchmarks" step then publishes. +# Used both by `bench` (dedicated box) and by the inline benches-as-tests path. +function bench_publish { + rm -rf bench-out + mkdir -p bench-out + bench_merge + cache_upload bench-$(git rev-parse HEAD^{tree}).tar.gz bench-out/bench.json +} + function bench { # TODO bench for arm64. if [ $(arch) == arm64 ]; then @@ -476,12 +506,7 @@ function bench { echo_header "bench all" bench_cmds > $bench_cmds_file denoise "bench_engine $bench_cmds_file" - - rm -rf bench-out - mkdir -p bench-out - bench_merge - cache_upload bench-$(git rev-parse HEAD^{tree}).tar.gz bench-out/bench.json - + bench_publish } ### RELEASING ########################################################################################################## @@ -750,13 +775,22 @@ case "$cmd" in export USE_TEST_CACHE=1 export CI_FULL=1 build_and_test full - bench ;; "ci-full-no-test-cache") export CI=1 export USE_TEST_CACHE=0 export CI_FULL=1 build_and_test full + ;; + "ci-bench") + # Run on a dedicated, fixed, on-demand instance (launched by the build + # instance via './ci.sh bench') for stable benchmark numbers. The build is a + # near-instant cache pull, as the launching build instance already populated + # the cache for this commit. No test engine; bench uploads bench-{tree-hash}. + export CI=1 + export CI_FULL=1 + prep + make bench bench ;; "ci-chonk-input-update") diff --git a/ci.sh b/ci.sh index e7842dad7f8b..e1357000a161 100755 --- a/ci.sh +++ b/ci.sh @@ -39,10 +39,10 @@ function print_usage { echo_cmd "chonk-input-update" "Spin up an EC2 instance to update pinned Chonk IVC inputs and push the diff." echo_cmd "release" "Spin up an EC2 instance and run bootstrap release." echo_cmd "shell-new" "Spin up an EC2 instance, clone the repo, and drop into a shell."
- echo_cmd "shell" "Drop into a shell in the current running build instance container." - echo_cmd "shell-host" "Drop into a shell in the current running build host." + echo_cmd "shell-container" "Shell into a running build container. Optional filter tokens (e.g. 'pr-123 bench') select the instance; defaults to the current branch." + echo_cmd "shell-host" "Shell into a running build host. Same instance selection as shell-container." echo_cmd "log" "Display the log of the given log ID." - echo_cmd "kill" "Terminate running EC2 instance with instance_name." + echo_cmd "kill" "Terminate running build instances matching the filter tokens (default: current branch)." echo_cmd "draft" "Mark the current PR as draft (no automatic CI runs when pushing)." echo_cmd "ready" "Mark the current PR as ready (enable automatic CI runs when pushing)." echo_cmd "pr-url" "Print the URL of the current PR associated with the branch." @@ -53,27 +53,69 @@ function print_usage { [ -n "$cmd" ] && shift -# Keep this in sync with bootstrap_ec2's instance_name scheme (repo-scoped) so the -# shell/kill/get-ip helpers find instances launched by a CI run for this repo. -repo=${GITHUB_REPOSITORY:-aztec-packages} -repo=${repo##*/} -instance_name=${INSTANCE_NAME:-${repo}_$(echo -n "$BRANCH" | tr -c 'a-zA-Z0-9-' '_')_${arch}} -[ -n "${INSTANCE_POSTFIX:-}" ] && instance_name+="_$INSTANCE_POSTFIX" +# Connecting to a running build instance: discover by the Group=build-instance tag +# and match filter tokens against the Name (which aws_instance_name builds as +# {repo}_{ref}_{arch}[_{postfix}]), rather than reconstructing the exact name (which +# varies by arch/job/count). This is what lets `shell-container pr-123 bench` etc. work.
-function get_ip_for_instance { - ip=$(aws ec2 describe-instances \ +# Echo running build instances as: {name}\t{instance-id}\t{public-ip}\t{launch-time} +function list_build_instances { + aws ec2 describe-instances \ --region us-east-2 \ - --filters "Name=tag:Name,Values=$instance_name" "Name=instance-state-name,Values=running" \ - --query "Reservations[].Instances[0].PublicIpAddress" \ - --output text) + --filters "Name=tag:Group,Values=build-instance" "Name=instance-state-name,Values=running" \ + --query "Reservations[].Instances[].[Tags[?Key=='Name']|[0].Value, InstanceId, PublicIpAddress, LaunchTime]" \ + --output text } -function get_iid_for_instance { - iid=$(aws ec2 describe-instances \ - --region us-east-2 \ - --filters "Name=tag:Name,Values=$instance_name" "Name=instance-state-name,Values=running" \ - --query "Reservations[].Instances[0].InstanceId" \ - --output text | tr -d '\n\r' | xargs) +# Echo running build instances whose Name matches every filter token (case-insensitive +# substring). With no tokens, defaults to the current branch's canonical name. +function filter_build_instances { + local filters=("$@") rows f + [ "${#filters[@]}" -eq 0 ] && filters=("$(aws_instance_name "$BRANCH" "$arch")") + rows=$(list_build_instances) + for f in "${filters[@]}"; do + # Sanitise the token the same way instance names are (e.g. a branch's '/' -> '_'), + # so passing a raw branch name like 'mv/f-669' still matches '..._mv_f-669_...'. + f=$(printf '%s' "$f" | tr -c 'a-zA-Z0-9-' '_') + rows=$(printf '%s\n' "$rows" | awk -v p="$f" 'index(tolower($1), tolower(p))') + done + printf '%s\n' "$rows" | sed '/^$/d' +} + +# Resolve exactly one instance from the filter tokens; sets iid/ip/resolved_name. +# 0 matches -> error + list everything; >1 -> interactive pick on a TTY, else error +# listing the candidates so you can add a narrowing token (e.g. an arch or job id).
+function resolve_instance { + local matches chosen sel i + matches=$(filter_build_instances "$@") + if [ -z "$matches" ]; then + echo_stderr "No running build instance matches: ${*:-$BRANCH}" + echo_stderr "Running build instances:" + list_build_instances | awk '{print " " $1}' | sort || true + exit 1 + fi + if [ "$(printf '%s\n' "$matches" | wc -l)" -eq 1 ]; then + chosen=$matches + elif [ -t 0 ]; then + echo_stderr "Multiple build instances match '${*:-$BRANCH}':" + i=1 + while IFS= read -r line; do + echo_stderr " $i) $(printf '%s' "$line" | awk '{print $1}')" + i=$((i + 1)) + done <<< "$matches" + read -r -p "select [1-$((i - 1))]: " sel + [[ "$sel" =~ ^[0-9]+$ ]] || { echo_stderr "Invalid selection."; exit 1; } + chosen=$(printf '%s\n' "$matches" | sed -n "${sel}p") + [ -z "$chosen" ] && echo_stderr "Invalid selection." && exit 1 + else + echo_stderr "Multiple build instances match '${*}' — add a narrowing token (e.g. an arch or job id):" + printf '%s\n' "$matches" | awk '{print " " $1}' + exit 1 + fi + resolved_name=$(printf '%s' "$chosen" | awk '{print $1}') + iid=$(printf '%s' "$chosen" | awk '{print $2}') + ip=$(printf '%s' "$chosen" | awk '{print $3}') + echo_stderr "Connecting to $resolved_name ($iid)." } function get_latest_run_id { @@ -95,16 +137,30 @@ function multi_job_run { export AWS_SHUTDOWN_TIME_ARM=${AWS_SHUTDOWN_TIME_ARM:-90} export DENOISE=1 export DENOISE_WIDTH=32 + # Only the first job (the amd64 full build) runs the dedicated bench box and uploads; + # the rest bench inline as a breakage check (see bootstrap.sh build_and_test). This + # de-races grind runs (e.g. merge-queue-heavy fires ~10 instances) that would otherwise + # all upload to the same bench cache key. 
+ local bench_primary=${1%% *} + export bench_primary run() { [ -n "${4:-}" ] && export REF_NAME=$4 - PARENT_LOG_ID=$RUN_ID JOB_ID=$1 INSTANCE_POSTFIX=$1 ARCH=$2 exec denoise "bootstrap_ec2 './bootstrap.sh $3'" + local bench_upload=0 + [ "$1" == "$bench_primary" ] && bench_upload=${BENCH_UPLOAD:-0} + # Timestamp the bootstrap_ec2 (instance request) sublog. denoise runs the command + # under pipefail and bootstrap_ec2 handles spot-eviction retry internally (exec), so + # piping through add_timestamps preserves its exit code. DENOISE_DISPLAY_NAME keeps + # the parent log's "Executing:" line free of the pipe. + PARENT_LOG_ID=$RUN_ID JOB_ID=$1 INSTANCE_POSTFIX=$1 ARCH=$2 BENCH_UPLOAD=$bench_upload \ + DENOISE_DISPLAY_NAME="bootstrap_ec2 './bootstrap.sh $3'" \ + exec denoise "bootstrap_ec2 './bootstrap.sh $3' 2>&1 | add_timestamps" } export -f run parallel --colsep ' ' --jobs 100 --termseq 'TERM,10000' \ --tagstring '{1}' \ --line-buffered --halt now,fail=1 \ - 'run {1} {2} {3} {4}' ::: "$@" | DUP=1 cache_log "CI run" $RUN_ID + 'run {1} {2} {3} {4}' ::: "$@" | add_timestamps | DUP=1 cache_log "CI run" $RUN_ID } # Jobs in the ci dashboards are grouped on a single line by RUN_ID. @@ -122,6 +178,21 @@ case "$cmd" in # GitHub status check name is unchanged. multi_job_run "x-$cmd amd64 ci-$cmd" ;; + bench) + # Launched by the build instance on uploadable runs to produce stable benchmark + # numbers on a dedicated instance of a FIXED type. AWS_INSTANCE pins the exact type + # (bypasses spot pool diversification) — that's what keeps numbers comparable. Spot + # vs on-demand is the same hardware, so we try spot first and fall back to on-demand + # (the default fleet behaviour); a mid-run spot reclaim is handled by bootstrap_ec2's + # internal on-demand retry. CI_DASHBOARD and PARENT_LOG_ID are inherited from the + # launching run so it nests as a sibling job. + # Timestamp the instance-request output. 
pipefail (in a subshell, since ci.sh doesn't + # set it globally) keeps bootstrap_ec2's exit code through add_timestamps — the + # launching build instance waits on this fatally. + ( set -o pipefail + AWS_INSTANCE=m6a.32xlarge JOB_ID=x-bench INSTANCE_POSTFIX=x-bench \ + bootstrap_ec2 "./bootstrap.sh ci-bench" 2>&1 | add_timestamps ) + ;; socket-fix) export CI_DASHBOARD="prs" export JOB_ID="x-socket-fix" @@ -352,47 +423,51 @@ case "$cmd" in CI_USE_SSH=1 exec bootstrap_ec2 "$cmd" ;; shell-container) - # Drop into a shell in the current running build instance container. + # Drop into a zsh shell in a running build instance's container. Optional filter + # tokens select the instance, e.g.: + # ci.sh shell-container # the current branch's instance + # ci.sh shell-container pr-12345 bench # the bench box for that merge-queue run + # ci.sh shell-container pr-12345 arm64 # the arm build of that run + resolve_instance "$@" + container_cmd="docker start aztec_build &>/dev/null || true && docker exec -it --user aztec-dev aztec_build zsh" if [ "${CI_USE_SSH:-0}" -eq 1 ]; then - get_ip_for_instance - [ -z "$ip" ] && echo "No instance found: $instance_name" && exit 1 - [ "$#" -eq 0 ] && set -- "zsh" || true - ssh -tq -F $ci3/aws/build_instance_ssh_config ubuntu@$ip \ - "docker start aztec_build &>/dev/null || true && docker exec -it --user aztec-dev aztec_build $@" + if [ -z "$ip" ] || [ "$ip" = "None" ]; then echo_stderr "No public IP for $resolved_name."; exit 1; fi + ssh -tq -F $ci3/aws/build_instance_ssh_config ubuntu@$ip "$container_cmd" else - get_iid_for_instance - [ -z "$iid" ] || [ "$iid" = "None" ] && echo "No instance found: $instance_name" && exit 1 - [ "$#" -eq 0 ] && set -- "zsh" || true + # SSM sessions run as the non-root ssm-user (which has passwordless sudo), so + # use sudo rather than runuser. Running docker as root is fine — the container + # itself drops to aztec-dev via --user. 
aws ssm start-session \ --region us-east-2 \ --target "$iid" \ --document-name "AWS-StartInteractiveCommand" \ - --parameters "{\"command\":[\"runuser -u ubuntu -- bash -c 'docker start aztec_build &>/dev/null || true && docker exec -it --user aztec-dev aztec_build $@'\"]}" + --parameters "{\"command\":[\"sudo bash -c '$container_cmd'\"]}" fi ;; shell-host) - # Drop into a shell in the current running build host. + # Drop into a shell on a running build host. Optional filter tokens select the + # instance (see shell-container). + resolve_instance "$@" if [ "${CI_USE_SSH:-0}" -eq 1 ]; then - get_ip_for_instance - [ -z "$ip" ] && echo "No instance found: $instance_name" && exit 1 + if [ -z "$ip" ] || [ "$ip" = "None" ]; then echo_stderr "No public IP for $resolved_name."; exit 1; fi ssh -t -F $ci3/aws/build_instance_ssh_config ubuntu@$ip else - get_iid_for_instance - [ -z "$iid" ] || [ "$iid" = "None" ] && echo "No instance found: $instance_name" && exit 1 aws ssm start-session \ --region us-east-2 \ --target "$iid" fi ;; kill) - existing_instance=$(aws ec2 describe-instances \ - --region us-east-2 \ - --filters "Name=tag:Name,Values=$instance_name" \ - --query "Reservations[].Instances[?State.Name!='terminated'].InstanceId[]" \ - --output text) - if [ -n "$existing_instance" ]; then - aws_terminate_instance $existing_instance + # Terminate ALL running build instances matching the filter tokens (default: the + # current branch). E.g. `ci.sh kill pr-12345` ends a whole merge-queue run. 
+ kill_rows=$(filter_build_instances "$@") + if [ -z "$kill_rows" ]; then + echo "No running build instance matches: ${*:-$BRANCH}" + exit 0 fi + echo "Terminating:" + printf '%s\n' "$kill_rows" | awk '{print " " $1 " (" $2 ")"}' + printf '%s\n' "$kill_rows" | awk '{print $2}' | xargs aws ec2 terminate-instances --region us-east-2 --instance-ids >/dev/null ;; ################### diff --git a/ci3/aws_instance_name b/ci3/aws_instance_name new file mode 100755 index 000000000000..6ea76db14552 --- /dev/null +++ b/ci3/aws_instance_name @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Derive the canonical build-instance EC2 "Name" tag. Single source of truth for +# the launch path (bootstrap_ec2) and the connect/kill helpers (ci.sh), so the two +# can never drift. +# +# Usage: aws_instance_name [ref] [arch] [postfix] +# ref default $REF_NAME, else the current git branch. +# arch default $ARCH, else amd64. Normalised: x86_64->amd64, aarch64|arm64->arm64. +# postfix default $INSTANCE_POSTFIX (the per-job suffix, e.g. x-fast, bench); optional. +# +# The repo prefix is $GITHUB_REPOSITORY's basename (default aztec-packages) so that +# aztec-packages and aztec-packages-private don't collide on names. +set -eu + +ref=${1:-${REF_NAME:-$(git rev-parse --abbrev-ref HEAD 2>/dev/null || true)}} +arch=${2:-${ARCH:-amd64}} +postfix=${3:-${INSTANCE_POSTFIX:-}} + +# Normalise to the CI arch convention (amd64/arm64), not the kernel's (x86_64/aarch64). +case "$arch" in + x86_64 | amd64) arch=amd64 ;; + aarch64 | arm64) arch=arm64 ;; +esac + +repo=${GITHUB_REPOSITORY:-aztec-packages} +repo=${repo##*/} + +# Merge-queue refs look like gh-readonly-queue/{base-branch}/pr-{number}-{sha}; key on pr-{number} +# so every attempt for a PR shares one name. Otherwise sanitise the ref (cap to 50).
+if [[ "$ref" =~ ^gh-readonly-queue/.*(pr-[0-9]+) ]]; then + name="${repo}_${BASH_REMATCH[1]}_${arch}" +else + name="${repo}_$(printf '%s' "$ref" | head -c 50 | tr -c 'a-zA-Z0-9-' '_')_${arch}" +fi +[ -n "$postfix" ] && name+="_${postfix}" + +printf '%s\n' "$name" diff --git a/ci3/bench_engine b/ci3/bench_engine index 5a38c99b4590..6e0968a28787 100755 --- a/ci3/bench_engine +++ b/ci3/bench_engine @@ -1,38 +1,28 @@ #!/usr/bin/env bash # Uses strict scheduling to run benchmarks in parallel on their own cpus. # For benchmarks that can't be parallelized, runs them one at a time to avoid resource contention. -# Isolates benchmark CPUs from OS and pins all other processes to non-bench CPUs to avoid interference. NO_CD=1 source $(git rev-parse --show-toplevel)/ci3/source bench_cmds_file=$1 +# Turn hyperthreading off and schedule benches over the bottom half of the logical CPUs +# (one thread per physical core). Timing-sensitive benches (e.g. the p2p client benches) +# degrade nonlinearly when a sibling thread on the same physical core runs other work, so +# we offline the siblings outright. The strict scheduler then pools exactly these physical +# cores via BENCH_CPU_COUNT. function isolate_bench_cpus { [ "$CI" -eq 0 ] && return # CPU layout assumption: physical cores are 0..N/2-1, hyperthreads are N/2..N-1. local total_cpus=$(nproc) - local total_physical=$((total_cpus / 2)) - local os_reserve=8 - local bench_count=$((total_physical - os_reserve)) + local physical=$((total_cpus / 2)) - # Disable hyperthread siblings of benchmark cores (N/2 .. N/2+bench_count-1). - # OS cores' hyperthreads (N/2+bench_count .. N-1) stay on for extra OS capacity. - for cpu in $(seq $total_physical $((total_physical + bench_count - 1))); do + for cpu in $(seq $physical $((total_cpus - 1))); do sudo sh -c "echo 0 > /sys/devices/system/cpu/cpu$cpu/online" 2>/dev/null || true done - # Pin all container processes to OS CPUs so they can't land on benchmark cores. 
- # exec_test's taskset overrides this for each benchmark with its allocated CPUs. - local os_cpu_list="$bench_count-$((total_physical - 1)),$((total_physical + bench_count))-$((total_cpus - 1))" - echo "Pinning container processes to OS CPUs ($os_cpu_list)..." - for pid in $(ps -eo pid= 2>/dev/null); do - taskset -apc "$os_cpu_list" $pid &>/dev/null || true - done - - export BENCH_CPU_COUNT=$bench_count - - echo "Benchmark CPU isolation: CPUs 0-$((bench_count - 1)) ($bench_count cores, hyperthreads off) for benchmarks." - echo "OS CPUs: $os_cpu_list." + export BENCH_CPU_COUNT=$physical + echo "Benchmark CPU isolation: CPUs 0-$((physical - 1)) ($physical cores, hyperthreads off) for benchmarks." } function unisolate_bench_cpus { @@ -43,10 +33,6 @@ function unisolate_bench_cpus { for cpu in $(seq 1 $((total_cpus - 1))); do sudo sh -c "echo 1 > /sys/devices/system/cpu/cpu$cpu/online" 2>/dev/null || true done - # Unpin all processes (were pinned to OS CPUs during bench). - for pid in $(ps -eo pid= 2>/dev/null); do - taskset -apc 0-$((total_cpus - 1)) $pid &>/dev/null || true - done echo "All CPUs re-enabled. Online CPUs: $(nproc)" } diff --git a/ci3/bootstrap_ec2 b/ci3/bootstrap_ec2 index 03b050b0d8b6..d095ced9f6ed 100755 --- a/ci3/bootstrap_ec2 +++ b/ci3/bootstrap_ec2 @@ -64,20 +64,13 @@ if [[ "$(git fetch origin --negotiate-only --negotiation-tip="$current_commit")" exit 1 fi -# Our instance_name acts as a uniqueness key for the instance. -# Instances are terminated if they exist with the same name; this reaps orphans -# left when a GA run is cancelled (e.g. by a new push) on the same ref. -# Scope the key to the repo: aztec-packages and aztec-packages-private can build -# the same tag/ref concurrently under the same role, and must not reap each -# other's instances. The key stays stable across re-runs within a repo, so the -# orphan cleanup still works. 
-repo=${GITHUB_REPOSITORY:-aztec-packages} -repo=${repo##*/} -if [[ "$REF_NAME" =~ ^gh-readonly-queue/.*(pr-[0-9]+) ]]; then - instance_name="${repo}_${BASH_REMATCH[1]}_$arch" -else - instance_name="${repo}_$(echo -n "$REF_NAME" | head -c 50 | tr -c 'a-zA-Z0-9-' '_')_$arch" -fi +# Our instance_name acts as a uniqueness key for the instance: an existing instance +# with the same name is terminated first, reaping orphans left when a GA run is +# cancelled (e.g. by a new push) on the same ref. It's repo-scoped so +# aztec-packages{,-private} don't reap each other, and stable across re-runs within a +# repo so cleanup still works. Derived by aws_instance_name, shared with ci.sh's +# connect/kill helpers so the launch and connect names never drift. +instance_name=$(aws_instance_name "$REF_NAME" "$arch" "${INSTANCE_POSTFIX:-}") state_dir=$(mktemp -d /tmp/aws_request_instance.XXXXXX) @@ -89,7 +82,6 @@ else key_name="build-instance" fi -[ -n "${INSTANCE_POSTFIX:-}" ] && instance_name+="_$INSTANCE_POSTFIX" docker_hostname=$(echo -n "$instance_name" | tr '_' '-' | cut -c 1-63) if [ "$use_ssh" -eq 1 ]; then @@ -367,6 +359,7 @@ start_build() { -e GITHUB_ACTOR=${GITHUB_ACTOR:-} \ -e EC2_INSTANCE_TYPE=${EC2_INSTANCE_TYPE:-unknown} \ -e EC2_SPOT=${EC2_SPOT:-unknown} \ + -e BENCH_UPLOAD=${BENCH_UPLOAD:-} \ -e AZTEC_TOOLCHAIN_DEFAULT_MAJOR_VERSION=${AZTEC_TOOLCHAIN_DEFAULT_MAJOR_VERSION:-} \ -e DRY_RUN=${DRY_RUN:-} \ --pids-limit=65536 \ diff --git a/ci3/dashboard/rk.py b/ci3/dashboard/rk.py index f3354d146268..fb89613c749d 100644 --- a/ci3/dashboard/rk.py +++ b/ci3/dashboard/rk.py @@ -198,9 +198,8 @@ def root() -> str: f"\n" f"Benchmarks:\n" f"\n{YELLOW}" - f"{hyperlink('https://aztecprotocol.github.io/benchmark-page-data/bench?branch=master', 'master')}\n" - f"{hyperlink('https://aztecprotocol.github.io/benchmark-page-data/bench?branch=staging', 'staging')}\n" f"{hyperlink('https://aztecprotocol.github.io/benchmark-page-data/bench?branch=next', 'next')}\n" + 
f"{hyperlink('https://aztecprotocol.github.io/benchmark-page-data/bench?branch=prs', 'prs')}\n" f"{hyperlink('/chonk-breakdowns', 'chonk breakdowns')}\n" f"{RESET}" f"\n" diff --git a/ci3/dashboard/rk_cli.py b/ci3/dashboard/rk_cli.py index 7c006b18f68a..998d4759f423 100644 --- a/ci3/dashboard/rk_cli.py +++ b/ci3/dashboard/rk_cli.py @@ -6,7 +6,7 @@ def main(): parser = argparse.ArgumentParser(description='CI Run Viewer (CLI)') parser.add_argument('--section', '-s', type=str, required=True, - help='Section to display (master, staging, next, prs, releases)') + help='Section to display (next, prs, releases)') parser.add_argument('--offset', type=int, default=0, help='Offset for pagination') parser.add_argument('--limit', '-l', type=int, default=200, help='Number of results to fetch') parser.add_argument('--filter', '-f', dest='filter_str', type=str, default='',