Skip to content

Commit 5696384

Browse files
committed
feat: working CPU and IRQ pinning scripts
1 parent 6e1c955 commit 5696384

9 files changed

Lines changed: 154 additions & 165 deletions

File tree

.github/workflows/nightly_bench.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
build_and_test:
1212
runs-on: ubuntu-latest
1313
container:
14-
image: ghcr.io/milsanore/tradercppbuild:v1.7
14+
image: ghcr.io/milsanore/tradercppbuild:v1.8
1515
# set resource limits for some consistency
1616
options: --memory=7g --cpus=2
1717
steps:

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111
build_release:
1212
runs-on: ubuntu-latest
1313
container:
14-
image: ghcr.io/milsanore/tradercppbuild:v1.7
14+
image: ghcr.io/milsanore/tradercppbuild:v1.8
1515
steps:
1616
- name: Checkout repository
1717
uses: actions/checkout@v4

.github/workflows/reusable_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
build_and_test:
1717
runs-on: ubuntu-latest
1818
container:
19-
image: ghcr.io/milsanore/tradercppbuild:v1.7
19+
image: ghcr.io/milsanore/tradercppbuild:v1.8
2020
steps:
2121
- name: Checkout repository
2222
uses: actions/checkout@v4

Dockerfile_build

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#############################################
2-
# milss/tradercppbuild:v1.7
2+
# milss/tradercppbuild:v1.8
33
# Build container for tradercpp
44
# Published to dockerhub in order to accelerate github actions
55
# Uses:
@@ -56,12 +56,6 @@ RUN apt update && apt install -y --no-install-recommends \
5656

5757

5858
# Install LLVM 18 (clang-format and clang-tidy)
59-
RUN apt update && apt install -y --no-install-recommends \
60-
gnupg \
61-
lsb-release \
62-
software-properties-common \
63-
wget \
64-
&& rm -rf /var/lib/apt/lists/*
6559

6660
RUN wget -qO - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
6761

Makefile

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ withenv:
2525
test -e .env || cp .env.example .env
2626
bash -c 'set -o allexport; source .env; set +o allexport; make "$$RECIPE"'
2727

28-
# TODO: replace ☝️ with `set -a; . .env; set +a;`
29-
3028
## init: 🏌️ initialize the project
3129
.PHONY: init
3230
init:
@@ -99,15 +97,17 @@ run-debug:
9997
## run-release: 🏎️ run the app (prod)
10098
.PHONY: run-release
10199
run-release:
102-
# set -a; . .env; set +a; sudo -E scripts/pin_irqs.sh
103-
set -a; . .env; set +a; sudo -E scripts/pin_cpus.sh build/Release/tradercpp
100+
$(call pp,moving IRQs)
101+
set -o allexport; source .env; set +o allexport; sudo -E scripts/pin_irqs.sh
102+
$(call pp,moving CPUs and starting app)
103+
set -o allexport; source .env; set +o allexport; sudo -E scripts/pin_cpus.sh build/Release/tradercpp
104104

105105
## restore-cpus: 🖥️ hand back pinned CPUs to the operating system; re-assigning IRQs requires a reboot. (NB app must not be running)
106106
.PHONY: restore-cpus
107107
restore-cpus:
108-
$(call pp,handing CPU and IRQ management back to the kernel. NB: app must not be running`)
109-
set -a; . .env; set +a; sudo -E scripts/unpin_cpus.sh
110-
set -a; . .env; set +a; sudo -E scripts/unpin_irqs.sh
108+
$(call pp,handing CPU management back to the kernel (NB: app must not be running))
109+
$(call pp,NB: for re-assigning IRQs reboot the machine)
110+
set -o allexport; source .env; set +o allexport; sudo -E scripts/unpin_cpus.sh
111111

112112
# CONTAINERISATION RECIPES ----------------------------------------------------
113113

@@ -117,7 +117,7 @@ build-container:
117117
IMAGE_VERSION=
118118
@if [ -z "$(IMAGE_VERSION)" ]; then \
119119
echo "Error: IMAGE_VERSION is not set"; \
120-
echo "(you can set it on the command line like so: \`make build-container IMAGE_VERSION=1.7\`)"; \
120+
echo "(you can set it on the command line like so: \`make build-container IMAGE_VERSION=1.8\`)"; \
121121
exit 1; \
122122
fi
123123
docker build -f Dockerfile_build -t milss/tradercppbuild:latest -t milss/tradercppbuild:v$(IMAGE_VERSION) .

scripts/pin_cpus.sh

Lines changed: 49 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env bash
2-
set -euo pipefail
32

43
# ----------------------------------------------------------
54
# CPU isolation launcher for Linux (cgroup v1 & v2)
@@ -9,64 +8,82 @@ set -euo pipefail
98
# 2. Validation: Ensures CPU_SET_NAME and CPU_SET_RANGE exist.
109
# 3. Cgroup detection: Automatically handles v1 and v2.
1110
# 4. CPU and memory configuration: Writes cpuset.cpus and cpuset.mems.
12-
# 5. Associating the application:
13-
# - $$ is the PID of the script itself.
14-
# - When we exec "$@", the application replaces the script and inherits the cgroup.
15-
# - This is the standard way to bind the app to the cpuset.
11+
# 5. Associating the application
1612
# ----------------------------------------------------------
1713

18-
# Validate environment variables
14+
set -euo pipefail
15+
16+
# validate required environment variables
1917
: "${CPU_SET_NAME:?Environment variable CPU_SET_NAME is required}"
2018
: "${CPU_SET_RANGE:?Environment variable CPU_SET_RANGE is required}"
2119
echo "CPU isolation launcher starting..."
22-
echo "CPU_SET_NAME = $CPU_SET_NAME"
23-
echo "CPU_SET_RANGE = $CPU_SET_RANGE"
20+
echo "CPU_SET_NAME [$CPU_SET_NAME]"
21+
echo "CPU_SET_RANGE [$CPU_SET_RANGE]"
22+
23+
# check for root
24+
if [ "$EUID" -ne 0 ]; then
25+
echo "this script must be run as root."
26+
echo "try: sudo $0"
27+
exit 1
28+
fi
2429

25-
# Check for root privileges
26-
if [[ $EUID -ne 0 ]]; then
27-
echo "WARNING: root privileges are required for true CPU isolation."
28-
echo "Proceeding without isolation..."
29-
exec "$@"
30+
if ! [[ $CPU_SET_RANGE =~ ^[0-9,-]+$ ]]; then
31+
echo "invalid CPU_SET_RANGE format. value [$CPU_SET_RANGE]"
32+
exit 1
3033
fi
3134

32-
# Detect cgroup version
35+
# --- minimal NUMA check (disabled; NOTE(review): nodes_used is still read below when cpuset.mems is missing) ---
36+
# nodes_used=$(for cpu in $(echo "$CPU_SET_RANGE" | sed 's/,/ /g'); do
37+
# for node_dir in /sys/devices/system/node/node*; do
38+
# grep -qE "(^|,)$cpu($|,|-)" "$node_dir/cpulist" && echo $(basename $node_dir | sed 's/node//')
39+
# done
40+
# done | sort -u)
41+
#
42+
# if [[ $(wc -w <<< "$nodes_used") -gt 1 ]]; then
43+
# echo "error: requested CPUs span multiple NUMA nodes. nodes [$nodes_used]"
44+
# exit 1
45+
# fi
46+
# echo "requested CPUs are within NUMA node. node [$nodes_used]"
47+
# --- end minimal NUMA check ---
48+
49+
# build path based on cgroup version
3350
if [[ -f /sys/fs/cgroup/cgroup.controllers ]]; then
3451
CGROUP_VERSION=2
3552
BASE_CGROUP="/sys/fs/cgroup"
3653
else
3754
CGROUP_VERSION=1
3855
BASE_CGROUP="/sys/fs/cgroup/cpuset"
3956
fi
40-
echo "Detected cgroup version: $CGROUP_VERSION"
41-
57+
echo "detected cgroup version. value [$CGROUP_VERSION]"
4258
GROUP_PATH="$BASE_CGROUP/$CPU_SET_NAME"
4359

44-
# Create cpuset directory if it doesn't exist
60+
# create cpuset directory if it doesn't exist
4561
if [[ ! -d "$GROUP_PATH" ]]; then
4662
mkdir -p "$GROUP_PATH"
47-
echo "Created cpuset directory: $GROUP_PATH"
63+
echo "created cpuset directory. path [$GROUP_PATH]"
4864
fi
4965

50-
# Write CPUs
66+
# write CPUs
5167
CPU_FILE="$GROUP_PATH/cpuset.cpus"
5268
echo "$CPU_SET_RANGE" > "$CPU_FILE"
53-
echo "Wrote CPUs to $CPU_FILE"
69+
echo "wrote CPUs to file. path [$CPU_FILE]"
5470

55-
# Write memory nodes (required for cpuset)
71+
# write memory nodes (required for cpuset)
5672
MEM_FILE="$GROUP_PATH/cpuset.mems"
5773
if [[ ! -f "$MEM_FILE" ]]; then
58-
# assume NUMA node 0
59-
echo "0" > "$MEM_FILE"
74+
echo "$nodes_used" > "$MEM_FILE"
6075
fi
61-
echo "Set memory nodes in $MEM_FILE"
62-
63-
# TODO(MILS): WARN ABOUT MULTI-NUMA CPU RANGES AND REFUSE TO CONTINUE
76+
echo "set memory nodes in file. path [$MEM_FILE]"
6477

65-
# Add current process to the cpuset
66-
# - $$ is the PID of the script itself.
67-
# - When we exec "$@", the application replaces the script and inherits the cgroup.
78+
# add current process to the cpuset
6879
PROCS_FILE="$GROUP_PATH/cgroup.procs"
80+
# NB: $$ is the PID of the script itself
6981
echo "$$" > "$PROCS_FILE"
70-
echo "Added PID $$ to $PROCS_FILE"
71-
echo "Launching application in isolated cpuset..."
72-
exec "$@"
82+
echo "added process to cpuset. PID [$$] path [$PROCS_FILE]"
83+
echo "launching application in isolated cpuset..."
84+
# NB: "$@" is run as a child process (not exec'd); child processes inherit the script's cgroup
85+
"$@"
86+
# report the application's exit code so that this wrapper script doesn't hide it
87+
ret=$?
88+
echo "Application exited with code $ret"
89+
exit $ret

scripts/pin_irqs.sh

Lines changed: 69 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,69 +1,83 @@
1-
#!/bin/bash
2-
# ------------------------------------------------------------
3-
# Move all IRQs (except NICs) to non-isolated CPUs dynamically
4-
# ------------------------------------------------------------
1+
#!/usr/bin/env bash
2+
3+
# --------------------------------------------------------
4+
# IRQ isolation script
5+
# Moves all IRQs off of the specified isolated CPUs
6+
#
7+
# How it works:
8+
# 1. CPU range → bitmask
9+
# - Converts something like 0-3,5 → 0b00101111 → 0x2F
10+
# 2. Iterates /proc/irq/*/smp_affinity
11+
# - Reads current mask
12+
# - Clears bits for isolated CPUs
13+
# - Writes new mask back
14+
# 3. Safe
15+
# - Only writes if a change is needed
16+
# - Skips files that aren't writable
17+
#
18+
# How to undo: reboot
19+
# --------------------------------------------------------
520

621
set -euo pipefail
722

8-
# Check for root
23+
: "${CPU_SET_RANGE:?Environment variable CPU_SET_RANGE is required}"
24+
echo "IRQ isolation starting..."
25+
echo "CPU_SET_RANGE [$CPU_SET_RANGE]"
26+
27+
# check for root
928
if [ "$EUID" -ne 0 ]; then
10-
echo "This script must be run as root."
11-
echo "Try: sudo $0"
29+
echo "this script must be run as root."
30+
echo "try: sudo $0"
1231
exit 1
1332
fi
1433

15-
# Handle filenames with whitespace
16-
IFS=$'\n\t'
17-
18-
# List of NIC interfaces to protect (keep IRQs on isolated cores)
19-
NIC_IFACES=("eth0" "eth1") # modify as needed
20-
21-
# Determine total CPU count
22-
CPU_COUNT=$(nproc)
23-
24-
# Set last isolated CPU index (adjust if your isolated CPUs differ)
25-
# TODO: detect isolated CPUs dynamically (from /sys/devices/system/cpu/isolated)
26-
ISOLATED_END=1 # e.g., CPUs 0-1 are isolated
27-
28-
# Build CPU mask for CPUs >= ISOLATED_END + 1
29-
# Example: if CPU_COUNT=8, ISOLATED_END=1 -> mask = 11111100 = FC
30-
# The mask is binary, then converted to hex
31-
CPU_MASK=$(printf '%X' "$(( (1 << CPU_COUNT) - 1 - ((1 << (ISOLATED_END + 1)) - 1) ))")
32-
33-
echo "Detected CPU mask for IRQs: $CPU_MASK (CPUs >= $((ISOLATED_END + 1)))"
34-
35-
# ------------------------------------------------------------
36-
# Helper function to check if an IRQ belongs to a NIC
37-
# ------------------------------------------------------------
38-
is_nic_irq() {
39-
local irq_dir="$1"
40-
local hint_file="$irq_dir/affinity_hint"
41-
[ -f "$hint_file" ] || return 1
42-
43-
for nic in "${NIC_IFACES[@]}"; do
44-
if grep -q "$nic" "$hint_file" 2>/dev/null; then
45-
return 0 # It's a NIC IRQ
34+
# --- Convert CPU range to bitmask ---
35+
cpu_to_mask() {
36+
# converts something like 0-3,5 to a hex bitmask string
37+
local cpus=()
38+
IFS=',' read -ra parts <<< "$1"
39+
for part in "${parts[@]}"; do
40+
if [[ $part =~ ^([0-9]+)-([0-9]+)$ ]]; then
41+
for ((i=${BASH_REMATCH[1]}; i<=${BASH_REMATCH[2]}; i++)); do
42+
cpus+=($i)
43+
done
44+
else
45+
cpus+=($part)
4646
fi
4747
done
48-
return 1 # Not a NIC IRQ
49-
}
5048

51-
# ------------------------------------------------------------
52-
# Iterate through all IRQ directories
53-
# ------------------------------------------------------------
54-
for irq_dir in /proc/irq/*; do
55-
[ -d "$irq_dir" ] || continue
56-
irq_num=$(basename "$irq_dir")
57-
smp_file="$irq_dir/smp_affinity"
58-
59-
[ -f "$smp_file" ] || continue
49+
# build bitmask
50+
local mask=0
51+
for cpu in "${cpus[@]}"; do
52+
mask=$((mask | (1 << cpu)))
53+
done
54+
printf "%x" "$mask"
55+
}
56+
ISOLATED_MASK=$(cpu_to_mask "$CPU_SET_RANGE")
57+
echo "isolated CPU mask: 0x[$ISOLATED_MASK]"
6058

61-
if is_nic_irq "$irq_dir"; then
62-
echo "Skipping NIC IRQ $irq_num"
59+
# iterate over all IRQs
60+
for irq in /proc/irq/*/smp_affinity; do
61+
# skip non-files
62+
[[ -f $irq ]] || continue
63+
# Check if writable
64+
if [[ ! -w "$irq" ]]; then
65+
echo "skipping IRQ, smp_affinity not writable. IRQ [$(basename $(dirname $irq))]"
6366
continue
6467
fi
65-
66-
# Move IRQ to CPUs >= ISOLATED_END + 1
67-
echo "$CPU_MASK" > "$smp_file"
68-
echo "Moved IRQ $irq_num to CPUs >= $((ISOLATED_END + 1))"
68+
# read current mask
69+
current=$(cat "$irq")
70+
# convert hex to decimal
71+
current_dec=$((16#$current))
72+
# remove isolated CPUs
73+
new_dec=$((current_dec & ~0x$ISOLATED_MASK))
74+
# calculate new mask
75+
new_hex=$(printf "%x" "$new_dec")
76+
# only write if different
77+
if [[ "$current" != "$new_hex" ]]; then
78+
echo "moving IRQ off isolated CPUs. IRQ [$(basename $(dirname $irq))] old [$current] new [$new_hex]"
79+
echo "$new_hex" > "$irq"
80+
fi
6981
done
82+
83+
echo "IRQ isolation complete."

0 commit comments

Comments
 (0)