Skip to content

Commit 004b638

Browse files
committed
update
1 parent 0042c47 commit 004b638

8 files changed

Lines changed: 444 additions & 17 deletions

File tree

qemu_integration/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,25 @@ sudo CXL_REGION_TYPE=ram CXL_DAX_MODE=devdax ./setup_cxl_numa.sh
170170
sudo CXL_REGION_TYPE=ram CXL_DAX_MODE=system-ram ./setup_cxl_numa.sh
171171
```
172172

173+
When installing `cxl-numa-setup.service` in a guest, put per-VM settings in
174+
`/etc/default/cxl-numa-setup`. The setup helper derives a default static IP as
175+
`192.168.100.(10 + CXL_HOST_ID)/24` when `CXL_CONFIGURE_NET=1` and
176+
`CXL_NET_ADDR` is not set:
177+
178+
```bash
179+
# Primary VM
180+
CXL_CONFIGURE_NET=1
181+
CXL_HOST_ID=0
182+
183+
# Secondary VM
184+
CXL_CONFIGURE_NET=1
185+
CXL_HOST_ID=1
186+
```
187+
188+
Set `CXL_NET_ADDR` explicitly if the VM should use a different address. The
189+
helper auto-detects the first non-loopback network interface (preferring one that is already up) by default; set
190+
`CXL_NET_IFACE=enp0s2` or similar to pin it.
191+
173192
For DCD/volatile CXL.mem, use `daxctl`/device-dax or system-ram mode. The old
174193
`ndctl create-namespace -m dax` path is for pmem-style regions and is disabled
175194
by default in `setup_cxl_numa.sh`; enable it only with

qemu_integration/cxl-numa-setup.service

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ DefaultDependencies=no
88
[Service]
99
Type=oneshot
1010
RemainAfterExit=yes
11+
EnvironmentFile=-/etc/default/cxl-numa-setup
1112
ExecStart=/usr/local/bin/setup_cxl_numa.sh
1213
StandardOutput=journal
1314
StandardError=journal
1415
TimeoutSec=60
1516

1617
[Install]
17-
WantedBy=sysinit.target
18+
WantedBy=sysinit.target
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
# Boot a q35 guest under KVM with a single CXL Type-3 device whose memory and
# label storage area (LSA) are backed by shared files.
#
# Every setting below can be overridden from the environment; the value after
# ":=" is used only when the variable is unset or empty.
set -euo pipefail

# --- Host-side paths and guest sizing ---------------------------------------
: "${QEMU_BINARY:=/usr/local/bin/qemu-system-x86_64}"
: "${ROOT:=/home/victoryang00/CXLMemSim}"
: "${KERNEL:=$ROOT/build/bzImage}"
: "${DISK_IMAGE:=$ROOT/build/qemu.img}"
: "${VM_MEMORY:=8G}"
: "${VM_MAX_MEMORY:=16G}"
: "${SMP:=4}"
: "${SSH_PORT:=10022}"

# --- CXL backing files and window size --------------------------------------
: "${CXL_BACKING:=/dev/shm/cxlmemsim_shared}"
: "${CXL_LSA:=/dev/shm/lsa0.raw}"
: "${CXL_BACKING_SIZE:=1G}"
: "${CXL_LSA_SIZE:=256M}"
: "${CXL_FMW_SIZE:=4G}"

# Size (creating if necessary) the files handed to the memory backends below.
truncate -s "$CXL_BACKING_SIZE" "$CXL_BACKING"
truncate -s "$CXL_LSA_SIZE" "$CXL_LSA"

# Exported so that the exec'd QEMU process inherits them.
: "${CXL_TRANSPORT_MODE:=shm}"
: "${CXL_PGAS_SHM:=/cxlmemsim_pgas}"
: "${CXL_HOST_ID:=0}"
: "${CXL_MEMSIM_HOST:=127.0.0.1}"
: "${CXL_MEMSIM_PORT:=9999}"
export CXL_TRANSPORT_MODE CXL_PGAS_SHM CXL_HOST_ID CXL_MEMSIM_HOST CXL_MEMSIM_PORT

qemu_args=(
  --enable-kvm
  -cpu qemu64,+xsave,+rdtscp,+avx,+avx2,+sse4.1,+sse4.2,+clflushopt
  -m "${VM_MEMORY},maxmem=${VM_MAX_MEMORY},slots=8"
  -smp "$SMP"
  -M q35,cxl=on
  -kernel "$KERNEL"
  -append "root=/dev/sda rw console=ttyS0,115200 nokaslr"
  -drive "file=${DISK_IMAGE},index=0,media=disk,format=raw"
  # Guest SSH is reachable only through the host loopback address.
  -netdev "user,id=net0,hostfwd=tcp:127.0.0.1:${SSH_PORT}-:22"
  -device virtio-net-pci,netdev=net0,mac=52:54:00:00:10:22
  # Share $ROOT with the guest over 9p under the mount tag "hostrepo".
  -fsdev "local,security_model=none,id=fsdev0,path=${ROOT}"
  -device virtio-9p-pci,id=fs0,fsdev=fsdev0,mount_tag=hostrepo,bus=pcie.0
  # CXL topology: host bridge -> root port -> Type-3 device.
  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1
  -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=0
  -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0,sn=0x1
  -object "memory-backend-file,id=cxl-mem1,share=on,mem-path=${CXL_BACKING},size=${CXL_BACKING_SIZE}"
  -object "memory-backend-file,id=cxl-lsa1,share=on,mem-path=${CXL_LSA},size=${CXL_LSA_SIZE}"
  -M "cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=${CXL_FMW_SIZE}"
  -nographic
)

# Replace this shell with QEMU so signals and the exit status pass straight
# through to whoever launched the script.
exec "$QEMU_BINARY" "${qemu_args[@]}"
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#!/bin/bash
# Launch QEMU with N CXL Type-2 accelerator contexts attached to one host VM.
# Used by the Splash multi-GPU scaling sweep.
#
# Environment:
#   NUM_TYPE2         - Number of Type-2 accelerators to attach; any integer
#                       from 1 to 8 is accepted (e.g. 1, 2, 4, 6, 8).
#                       Default: 1.
#   QEMU_BINARY       - Path to qemu-system-x86_64.
#   DISK_IMAGE        - File name of the guest rootfs image, resolved under
#                       /home/victoryang00/CXLMemSim/build/ unless
#                       DISK_IMAGE_PATH is set. Default: qemu1.img.
#   DISK_IMAGE_PATH   - Full path to the guest rootfs image; when set it is
#                       used as-is and DISK_IMAGE is ignored.
#   KERNEL_IMAGE      - Path to bzImage.
#   SSH_PORT          - hostfwd guest:22 -> host:${SSH_PORT}. Default 10022.
#   HETGPU_BACKEND    - 0 auto / 3 nvidia / 5 simulation. Default 5.
#   HETGPU_LIB        - Path to libnvcuda.so.
#   CXL_TYPE2_CACHE_SIZE / CXL_TYPE2_MEM_SIZE - per-device sizes.
#   DIRECTORY_ENTRIES - snoop filter capacity (0 uses device default).
#   CXL_MEMSIM_HOST / CXL_MEMSIM_PORT - passed to every device as
#                       cxlmemsim-addr / cxlmemsim-port.
#                       Defaults: 127.0.0.1 / 9999.
#   MONITOR_SOCK      - QEMU monitor unix socket path.
#   VM_LOG            - File to redirect serial console + stderr.

set -euo pipefail

NUM_TYPE2=${NUM_TYPE2:-1}
QEMU_BINARY=${QEMU_BINARY:-/home/victoryang00/CXLMemSim/lib/qemu/build/qemu-system-x86_64}
DISK_IMAGE=${DISK_IMAGE:-qemu1.img}
DISK_IMAGE_PATH=${DISK_IMAGE_PATH:-}
# If no absolute path provided, default to <repo>/build/<DISK_IMAGE> which is
# the canonical location, independent of the invoker's working directory.
if [[ -z "${DISK_IMAGE_PATH}" ]]; then
  DISK_IMAGE_PATH="/home/victoryang00/CXLMemSim/build/${DISK_IMAGE}"
fi
KERNEL_IMAGE=${KERNEL_IMAGE:-/home/victoryang00/cxl/arch/x86/boot/bzImage}
SSH_PORT=${SSH_PORT:-10022}
HETGPU_BACKEND=${HETGPU_BACKEND:-5}
HETGPU_LIB=${HETGPU_LIB:-/home/victoryang00/hetGPU/target/debug/libnvcuda.so}
CXL_TYPE2_CACHE_SIZE=${CXL_TYPE2_CACHE_SIZE:-128M}
CXL_TYPE2_MEM_SIZE=${CXL_TYPE2_MEM_SIZE:-4G}
DIRECTORY_ENTRIES=${DIRECTORY_ENTRIES:-0}
MONITOR_SOCK=${MONITOR_SOCK:-/tmp/qemu-mon.sock}
VM_LOG=${VM_LOG:-/home/victoryang00/CXLMemSim/artifact/splash_sweep/vm1.log}
CXL_MEMSIM_HOST=${CXL_MEMSIM_HOST:-127.0.0.1}
CXL_MEMSIM_PORT=${CXL_MEMSIM_PORT:-9999}

# "10#" forces decimal: without it a value such as "08" passes the digit check
# but is rejected as invalid octal by shell arithmetic further down.
if ! [[ "$NUM_TYPE2" =~ ^[0-9]+$ ]] ||
  ((10#$NUM_TYPE2 < 1 || 10#$NUM_TYPE2 > 8)); then
  echo "NUM_TYPE2 must be an integer between 1 and 8" >&2
  exit 1
fi
NUM_TYPE2=$((10#$NUM_TYPE2))

# Build one root port plus one Type-2 device per accelerator. Arrays keep
# every "-device <spec>" pair intact regardless of what the values contain.
RP_OPTS=()
T2_OPTS=()
for ((i = 0; i < NUM_TYPE2; i++)); do
  PORT=$((13 + i))
  RP_ID="root_port${PORT}"
  T2_ID="cxl-type2-hetgpu${i}"
  printf -v SN '0x%x' $((0x10 + i))
  RP_OPTS+=(-device "cxl-rp,port=${i},bus=cxl.1,id=${RP_ID},chassis=0,slot=${i}")
  T2_OPTS+=(-device "cxl-type2,bus=${RP_ID},cache-size=${CXL_TYPE2_CACHE_SIZE},mem-size=${CXL_TYPE2_MEM_SIZE},sn=${SN},cxlmemsim-addr=${CXL_MEMSIM_HOST},cxlmemsim-port=${CXL_MEMSIM_PORT},coherency-enabled=true,gpu-mode=2,hetgpu-lib=${HETGPU_LIB},hetgpu-device=0,hetgpu-backend=${HETGPU_BACKEND},directory-entries=${DIRECTORY_ENTRIES},id=${T2_ID}")
done

# The redirection below is set up by the shell before QEMU starts, so the log
# directory has to exist or the launch fails before exec is even attempted.
mkdir -p -- "$(dirname -- "${VM_LOG}")"

# DISK_IMAGE_PATH is always non-empty at this point (see the default above).
exec "${QEMU_BINARY}" \
  --enable-kvm -cpu qemu64,+xsave,+rdtscp,+avx,+avx2,+sse4.1,+sse4.2,+avx512f,+avx512dq,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi,+clflushopt \
  -kernel "${KERNEL_IMAGE}" \
  -append "root=/dev/vda rw console=ttyS0,115200 nokaslr systemd.mask=cxl-numa-setup.service" \
  -netdev "user,id=net0,hostfwd=tcp::${SSH_PORT}-:22" \
  -device virtio-net-pci,netdev=net0,mac=52:54:00:00:00:02 \
  -device virtio-rng-pci \
  -drive "file=${DISK_IMAGE_PATH},if=none,id=disk0,format=raw" \
  -device virtio-blk-pci,drive=disk0,bus=pcie.0 \
  -M q35,cxl=on -m 16G,maxmem=32G,slots=8 -smp 4 \
  -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \
  "${RP_OPTS[@]}" \
  "${T2_OPTS[@]}" \
  -monitor "unix:${MONITOR_SOCK},server,nowait" \
  -D /dev/null \
  -nographic >"${VM_LOG}" 2>&1

qemu_integration/setup_cxl_numa.sh

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,21 @@ CXL_DAX_MODE=${CXL_DAX_MODE:-devdax}
2020
CXL_TOUCH_DAX=${CXL_TOUCH_DAX:-${ZETTAI_TOUCH_DAX:-0}}
2121
CXL_CREATE_NDCTL_NAMESPACE=${CXL_CREATE_NDCTL_NAMESPACE:-0}
2222
CXL_CONFIGURE_NET=${CXL_CONFIGURE_NET:-0}
23-
CXL_NET_IFACE=${CXL_NET_IFACE:-enp0s2}
24-
CXL_NET_ADDR=${CXL_NET_ADDR:-192.168.100.10/24}
23+
CXL_HOST_ID=${CXL_HOST_ID:-${ZETTAI_HOST_ID:-0}}
24+
CXL_NET_IFACE=${CXL_NET_IFACE:-auto}
25+
CXL_NET_BASE=${CXL_NET_BASE:-192.168.100}
26+
CXL_NET_PREFIX=${CXL_NET_PREFIX:-24}
27+
CXL_NET_HOST_OFFSET=${CXL_NET_HOST_OFFSET:-10}
28+
# Print the default static guest address "<base>.<host octet>/<prefix>".
#
# Globals (read): CXL_NET_BASE, CXL_NET_PREFIX, CXL_NET_HOST_OFFSET, CXL_HOST_ID
# Outputs:        the address on stdout
#
# The host octet is CXL_NET_HOST_OFFSET + CXL_HOST_ID when both are plain
# digit strings; otherwise CXL_NET_HOST_OFFSET is used unchanged.
cxl_default_net_addr() {
    local host_octet=$CXL_NET_HOST_OFFSET
    if [[ "$CXL_HOST_ID" =~ ^[0-9]+$ && "$CXL_NET_HOST_OFFSET" =~ ^[0-9]+$ ]]; then
        # "10#" forces decimal; a bare "08" or "09" would otherwise be parsed
        # as octal by shell arithmetic and abort with an error.
        host_octet=$((10#$CXL_NET_HOST_OFFSET + 10#$CXL_HOST_ID))
    fi
    printf '%s\n' "${CXL_NET_BASE}.${host_octet}/${CXL_NET_PREFIX}"
}

# An explicitly provided CXL_NET_ADDR always wins over the derived default.
if [[ -z "${CXL_NET_ADDR:-}" ]]; then
    CXL_NET_ADDR=$(cxl_default_net_addr)
fi
# The default gateway follows CXL_NET_BASE so that overriding the base does not
# leave a gateway on a different subnet; with the default base this is still
# 192.168.100.1.
CXL_NET_GW=${CXL_NET_GW:-${CXL_NET_BASE}.1}
36+
CXL_NET_WAIT_RETRIES=${CXL_NET_WAIT_RETRIES:-10}
37+
CXL_NET_WAIT_DELAY=${CXL_NET_WAIT_DELAY:-0.2}
2638
MAX_RETRIES=${MAX_RETRIES:-20}
2739
RETRY_DELAY=${RETRY_DELAY:-2}
2840

@@ -44,6 +56,9 @@ Environment:
4456
CXL_TOUCH_DAX mmap and touch /dev/daxX.Y, default: 0
4557
CXL_CREATE_NDCTL_NAMESPACE Legacy ndctl namespace path, default: 0
4658
CXL_CONFIGURE_NET Configure static guest network, default: 0
59+
CXL_HOST_ID Node id used for default IP, default: 0
60+
CXL_NET_IFACE Interface name or auto, default: auto
61+
CXL_NET_ADDR Static address, default: 192.168.100.(10+CXL_HOST_ID)/24
4762
EOF
4863
}
4964

@@ -304,9 +319,68 @@ configure_network() {
304319
return 0
305320
fi
306321

307-
ip link set "$CXL_NET_IFACE" up
308-
ip addr add "$CXL_NET_ADDR" dev "$CXL_NET_IFACE" 2>/dev/null || true
309-
ip route add default via "$CXL_NET_GW" 2>/dev/null || true
322+
if ! command -v ip >/dev/null 2>&1; then
323+
log "WARNING: ip command is not installed; skipping network setup"
324+
return 0
325+
fi
326+
327+
local iface=""
328+
if ! iface=$(wait_for_net_iface); then
329+
log "WARNING: no network interface available for CXL_NET_IFACE=$CXL_NET_IFACE; skipping network setup"
330+
return 0
331+
fi
332+
333+
log "Configuring guest network: iface=$iface addr=$CXL_NET_ADDR gw=$CXL_NET_GW"
334+
if ! ip link set "$iface" up; then
335+
log "WARNING: failed to bring $iface up; skipping network setup"
336+
return 0
337+
fi
338+
339+
ip addr add "$CXL_NET_ADDR" dev "$iface" 2>/dev/null || true
340+
if [[ -n "$CXL_NET_GW" ]]; then
341+
ip route add default via "$CXL_NET_GW" 2>/dev/null || true
342+
fi
343+
}
344+
345+
# Print the first non-loopback interface listed by `ip -o link show [args]`.
#
# Arguments: extra selectors for `ip -o link show` (e.g. "up"); may be empty
# Outputs:   the interface name on stdout, or nothing when none qualifies
#
# An entry is skipped when it is named "lo" or carries the LOOPBACK flag, so a
# renamed loopback device is never selected. Any "@parent" suffix that ip
# prints for stacked devices is stripped from the name.
first_non_loopback_iface() {
    ip -o link show "$@" 2>/dev/null |
        awk -F': ' '
            $2 == "lo" || $3 ~ /^<([^>]*,)?LOOPBACK[,>]/ { next }
            { sub(/@.*/, "", $2); print $2; exit }'
}

# Resolve the interface that should receive the static guest address.
#
# Globals (read): CXL_NET_IFACE - an interface name, or "auto"
# Outputs:        the chosen interface name on stdout
# Returns:        0 when an interface was found, 1 otherwise
#
# With an explicit name, that interface must already exist. With "auto", an
# interface that is already up is preferred; if none is up, the first
# non-loopback interface in any state is used.
detect_net_iface() {
    if [[ "$CXL_NET_IFACE" != "auto" ]]; then
        ip link show dev "$CXL_NET_IFACE" >/dev/null 2>&1 || return 1
        printf '%s\n' "$CXL_NET_IFACE"
        return 0
    fi

    local iface=""
    # A failing pipeline simply leaves iface empty; the emptiness checks below
    # decide the outcome, so the pipeline status itself is ignored.
    iface=$(first_non_loopback_iface up) || true
    if [[ -z "$iface" ]]; then
        iface=$(first_non_loopback_iface) || true
    fi

    [[ -n "$iface" ]] || return 1
    printf '%s\n' "$iface"
}
368+
369+
# Poll detect_net_iface until it reports an interface or the retry budget is
# exhausted.
#
# Globals (read): CXL_NET_WAIT_RETRIES - maximum number of detection attempts
#                 CXL_NET_WAIT_DELAY   - seconds passed to sleep between attempts
# Outputs:        the interface name on stdout, and nothing else
# Returns:        0 when an interface was found, 1 after the last failed attempt
wait_for_net_iface() {
    local attempt=0
    local found=""

    while ((attempt < CXL_NET_WAIT_RETRIES)); do
        if found=$(detect_net_iface); then
            printf '%s\n' "$found"
            return 0
        fi
        # Callers capture this function's stdout as the interface name, so the
        # progress message is sent to stderr; otherwise anything log prints to
        # stdout would end up inside the captured value after a retry.
        log "Waiting for guest network interface... (attempt $((attempt + 1))/$CXL_NET_WAIT_RETRIES)" >&2
        sleep "$CXL_NET_WAIT_DELAY"
        attempt=$((attempt + 1))
    done

    return 1
}
311385

312386
main() {
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/usr/bin/env bash
# Regression test: with CXL_CONFIGURE_NET=1 and CXL_HOST_ID=1,
# setup_cxl_numa.sh must assign 192.168.100.11/24 to the detected interface.
#
# The real cxl, daxctl, ip, modprobe, udevadm and numactl tools are replaced by
# stubs placed first on PATH. The ip stub records every invocation in
# $CXL_TEST_STATE/ip.calls, which is what the final assertion inspects.
set -euo pipefail

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
SCRIPT="$SCRIPT_DIR/setup_cxl_numa.sh"
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT

STUB_DIR="$TMP_DIR/bin"
STATE_DIR="$TMP_DIR/state"
mkdir -p "$STUB_DIR" "$STATE_DIR"

# cxl: one memdev, one volatile-capable decoder, and a region that only shows
# up in "list -R" after create-region has been called.
cat >"$STUB_DIR/cxl" <<'STUB'
#!/usr/bin/env bash
set -euo pipefail

case "$*" in
  "list -M"*)
    echo '[{"memdev":"mem0"}]'
    ;;
  "list -B -D"*)
    echo '[{"decoder":"decoder0.0","volatile_capable":true,"max_available_extent":268435456}]'
    ;;
  "list -R"*)
    if [[ -f "$CXL_TEST_STATE/region_created" ]]; then
      echo '[{"region":"region0"}]'
    else
      echo '[]'
    fi
    ;;
  create-region*)
    touch "$CXL_TEST_STATE/region_created"
    echo '{"region":"region0"}'
    ;;
  list*)
    echo '[]'
    ;;
  *)
    echo "unexpected cxl invocation: $*" >&2
    exit 2
    ;;
esac
STUB

# daxctl: always reports a single dax0.0 device.
cat >"$STUB_DIR/daxctl" <<'STUB'
#!/usr/bin/env bash
set -euo pipefail

case "${1:-}" in
  list)
    echo '[{"chardev":"dax0.0"}]'
    ;;
  create-device)
    echo '{"chardev":"dax0.0"}'
    ;;
  reconfigure-device)
    exit 0
    ;;
  *)
    echo "unexpected daxctl invocation: $*" >&2
    exit 2
    ;;
esac
STUB

# ip: logs each call, reports one interface (enp0s2) that is up, and accepts
# the link/addr/route mutations without doing anything.
cat >"$STUB_DIR/ip" <<'STUB'
#!/usr/bin/env bash
set -euo pipefail

printf '%s\n' "$*" >>"$CXL_TEST_STATE/ip.calls"

case "$*" in
  "-o link show up"|"-o link show")
    echo '2: enp0s2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP mode DEFAULT group default qlen 1000'
    ;;
  link\ set\ *|addr\ add\ *|route\ add\ *)
    exit 0
    ;;
  *)
    echo "unexpected ip invocation: $*" >&2
    exit 2
    ;;
esac
STUB

cat >"$STUB_DIR/modprobe" <<'STUB'
#!/usr/bin/env bash
exit 0
STUB

cat >"$STUB_DIR/udevadm" <<'STUB'
#!/usr/bin/env bash
exit 0
STUB

cat >"$STUB_DIR/numactl" <<'STUB'
#!/usr/bin/env bash
echo 'available: 1 nodes (0)'
STUB

chmod +x "$STUB_DIR"/*

export PATH="$STUB_DIR:$PATH"
export CXL_TEST_STATE="$STATE_DIR"

LOG_FILE="$TMP_DIR/cxl_numa_setup.log"

# Print a captured file to stderr under a heading. A missing file is reported
# instead of aborting, so one absent artifact cannot hide the others.
dump_file() {
  local title=$1
  local path=$2
  echo "$title:" >&2
  if [[ -f "$path" ]]; then
    cat -- "$path" >&2
  else
    echo "(missing: $path)" >&2
  fi
}

# Capture the exit status rather than letting "set -e" abort here: the EXIT
# trap deletes TMP_DIR, so the captured output must be shown before leaving.
status=0
LOG_FILE="$LOG_FILE" \
MAX_RETRIES=1 \
RETRY_DELAY=0 \
CXL_CONFIGURE_NET=1 \
CXL_HOST_ID=1 \
  bash "$SCRIPT" >"$TMP_DIR/script.stdout" 2>"$TMP_DIR/script.stderr" || status=$?

if ((status != 0)); then
  echo "FAIL: setup_cxl_numa.sh exited with status $status" >&2
  dump_file "script stdout" "$TMP_DIR/script.stdout"
  dump_file "script stderr" "$TMP_DIR/script.stderr"
  dump_file "setup log" "$LOG_FILE"
  exit "$status"
fi

# -F: match the address literally; as a regex its dots would match any character.
if ! grep -qF -- 'addr add 192.168.100.11/24 dev enp0s2' "$STATE_DIR/ip.calls" 2>/dev/null; then
  echo "FAIL: secondary host id should default to 192.168.100.11/24" >&2
  dump_file "ip calls" "$STATE_DIR/ip.calls"
  dump_file "setup log" "$LOG_FILE"
  exit 1
fi

echo "OK: setup_cxl_numa secondary network defaults"

0 commit comments

Comments
 (0)