Skip to content

Commit 26b0baa

Browse files
committed
Update IBS support for AMD
1 parent 04d7a66 commit 26b0baa

14 files changed

Lines changed: 579 additions & 259 deletions

File tree

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,17 @@ The memory pool is managed by `SharedMemoryManager`. It can use POSIX shared mem
106106

107107
The server CLI uses local parse-result structs in `src/main_server.cc`, `src/main.cc`, and `src/rob.cc`, so command-line parsing is handled in-tree.
108108

109+
## CPU PMU Compatibility
110+
111+
The application-level simulator uses Linux `perf_event_open()` for sampling. Intel systems use PEBS (Precise Event-Based Sampling) for load-miss samples and Intel CHA (Caching and Home Agent) PMUs when the CPU model has a known uncore path. AMD systems use the AMD IBS (Instruction-Based Sampling) op PMU when `/sys/bus/event_source/devices/ibs_op/type` is available, and fall back to generic hardware cache-miss sampling when IBS or physical-address sampling is unavailable. Intel CHA counters and LBR (Last Branch Record) accounting are not required on non-Intel systems; unsupported PMU paths are logged and disabled instead of aborting the run.
112+
113+
If monitor setup fails, check the CPU PMU support exposed by the kernel and the perf permission level:
114+
115+
```bash
116+
cat /proc/sys/kernel/perf_event_paranoid
117+
ls /sys/bus/event_source/devices/
118+
```
119+
109120
## Coherency and Distributed Memory
110121

111122
The distributed path is implemented in:

include/helper.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,11 @@ struct LBRElem {
106106
};
107107

108108
struct CPUInfo {
109-
uint32_t max_cpuid;
110-
uint32_t cpu_family;
111-
uint32_t cpu_model;
112-
uint32_t cpu_stepping;
109+
char vendor_id[16]{};
110+
uint32_t max_cpuid{};
111+
uint32_t cpu_family{};
112+
uint32_t cpu_model{};
113+
uint32_t cpu_stepping{};
113114
};
114115

115116
struct Elem {

include/lbr.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ struct lbr_sample {
4949
uint32_t tid;
5050
uint64_t timestamp;
5151
uint32_t cpu;
52-
uint64_t nr2;
52+
uint32_t cpu_reserved;
53+
uint64_t nr;
5354
lbr lbrs[32];
5455
cntr counters[32];
5556
};
@@ -63,6 +64,7 @@ class LBR {
6364
size_t rdlen{};
6465
size_t mplen{};
6566
bool use_pe2 = false;
67+
bool sample_has_branch_counters = false;
6668
perf_event_mmap_page *mp;
6769
explicit LBR(pid_t, uint64_t);
6870
~LBR();

include/monitor.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,7 @@ class Monitor {
7373
injected_delay(other.injected_delay), before(nullptr), // Will be set after copying elements
7474
after(nullptr), // Will be set after copying elements
7575
total_delay(other.total_delay), start_exec_ts(other.start_exec_ts), end_exec_ts(other.end_exec_ts),
76-
is_process(other.is_process), pebs_ctx(other.pebs_ctx ? new PEBS(*other.pebs_ctx) : nullptr),
77-
lbr_ctx(other.lbr_ctx ? new LBR(*other.lbr_ctx) : nullptr) {
76+
is_process(other.is_process), pebs_ctx(nullptr), lbr_ctx(nullptr) {
7877
status.store(other.status.load());
7978
std::copy(std::begin(other.elem), std::end(other.elem), std::begin(elem));
8079
before = &elem[0];

include/pebs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class PEBS {
3030
uint32_t seq{};
3131
size_t rdlen{};
3232
size_t mplen{};
33+
bool sample_has_phys_addr{};
3334
perf_event_mmap_page *mp;
3435
PEBS(pid_t, uint64_t);
3536
~PEBS();
Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,74 @@
11
#!/bin/bash
2-
# 重启两个VM使用共享内存
2+
# Restart two VMs with shared memory
33

44
echo "========================================="
5-
echo "重启VM以使用共享内存配置"
5+
echo "Restart VMs with shared-memory configuration"
66
echo "========================================="
77
echo ""
88

9-
# 找到QEMU进程并杀掉
10-
echo "[1/5] 停止现有VM..."
9+
# Find and stop QEMU processes
10+
echo "[1/5] Stopping existing VMs..."
1111
pkill -9 qemu-system-x86_64
1212
sleep 3
1313

14-
# 确保cxlmemsim_server正在运行
15-
echo "[2/5] 检查cxlmemsim_server..."
14+
# Ensure cxlmemsim_server is running
15+
echo "[2/5] Checking cxlmemsim_server..."
1616
if ! pgrep -x cxlmemsim_server > /dev/null; then
17-
echo " 错误: cxlmemsim_server未运行"
18-
echo " 请先启动server"
17+
echo " ERROR: cxlmemsim_server is not running"
18+
echo " Start the server first"
1919
exit 1
2020
fi
21-
echo " ✓ Server正在运行"
21+
echo " OK: server is running"
2222

23-
# 检查共享内存文件
24-
echo "[3/5] 检查共享内存文件..."
23+
# Check the shared-memory file
24+
echo "[3/5] Checking shared-memory file..."
2525
if [ ! -f /dev/shm/cxlmemsim_shared ]; then
26-
echo " 错误: /dev/shm/cxlmemsim_shared 不存在"
26+
echo " ERROR: /dev/shm/cxlmemsim_shared does not exist"
2727
exit 1
2828
fi
29-
echo " ✓ 共享内存文件存在: $(ls -lh /dev/shm/cxlmemsim_shared | awk '{print $5}')"
29+
echo " OK: shared-memory file exists: $(ls -lh /dev/shm/cxlmemsim_shared | awk '{print $5}')"
3030

31-
# 创建LSA文件(如果不存在)
32-
echo "[4/5] 准备LSA文件..."
31+
# Create LSA files if they do not exist
32+
echo "[4/5] Preparing LSA files..."
3333
for i in 0 1; do
3434
LSA_FILE="/tmp/lsa${i}.raw"
3535
if [ ! -f "$LSA_FILE" ]; then
36-
echo " 创建 $LSA_FILE..."
36+
echo " Creating $LSA_FILE..."
3737
truncate -s 256M "$LSA_FILE"
3838
fi
3939
done
40-
echo " ✓ LSA文件准备完成"
40+
echo " OK: LSA files are ready"
4141

42-
# 启动VM
43-
echo "[5/5] 启动VM..."
42+
# Start VMs
43+
echo "[5/5] Starting VMs..."
4444
echo ""
45-
echo "启动Node 0..."
45+
echo "Starting Node 0..."
4646
cd /home/yhgan913/CXLMemSim/qemu_integration
4747
nohup ./launch_qemu_cxl.sh > /tmp/qemu0.log 2>&1 &
4848
QEMU0_PID=$!
4949
echo " PID: $QEMU0_PID"
5050

5151
sleep 5
5252

53-
echo "启动Node 1..."
53+
echo "Starting Node 1..."
5454
nohup ./launch_qemu_cxl1.sh > /tmp/qemu1.log 2>&1 &
5555
QEMU1_PID=$!
5656
echo " PID: $QEMU1_PID"
5757

5858
echo ""
5959
echo "========================================="
60-
echo "VM已启动"
60+
echo "VMs started"
6161
echo "========================================="
6262
echo "Node 0 PID: $QEMU0_PID"
6363
echo "Node 1 PID: $QEMU1_PID"
6464
echo ""
65-
echo "查看日志:"
65+
echo "View logs:"
6666
echo " tail -f /tmp/qemu0.log"
6767
echo " tail -f /tmp/qemu1.log"
6868
echo ""
69-
echo "等待10秒让VM启动..."
69+
echo "Waiting 10 seconds for VM boot..."
7070
sleep 10
7171

72-
echo "检查VM状态..."
73-
ping -c 1 -W 2 192.168.100.10 > /dev/null && echo " Node 0 (192.168.100.10) 可达" || echo " Node 0 不可达"
74-
ping -c 1 -W 2 192.168.100.11 > /dev/null && echo " Node 1 (192.168.100.11) 可达" || echo " Node 1 不可达"
72+
echo "Checking VM status..."
73+
ping -c 1 -W 2 192.168.100.10 > /dev/null && echo " OK: Node 0 (192.168.100.10) reachable" || echo " FAIL: Node 0 unreachable"
74+
ping -c 1 -W 2 192.168.100.11 > /dev/null && echo " OK: Node 1 (192.168.100.11) reachable" || echo " FAIL: Node 1 unreachable"
Lines changed: 56 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,145 +1,145 @@
11
#!/bin/bash
2-
# 测试共享内存是否工作
2+
# Test whether shared memory is working
33

44
echo "========================================="
5-
echo "测试共享内存配置"
5+
echo "Testing shared-memory configuration"
66
echo "========================================="
77
echo ""
88

9-
# 1. 检查host共享内存
10-
echo "[1/6] 检查host共享内存文件..."
9+
# 1. Check host shared memory
10+
echo "[1/6] Checking host shared-memory file..."
1111
if [ ! -f /dev/shm/cxlmemsim_shared ]; then
12-
echo " ✗ 错误: /dev/shm/cxlmemsim_shared 不存在"
12+
echo " ERROR: /dev/shm/cxlmemsim_shared does not exist"
1313
exit 1
1414
fi
15-
echo " ✓ 文件存在: $(ls -lh /dev/shm/cxlmemsim_shared | awk '{print $5}')"
15+
echo " OK: file exists: $(ls -lh /dev/shm/cxlmemsim_shared | awk '{print $5}')"
1616

17-
# 2. 检查哪些进程在使用
18-
echo "[2/6] 检查使用共享内存的进程..."
17+
# 2. Check which processes are using it
18+
echo "[2/6] Checking processes using shared memory..."
1919
USERS=$(lsof /dev/shm/cxlmemsim_shared 2>/dev/null | tail -n +2 | awk '{print $1}' | sort -u)
2020
if [ -z "$USERS" ]; then
21-
echo " ✗ 警告: 没有进程在使用共享内存"
21+
echo " WARN: no process is using shared memory"
2222
else
23-
echo " ✓ 使用中的进程:"
23+
echo " OK: processes using shared memory:"
2424
echo "$USERS" | while read proc; do
2525
COUNT=$(lsof /dev/shm/cxlmemsim_shared 2>/dev/null | grep -c "$proc")
26-
echo " - $proc (${COUNT}个实例)"
26+
echo " - $proc (${COUNT} instances)"
2727
done
2828
fi
2929
echo ""
3030

31-
# 3. 检查VM可达性
32-
echo "[3/6] 检查VM网络连接..."
31+
# 3. Check VM reachability
32+
echo "[3/6] Checking VM network connectivity..."
3333
if ping -c 1 -W 2 192.168.100.10 > /dev/null 2>&1; then
34-
echo " Node 0 (192.168.100.10) 可达"
34+
echo " OK: Node 0 (192.168.100.10) reachable"
3535
NODE0_UP=1
3636
else
37-
echo " Node 0 (192.168.100.10) 不可达"
37+
echo " FAIL: Node 0 (192.168.100.10) unreachable"
3838
NODE0_UP=0
3939
fi
4040

4141
if ping -c 1 -W 2 192.168.100.11 > /dev/null 2>&1; then
42-
echo " Node 1 (192.168.100.11) 可达"
42+
echo " OK: Node 1 (192.168.100.11) reachable"
4343
NODE1_UP=1
4444
else
45-
echo " Node 1 (192.168.100.11) 不可达"
45+
echo " FAIL: Node 1 (192.168.100.11) unreachable"
4646
NODE1_UP=0
4747
fi
4848
echo ""
4949

5050
if [ $NODE0_UP -eq 0 ] || [ $NODE1_UP -eq 0 ]; then
51-
echo "错误: 一个或多个VM不可达,无法继续测试"
51+
echo "ERROR: one or more VMs are unreachable; cannot continue"
5252
exit 1
5353
fi
5454

55-
# 4. 检查DAX设备
56-
echo "[4/6] 检查VM内DAX设备..."
55+
# 4. Check DAX devices
56+
echo "[4/6] Checking DAX devices inside VMs..."
5757
NODE0_DAX=$(ssh root@192.168.100.10 "ls -l /dev/dax0.0 2>&1" | grep -c "^c")
5858
NODE1_DAX=$(ssh root@192.168.100.11 "ls -l /dev/dax0.0 2>&1" | grep -c "^c")
5959

6060
if [ $NODE0_DAX -eq 1 ]; then
61-
echo " Node 0: /dev/dax0.0 存在"
61+
echo " OK: Node 0: /dev/dax0.0 exists"
6262
else
63-
echo " Node 0: /dev/dax0.0 不存在"
63+
echo " FAIL: Node 0: /dev/dax0.0 does not exist"
6464
fi
6565

6666
if [ $NODE1_DAX -eq 1 ]; then
67-
echo " Node 1: /dev/dax0.0 存在"
67+
echo " OK: Node 1: /dev/dax0.0 exists"
6868
else
69-
echo " Node 1: /dev/dax0.0 不存在"
69+
echo " FAIL: Node 1: /dev/dax0.0 does not exist"
7070
fi
7171
echo ""
7272

73-
# 5. 测试共享内存写入/读取
74-
echo "[5/6] 测试共享内存读写..."
73+
# 5. Test shared-memory write/read
74+
echo "[5/6] Testing shared-memory write/read..."
7575
TEST_STRING="SHARED_MEMORY_TEST_$(date +%s)"
76-
echo " 写入测试字符串到Node 0: $TEST_STRING"
76+
echo " Writing test string to Node 0: $TEST_STRING"
7777

78-
# 在Node 0写入
78+
# Write on Node 0
7979
ssh root@192.168.100.10 "echo -n '$TEST_STRING' | dd of=/dev/dax0.0 bs=1 seek=1024 2>/dev/null"
8080
sleep 1
8181

82-
# 在Node 1读取
82+
# Read on Node 1
8383
RESULT=$(ssh root@192.168.100.11 "dd if=/dev/dax0.0 bs=1 skip=1024 count=${#TEST_STRING} 2>/dev/null")
8484

8585
if [ "$RESULT" = "$TEST_STRING" ]; then
86-
echo " ✓ 成功!Node 1 读取到Node 0写入的数据"
87-
echo " 写入: $TEST_STRING"
88-
echo " 读取: $RESULT"
86+
echo " OK: Node 1 read data written by Node 0"
87+
echo " wrote: $TEST_STRING"
88+
echo " read: $RESULT"
8989
SHARED_WORKS=1
9090
else
91-
echo " ✗ 失败!共享内存不工作"
92-
echo " 写入: $TEST_STRING"
93-
echo " 读取: $RESULT"
91+
echo " FAIL: shared memory is not working"
92+
echo " wrote: $TEST_STRING"
93+
echo " read: $RESULT"
9494
SHARED_WORKS=0
9595
fi
9696
echo ""
9797

98-
# 6. 测试反向(Node 1写,Node 0读)
99-
echo "[6/6] 测试反向读写..."
98+
# 6. Test reverse direction (Node 1 writes, Node 0 reads)
99+
echo "[6/6] Testing reverse write/read..."
100100
TEST_STRING2="REVERSE_TEST_$(date +%s)"
101-
echo " 写入测试字符串到Node 1: $TEST_STRING2"
101+
echo " Writing test string to Node 1: $TEST_STRING2"
102102

103-
# 在Node 1写入
103+
# Write on Node 1
104104
ssh root@192.168.100.11 "echo -n '$TEST_STRING2' | dd of=/dev/dax0.0 bs=1 seek=2048 2>/dev/null"
105105
sleep 1
106106

107-
# 在Node 0读取
107+
# Read on Node 0
108108
RESULT2=$(ssh root@192.168.100.10 "dd if=/dev/dax0.0 bs=1 skip=2048 count=${#TEST_STRING2} 2>/dev/null")
109109

110110
if [ "$RESULT2" = "$TEST_STRING2" ]; then
111-
echo " ✓ 成功!Node 0 读取到Node 1写入的数据"
112-
echo " 写入: $TEST_STRING2"
113-
echo " 读取: $RESULT2"
111+
echo " OK: Node 0 read data written by Node 1"
112+
echo " wrote: $TEST_STRING2"
113+
echo " read: $RESULT2"
114114
REVERSE_WORKS=1
115115
else
116-
echo " ✗ 失败!反向共享不工作"
117-
echo " 写入: $TEST_STRING2"
118-
echo " 读取: $RESULT2"
116+
echo " FAIL: reverse shared-memory path is not working"
117+
echo " wrote: $TEST_STRING2"
118+
echo " read: $RESULT2"
119119
REVERSE_WORKS=0
120120
fi
121121
echo ""
122122

123-
# 总结
123+
# Summary
124124
echo "========================================="
125-
echo "测试总结"
125+
echo "Test summary"
126126
echo "========================================="
127127
if [ $SHARED_WORKS -eq 1 ] && [ $REVERSE_WORKS -eq 1 ]; then
128-
echo "✓ 共享内存配置正确!"
128+
echo "OK: shared-memory configuration is correct"
129129
echo ""
130-
echo "可以运行Tigon多节点测试了:"
130+
echo "You can now run the Tigon multi-node test:"
131131
echo " cd /home/yhgan913/CXLMemSim/workloads/tigon"
132132
echo " export CXL_BACKEND=dax"
133133
echo " export CXL_MEMORY_RESOURCE=/dev/dax0.0"
134134
echo " ./scripts/run.sh TPCC TwoPLPasha 2 3 mixed 10 15 1 0 1 Clock OnDemand 200000000 1 WriteThrough None 30 10 BLACKHOLE 20000 0 0"
135135
exit 0
136136
else
137-
echo "✗ 共享内存配置有问题"
137+
echo "FAIL: shared-memory configuration has a problem"
138138
echo ""
139-
echo "故障排查:"
140-
echo " 1. 确认两个VM都使用 /dev/shm/cxlmemsim_shared"
141-
echo " 2. 检查 lsof /dev/shm/cxlmemsim_shared"
142-
echo " 3. 查看 /tmp/qemu0.log /tmp/qemu1.log"
143-
echo " 4. 重启VM: ./restart_vms_shared.sh"
139+
echo "Troubleshooting:"
140+
echo " 1. Confirm both VMs use /dev/shm/cxlmemsim_shared"
141+
echo " 2. Check lsof /dev/shm/cxlmemsim_shared"
142+
echo " 3. Review /tmp/qemu0.log and /tmp/qemu1.log"
143+
echo " 4. Restart VMs: ./restart_vms_shared.sh"
144144
exit 1
145145
fi

0 commit comments

Comments
 (0)