#!/bin/bash
# Test whether shared memory is working.
#
# Runs six checks and prints one status line per check:
#   1. the host backing file exists
#   2. which processes have the backing file open (informational only)
#   3. both VMs answer a ping
#   4. both VMs expose the DAX character device
#   5. data written on Node 0 can be read back on Node 1
#   6. data written on Node 1 can be read back on Node 0
#
# Exit status: 0 when both round-trips succeed, 1 on any hard failure.
# Requires: lsof, ping and ssh access as root to both VMs.

# No `set -e`: lsof, ping and ssh are expected to fail in some of the checks
# below, and every such failure is handled explicitly.
set -u

readonly SHM_FILE=/dev/shm/cxlmemsim_shared
readonly DAX_DEV=/dev/dax0.0
readonly NODE0_IP=192.168.100.10
readonly NODE1_IP=192.168.100.11

# Succeeds when host $1 answers a single ping within 2 seconds.
node_reachable() {
  ping -c 1 -W 2 "$1" > /dev/null 2>&1
}

# Succeeds when the DAX path exists on host $1 and is a character device.
node_has_dax() {
  ssh "root@$1" "[ -c ${DAX_DEV} ]"
}

# Writes string $3 at byte offset $2 of the DAX device on host $1.
dax_write() {
  ssh "root@$1" "echo -n '$3' | dd of=${DAX_DEV} bs=1 seek=$2 2>/dev/null"
}

# Prints $3 bytes read from byte offset $2 of the DAX device on host $1.
dax_read() {
  ssh "root@$1" "dd if=${DAX_DEV} bs=1 skip=$2 count=$3 2>/dev/null"
}

echo "========================================="
echo "Testing shared-memory configuration"
echo "========================================="
echo ""

# 1. Check host shared memory
echo "[1/6] Checking host shared-memory file..."
if [[ ! -f "$SHM_FILE" ]]; then
  echo " ERROR: ${SHM_FILE} does not exist"
  exit 1
fi
echo " OK: file exists: $(ls -lh "$SHM_FILE" | awk '{print $5}')"

# 2. Check which processes are using it
echo "[2/6] Checking processes using shared memory..."
# A single lsof call; rows are counted per exact COMMAND column value, so a
# process name that merely appears inside another field is not counted.
USERS=$(lsof "$SHM_FILE" 2>/dev/null \
  | awk 'NR > 1 { n[$1]++ } END { for (p in n) print p, n[p] }' \
  | sort -k1,1)
if [[ -z "$USERS" ]]; then
  echo " WARN: no process is using shared memory"
else
  echo " OK: processes using shared memory:"
  while read -r proc count; do
    echo " - $proc (${count} instances)"
  done <<< "$USERS"
fi
echo ""

# 3. Check VM reachability
echo "[3/6] Checking VM network connectivity..."
if node_reachable "$NODE0_IP"; then
  echo " OK: Node 0 (${NODE0_IP}) reachable"
  NODE0_UP=1
else
  echo " FAIL: Node 0 (${NODE0_IP}) unreachable"
  NODE0_UP=0
fi

if node_reachable "$NODE1_IP"; then
  echo " OK: Node 1 (${NODE1_IP}) reachable"
  NODE1_UP=1
else
  echo " FAIL: Node 1 (${NODE1_IP}) unreachable"
  NODE1_UP=0
fi
echo ""

if (( NODE0_UP == 0 || NODE1_UP == 0 )); then
  echo "ERROR: one or more VMs are unreachable; cannot continue"
  exit 1
fi

# 4. Check DAX devices
echo "[4/6] Checking DAX devices inside VMs..."
if node_has_dax "$NODE0_IP"; then
  echo " OK: Node 0: ${DAX_DEV} exists"
  NODE0_DAX=1
else
  echo " FAIL: Node 0: ${DAX_DEV} does not exist"
  NODE0_DAX=0
fi

if node_has_dax "$NODE1_IP"; then
  echo " OK: Node 1: ${DAX_DEV} exists"
  NODE1_DAX=1
else
  echo " FAIL: Node 1: ${DAX_DEV} does not exist"
  NODE1_DAX=0
fi
echo ""

# Stop here if a device is missing: the dd writes below run as root and would
# otherwise create a regular file at the device path on that VM.
if (( NODE0_DAX == 0 || NODE1_DAX == 0 )); then
  echo "ERROR: ${DAX_DEV} is missing on one or more VMs; cannot continue"
  exit 1
fi

# 5. Test shared-memory write/read
echo "[5/6] Testing shared-memory write/read..."
TEST_STRING="SHARED_MEMORY_TEST_$(date +%s)"
echo " Writing test string to Node 0: $TEST_STRING"

# Write on Node 0 (byte offset 1024)
dax_write "$NODE0_IP" 1024 "$TEST_STRING"
# Pause kept from the original script; presumably gives the write time to
# become visible to the other VM.
sleep 1

# Read on Node 1
RESULT=$(dax_read "$NODE1_IP" 1024 "${#TEST_STRING}")

if [[ "$RESULT" == "$TEST_STRING" ]]; then
  echo " OK: Node 1 read data written by Node 0"
  echo " wrote: $TEST_STRING"
  echo " read: $RESULT"
  SHARED_WORKS=1
else
  echo " FAIL: shared memory is not working"
  echo " wrote: $TEST_STRING"
  echo " read: $RESULT"
  SHARED_WORKS=0
fi
echo ""

# 6. Test reverse direction (Node 1 writes, Node 0 reads)
echo "[6/6] Testing reverse write/read..."
TEST_STRING2="REVERSE_TEST_$(date +%s)"
echo " Writing test string to Node 1: $TEST_STRING2"

# Write on Node 1 (byte offset 2048, separate from the forward test's offset)
dax_write "$NODE1_IP" 2048 "$TEST_STRING2"
sleep 1

# Read on Node 0
RESULT2=$(dax_read "$NODE0_IP" 2048 "${#TEST_STRING2}")

if [[ "$RESULT2" == "$TEST_STRING2" ]]; then
  echo " OK: Node 0 read data written by Node 1"
  echo " wrote: $TEST_STRING2"
  echo " read: $RESULT2"
  REVERSE_WORKS=1
else
  echo " FAIL: reverse shared-memory path is not working"
  echo " wrote: $TEST_STRING2"
  echo " read: $RESULT2"
  REVERSE_WORKS=0
fi
echo ""

# Summary
echo "========================================="
echo "Test summary"
echo "========================================="
if (( SHARED_WORKS == 1 && REVERSE_WORKS == 1 )); then
  echo "OK: shared-memory configuration is correct"
  echo ""
  echo "You can now run the Tigon multi-node test:"
  echo " cd /home/yhgan913/CXLMemSim/workloads/tigon"
  echo " export CXL_BACKEND=dax"
  echo " export CXL_MEMORY_RESOURCE=/dev/dax0.0"
  echo " ./scripts/run.sh TPCC TwoPLPasha 2 3 mixed 10 15 1 0 1 Clock OnDemand 200000000 1 WriteThrough None 30 10 BLACKHOLE 20000 0 0"
  exit 0
else
  echo "FAIL: shared-memory configuration has a problem"
  echo ""
  echo "Troubleshooting:"
  echo " 1. Confirm both VMs use /dev/shm/cxlmemsim_shared"
  echo " 2. Check lsof /dev/shm/cxlmemsim_shared"
  echo " 3. Review /tmp/qemu0.log and /tmp/qemu1.log"
  echo " 4. Restart VMs: ./restart_vms_shared.sh"
  exit 1
fi