Skip to content

Commit 296835a

Browse files
committed
KVM: Enable HA heartbeat on SharedMountPoint
1 parent c3d6a8c commit 296835a

File tree

7 files changed

+244
-6
lines changed

7 files changed

+244
-6
lines changed

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
public class KVMHAMonitor extends KVMHABase implements Runnable {
3636

37+
public static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
38+
3739
private final Map<String, HAStoragePool> storagePool = new ConcurrentHashMap<>();
3840
private final boolean rebootHostAndAlertManagementOnHeartbeatTimeout;
3941

@@ -86,7 +88,7 @@ protected void runHeartBeat() {
8688
Set<String> removedPools = new HashSet<>();
8789
for (String uuid : storagePool.keySet()) {
8890
HAStoragePool primaryStoragePool = storagePool.get(uuid);
89-
if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) {
91+
if (STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) {
9092
checkForNotExistingPools(removedPools, uuid);
9193
if (removedPools.contains(uuid)) {
9294
continue;

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckVMActivityOnStoragePoolCommandWrapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,9 @@ public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final L
4747
final KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();
4848

4949
KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid());
50+
primaryPool.setType(pool.getType());
5051

51-
if (primaryPool.isPoolSupportHA()){
52+
if (primaryPool.isPoolSupportHA()) {
5253
final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
5354
final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());
5455
final Future<Boolean> future = executors.submit(ha);

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePool.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ default Long getUsedIops() {
9191

9292
public StoragePoolType getType();
9393

94+
default void setType(StoragePoolType type) {
95+
}
96+
9497
public boolean delete();
9598

9699
PhysicalDiskFormat getDefaultFormat();

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/KVMStoragePoolManager.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public class KVMStoragePoolManager {
5757
private final Map<String, StoragePoolInformation> _storagePools = new ConcurrentHashMap<String, StoragePoolInformation>();
5858
private final Map<String, StorageAdaptor> _storageMapper = new HashMap<String, StorageAdaptor>();
5959

60-
private StorageAdaptor getStorageAdaptor(StoragePoolType type) {
60+
public StorageAdaptor getStorageAdaptor(StoragePoolType type) {
6161
// type can be null: LibVirtComputingResource:3238
6262
if (type == null) {
6363
return _storageMapper.get("libvirt");
@@ -390,6 +390,7 @@ public KVMStoragePool createStoragePool(String name, String host, int port, Stri
390390
private synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map<String, String> details, boolean primaryStorage) {
391391
StorageAdaptor adaptor = getStorageAdaptor(type);
392392
KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage);
393+
pool.setType(type);
393394

394395
// LibvirtStorageAdaptor-specific statement
395396
if (pool.isPoolSupportHA() && primaryStorage) {

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import com.cloud.agent.properties.AgentProperties;
3333
import com.cloud.agent.properties.AgentPropertiesFileHandler;
3434
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
35+
import com.cloud.hypervisor.kvm.resource.KVMHAMonitor;
3536
import com.cloud.storage.Storage;
3637
import com.cloud.storage.Storage.StoragePoolType;
3738
import com.cloud.utils.exception.CloudRuntimeException;
@@ -320,13 +321,16 @@ public void setDetails(Map<String, String> details) {
320321

321322
@Override
322323
public boolean isPoolSupportHA() {
323-
return type == StoragePoolType.NetworkFilesystem;
324+
return KVMHAMonitor.STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
324325
}
325326

326327
public String getHearthBeatPath() {
327-
if (type == StoragePoolType.NetworkFilesystem) {
328+
if (StoragePoolType.NetworkFilesystem.equals(type)) {
328329
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
329330
return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
331+
} else if (StoragePoolType.SharedMountPoint.equals(type)) {
332+
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
333+
return Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh");
330334
}
331335
return null;
332336
}
@@ -410,4 +414,9 @@ public Boolean vmActivityCheck(HAStoragePool pool, HostTO host, Duration activit
410414
return true;
411415
}
412416
}
417+
418+
@Override
419+
public void setType(StoragePoolType type) {
420+
this.type = type;
421+
}
413422
}

plugins/storage/volume/default/src/main/java/org/apache/cloudstack/storage/datastore/driver/CloudStackPrimaryDataStoreDriverImpl.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;
2222

2323
import java.util.HashMap;
24+
import java.util.List;
2425
import java.util.Map;
2526
import java.util.UUID;
2627

@@ -100,6 +101,8 @@ public Map<String, String> getCapabilities() {
100101
protected Logger logger = LogManager.getLogger(getClass());
101102
private static final String NO_REMOTE_ENDPOINT_WITH_ENCRYPTION = "No remote endpoint to send command, unable to find a valid endpoint. Requires encryption support: %s";
102103

104+
private static final List<StoragePoolType> STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
105+
103106
@Inject
104107
DiskOfferingDao diskOfferingDao;
105108
@Inject
@@ -587,7 +590,7 @@ private boolean anyVolumeRequiresEncryption(DataObject ... objects) {
587590

588591
@Override
589592
public boolean isStorageSupportHA(StoragePoolType type) {
590-
return StoragePoolType.NetworkFilesystem == type;
593+
return type != null && STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
591594
}
592595

593596
@Override
Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#!/bin/bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
# Print the command-line usage summary on stdout, then terminate the script
# with exit status 1. This function never returns to its caller.
help() {
    # The script name is passed as a printf argument instead of being spliced
    # into the format string, so a '%' or '\' in $0 cannot corrupt the output.
    printf 'Usage: %s
-i identifier (ignored for local-only heartbeat)
-p path (ignored for local-only heartbeat)
-m mount point (local path where heartbeat will be written)
-h host (host IP/name to include in heartbeat filename)
-r write/read hb log (read-check mode)
-c cleanup (trigger emergency reboot)
-t interval between read hb log\n' "$0"
    exit 1
}
30+
31+
#set -x
32+
# Option values. Everything starts empty/off until getopts fills it in.
NfsSvrIP=
NfsSvrPath=
MountPoint=
HostIP=
interval=
rflag=0
cflag=0

# -i, -p, -m, -h and -t take an argument; -r and -c are plain switches.
# Any other option prints the usage text and exits through help.
while getopts 'i:p:m:h:t:rc' OPTION; do
    case $OPTION in
        i) NfsSvrIP="$OPTARG" ;;    # kept for CLI compatibility; only its presence is checked later
        p) NfsSvrPath="$OPTARG" ;;  # kept for CLI compatibility with kvmheartbeat.sh
        m) MountPoint="$OPTARG" ;;
        h) HostIP="$OPTARG" ;;
        r) rflag=1 ;;
        t) interval="$OPTARG" ;;
        c) cflag=1 ;;
        *) help ;;
    esac
done
69+
70+
# Same contract as kvmheartbeat.sh: without -i the script exits 1 silently.
[ -n "$NfsSvrIP" ] || exit 1

# The heartbeat is written below the mount point, so -m is mandatory.
# help prints the usage text and exits 1.
if [ -z "$MountPoint" ]; then
    echo "Mount point (-m) is required"
    help
fi

# Create the mount point directory when it is missing; give up if that fails.
if [ ! -d "$MountPoint" ] && ! mkdir -p "$MountPoint" 2>/dev/null; then
    echo "Failed to create mount point directory: $MountPoint" >&2
    exit 1
fi

# Without -h, fall back to the first address printed by `hostname -I`,
# and to the plain hostname when that yields nothing.
if [ -z "$HostIP" ]; then
    ipaddr=$(hostname -I 2>/dev/null | awk '{print $1}')
    HostIP=${ipaddr:-$(hostname)}
fi
102+
103+
#delete VMs on this mountpoint (best-effort)
104+
# Best-effort forced stop of the VMs that use a mount point: sends SIGKILL to
# every process whose `ps aux` line contains both "qemu" and the given path.
#   $1 - mount point path to look for in the process list
# Failures of individual kill calls are ignored.
deleteVMs() {
    local mountPoint=$1
    local vmPids pid

    # An empty pattern would match, and therefore kill, every qemu process
    # on the host.
    if [ -z "$mountPoint" ]; then
        return
    fi

    # -F matches the path literally instead of as a regular expression.
    # The status of this pipeline is awk's alone, so it says nothing about
    # whether a process matched; only the emptiness of the result is checked.
    vmPids=$(ps aux | grep qemu | grep -F -- "$mountPoint" | awk '{print $2}' 2> /dev/null)
    if [ -z "$vmPids" ]; then
        return
    fi

    for pid in $vmPids; do
        kill -9 "$pid" &> /dev/null
    done
}
122+
123+
# Check whether $MountPoint appears in /proc/mounts. This local-only script
# never remounts anything, so a missing mount is simply left alone.
#
# This code runs on every invocation, including plain heartbeat writes
# (neither -r nor -c). Killing the qemu processes whenever the mount is
# present would therefore stop the VMs of a healthy pool on each write.
# The kill is restricted to cleanup mode (-c without -r), where the script
# goes on to reboot the host anyway.
# NOTE(review): confirm whether VMs should also be fenced when the mount has
# disappeared; nothing is done in that case.
mounts=$(grep -- "$MountPoint" /proc/mounts)
if [ $? -eq 0 ] && [ "$rflag" == "0" ] && [ "$cflag" == "1" ]; then
    deleteVMs "$MountPoint"
fi
137+
138+
hbFolder="$MountPoint/KVMHA/"
hbFile="$hbFolder/hb-$HostIP"

# Write the current epoch time (seconds) into $hbFile.
# The value goes to a temporary file next to the target first and is then
# renamed over it, so a reader never sees a partially written heartbeat.
# Returns 0 on success, 2 when the folder is not writable or the temporary
# file cannot be written, and mv's exit status when the rename fails.
write_hbLog() {
    local timestamp tmpfile rc

    # First write for this host: make sure the heartbeat folder exists and
    # can be written to.
    if ! stat "$hbFile" &> /dev/null; then
        mkdir -p "$hbFolder" &> /dev/null
        if [ ! -w "$hbFolder" ]; then
            # Paths are passed as printf arguments, never as the format.
            printf 'Folder not writable: %s' "$hbFolder" >&2
            return 2
        fi
    fi

    timestamp=$(date +%s)
    tmpfile="${hbFile}.$$"
    if ! printf "%s\n" "$timestamp" > "$tmpfile" 2>/dev/null; then
        # Do not leave a partial temporary file on the storage.
        rm -f "$tmpfile" 2>/dev/null
        printf 'Failed to write heartbeat to %s' "$tmpfile" >&2
        return 2
    fi

    mv -f "$tmpfile" "$hbFile" 2>/dev/null
    rc=$?
    if [ "$rc" -ne 0 ]; then
        rm -f "$tmpfile" 2>/dev/null
    fi
    return "$rc"
}
166+
167+
# Compare the timestamp stored in $hbFile with the current time.
# Returns 0 when the heartbeat is fresh: its age is at most $interval
# seconds, or, when no interval was given, not greater than 0.
# Returns a non-zero status otherwise: the age in seconds capped at 255, or
# 255 when the file is missing, empty or does not hold a plain number.
#
# A shell exit status only carries 8 bits. Returning the raw age would wrap
# modulo 256, so a heartbeat stale by exactly 256, 512, ... seconds would be
# reported as 0, i.e. alive.
check_hbLog() {
    local now hb age threshold

    if [ ! -f "$hbFile" ]; then
        return 255
    fi

    hb=$(cat "$hbFile" 2>/dev/null)
    case "$hb" in
        '' | *[!0-9]*) return 255 ;;
    esac

    now=$(date +%s)
    # Arithmetic expansion instead of expr: expr exits with status 1 when the
    # result is 0, which made a heartbeat written in the current second look
    # like an error. 10# forces base 10 even with leading zeros.
    age=$(( now - 10#$hb ))

    # Without an interval, any positive age counts as stale.
    if [ -z "$interval" ]; then
        threshold=0
    else
        threshold=$interval
    fi

    if [ "$age" -gt "$threshold" ]; then
        if [ "$age" -gt 255 ]; then
            age=255
        fi
        return "$age"
    fi
    return 0
}
196+
197+
# Mode dispatch. -r (read-check) takes precedence over -c (cleanup); with
# neither flag set the heartbeat is written.
case "$rflag:$cflag" in
    1:*)
        # Read-check mode: report the verdict on stdout and always exit 0.
        check_hbLog
        diff=$?
        if [ $diff == 0 ]; then
            echo "=====> ALIVE <====="
        else
            echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
        fi
        exit 0
        ;;
    *:1)
        # Cleanup mode: log the reason, start a background sync, wait five
        # seconds, then write 'b' to the sysrq trigger.
        /usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
        sync &
        sleep 5
        echo b > /proc/sysrq-trigger
        exit $?
        ;;
    *)
        # Default mode: write the heartbeat and pass its status on.
        write_hbLog
        exit $?
        ;;
esac
219+

0 commit comments

Comments
 (0)