Skip to content

Commit 3be22eb

Browse files
authored
Merge pull request #660 from jschoiRR/mold-main#2025
[Mold Agent] HA 기능 개선(rbd, clvm, gfs) - messages 로그 추가, rbd watcher 상태를 조회, clvm vg 조회 방식 변경
2 parents 21b11b9 + f0b0618 commit 3be22eb

17 files changed

Lines changed: 391 additions & 365 deletions

File tree

agent/bindir/libvirtqemuhook.in

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -293,8 +293,8 @@ if __name__ == '__main__':
293293
logger.error('The given action: %s, is not a valid libvirt qemu operation.' % action)
294294
sys.exit(0)
295295

296-
# if action == "prepare" and status == "begin":
297-
# getOwnership()
296+
if action == "prepare" and status == "begin":
297+
getOwnership()
298298

299299
if action == "migrate" and status == "begin":
300300
handleMigrateBegin()

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -806,7 +806,7 @@ public Property<Integer> getWorkers() {
806806
* Data type: Long.<br>
807807
* Default value: <code>60000l</code>
808808
*/
809-
public static final Property<Long> KVM_HEARTBEAT_CHECKER_FREQUENCY = new Property<>("kvm.heartbeat.checker.timeout", 60000L);
809+
public static final Property<Long> KVM_HEARTBEAT_CHECKER_FREQUENCY = new Property<>("kvm.heartbeat.checker.frequency", 60000L);
810810

811811
/**
812812
* Keystore passphrase

api/src/main/java/org/apache/cloudstack/ha/HAConfig.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,7 @@ public String getDescription() {
116116
FSM.addTransition(Checking, Event.TooFewActivityCheckSamples, Suspect);
117117
FSM.addTransition(Checking, Event.ActivityCheckFailureUnderThresholdRatio, Degraded);
118118
FSM.addTransition(Checking, Event.ActivityCheckFailureOverThresholdRatio, Recovering);
119+
FSM.addTransition(Checking, Event.HealthCheckPassed, Available);
119120

120121
FSM.addTransition(Degraded, Event.Disabled, Disabled);
121122
FSM.addTransition(Degraded, Event.Ineligible, Ineligible);

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4152,7 +4152,6 @@ private Map<String, String> getVersionStrings() {
41524152

41534153
@Override
41544154
public StartupCommand[] initialize() {
4155-
Script.runSimpleBashScript("rbd rm MOLD-AC");
41564155

41574156
final KVMHostInfo info = new KVMHostInfo(dom0MinMem, dom0OvercommitMem, manualCpuSpeed, dom0MinCpuCores);
41584157
calculateHostCpuMaxCapacity(info.getAllocatableCpus(), info.getCpuSpeed());

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/storage/LibvirtStoragePool.java

Lines changed: 8 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -569,16 +569,14 @@ public Boolean checkingHeartBeatRBD(HAStoragePool pool, HostTO host, String volu
569569
logger.info("### [HA Checking] checkingHeartBeatRBD Method Start!!!");
570570
boolean validResult = false;
571571
Script cmd = new Script(getHearthBeatPath(), HeartBeatCheckerTimeout, logger);
572-
if (pool.getPool().getType() == StoragePoolType.RBD) {
573-
cmd.add("-i", pool.getPoolSourceHost());
574-
cmd.add("-p", pool.getPoolMountSourcePath());
575-
cmd.add("-n", pool.getPoolAuthUserName());
576-
cmd.add("-s", pool.getPoolUUID());
577-
cmd.add("-h", host.getPrivateNetwork().getIp());
578-
cmd.add("-u", volumeList.length() > 0 ? volumeList : "");
579-
cmd.add("-r", "r");
580-
cmd.add("-t", String.valueOf(HeartBeatCheckerFreq / 1000));
581-
}
572+
cmd.add("-i", pool.getPoolSourceHost());
573+
cmd.add("-p", pool.getPoolMountSourcePath());
574+
cmd.add("-n", pool.getPoolAuthUserName());
575+
cmd.add("-s", pool.getPoolUUID());
576+
cmd.add("-h", host.getPrivateNetwork().getIp());
577+
// cmd.add("-u", volumeList.length() > 0 ? volumeList : "");
578+
cmd.add("-r", "r");
579+
cmd.add("-t", String.valueOf(HeartBeatCheckerFreq / 1000));
582580

583581
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
584582
String result = cmd.execute(parser);

scripts/vm/hypervisor/kvm/kvmheartbeat_clvm.sh

Lines changed: 75 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@
66
# to you under the Apache License, Version 2.0 (the
77
# "License"); you may not use this file except in compliance
88
# with the License. You may obtain a copy of the License at
9-
#
9+
#
1010
# http://www.apache.org/licenses/LICENSE-2.0
11-
#
11+
#
1212
# Unless required by applicable law or agreed to in writing,
1313
# software distributed under the License is distributed on an
1414
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -38,36 +38,35 @@ interval=0
3838
rflag=0
3939
cflag=0
4040

41-
while getopts 'p:n:g:h:q:t:rc' OPTION
42-
do
41+
while getopts 'p:n:g:h:q:t:rc' OPTION; do
4342
case $OPTION in
4443
p)
45-
RbdPoolName="$OPTARG"
46-
;;
44+
RbdPoolName="$OPTARG"
45+
;;
4746
n)
48-
RbdPoolAuthUserName="$OPTARG"
49-
;;
47+
RbdPoolAuthUserName="$OPTARG"
48+
;;
5049
g)
51-
GfsPoolPath="$OPTARG"
52-
;;
50+
GfsPoolPath="$OPTARG"
51+
;;
5352
h)
54-
HostIP="$OPTARG"
55-
;;
53+
HostIP="$OPTARG"
54+
;;
5655
q)
57-
poolPath="$OPTARG"
58-
;;
56+
poolPath="$OPTARG"
57+
;;
5958
t)
60-
interval="$OPTARG"
61-
;;
59+
interval="$OPTARG"
60+
;;
6261
r)
63-
rflag=1
64-
;;
62+
rflag=1
63+
;;
6564
c)
66-
cflag=1
67-
;;
65+
cflag=1
66+
;;
6867
*)
69-
help
70-
;;
68+
help
69+
;;
7170
esac
7271
done
7372

@@ -78,80 +77,97 @@ hbFile=$hbFolder/$HostIP-$poolPath
7877

7978
write_hbLog() {
8079
#write the heart beat log
81-
path=$(pvs 2>/dev/null | grep -w $poolPath | awk '{print $1}' | sed 's/[0-9]//g')
80+
path=$(grep 'device' /etc/lvm/backup/$poolPath | grep -oP '(?<=/dev/mapper/)[A-Za-z_-]+(?=[0-9])')
8281
persist=$(multipath -l $path | grep status=active)
83-
if [ $? -eq 0 ]
84-
then
82+
if [ $? -eq 0 ]; then
8583
Timestamp=$(date +%s)
84+
CurrentTime=$(date +"%Y-%m-%d %H:%M:%S")
85+
8686
if [ -n "$RbdPoolName" ]; then
87-
obj=$(rbd -p $RbdPoolName ls --id $RbdPoolAuthUserName | grep MOLD-HB)
87+
rbd -p $RbdPoolName ls --id $RbdPoolAuthUserName | grep -w MOLD-HB-$HostIP-$poolPath
8888
if [ $? -gt 0 ]; then
89-
rbd -p $RbdPoolName create --size 1 --id $RbdPoolAuthUserName MOLD-HB
89+
rbd -p $RbdPoolName create --size 1 --id $RbdPoolAuthUserName MOLD-HB-$HostIP-$poolPath
90+
fi
91+
92+
rbd -p $RbdPoolName --id $RbdPoolAuthUserName image-meta set MOLD-HB-$HostIP-$poolPath $HostIP-$poolPath $Timestamp
93+
ret=$?
94+
if [ $ret -eq 0 ]; then
95+
logger -p user.info -t MOLD-HA-HB "[Writing] 호스트:$HostIP | HB 파일 갱신(CLVM with RBD, 스토리지:$poolPath) > [현 시간:$CurrentTime]"
96+
else
97+
logger -p user.info -t MOLD-HA-HB "[Writing] 호스트:$HostIP | HB 파일 갱신(CLVM with RBD, 스토리지:$poolPath) > HB 갱신 실패!!!"
9098
fi
91-
obj=$(rbd -p $RbdPoolName --id $RbdPoolAuthUserName image-meta set MOLD-HB $HostIP-$poolPath $Timestamp)
92-
elif [ -n "$GfsPoolPath" ] ; then
93-
stat $hbFile &> /dev/null
94-
if [ $? -gt 0 ] ; then
95-
mkdir -p $hbFolder &> /dev/null
96-
touch $hbFile &> /dev/null
97-
if [ $? -gt 0 ] ; then
98-
printf "Failed to create $hbFile"
99-
return 2
100-
fi
99+
return $ret
100+
elif [ -n "$GfsPoolPath" ]; then
101+
stat $hbFile &>/dev/null
102+
if [ $? -gt 0 ]; then
103+
mkdir -p $hbFolder &>/dev/null
104+
touch $hbFile &>/dev/null
105+
if [ $? -gt 0 ]; then
106+
printf "Failed to create $hbFile"
107+
return 2
101108
fi
109+
fi
102110

103-
echo $Timestamp > $hbFile
104-
return $?
111+
echo "$Timestamp" >"$hbFile"
112+
ret=$?
113+
if [ $ret -eq 0 ]; then
114+
logger -p user.info -t MOLD-HA-HB "[Writing] 호스트:$HostIP | HB 파일 갱신(CLVM with GFS, 스토리지:$poolPath) > [현 시간:$CurrentTime]"
115+
else
116+
logger -p user.info -t MOLD-HA-HB "[Writing] 호스트:$HostIP | HB 파일 갱신(CLVM with GFS, 스토리지:$poolPath) > HB 갱신 실패!!!"
117+
fi
118+
return $ret
105119
else
120+
logger -p user.info -t MOLD-HA-HB "[Writing] 호스트:$HostIP | HB 파일 갱신(CLVM, 스토리지:$poolPath) 실패!!! > RBD 또는 GFS 형식의 스토리지가 존재하지 않습니다."
106121
printf "There is no storage information of type RBD or SharedMountPoint."
107122
return 0
108123
fi
109-
110-
if [ $? -gt 0 ]; then
111-
printf "Failed to create rbd file and set image-meta"
112-
return 2
113-
fi
114124
return 0
115125
fi
116126
}
117127

118128
check_hbLog() {
119-
#check the heart beat log
120-
now=$(date +%s)
121-
if [ -n "$RbdPoolName" ] ; then
122-
getHbTime=$(rbd -p $RbdPoolName --id $RbdPoolAuthUserName image-meta get MOLD-HB $HostIP-$poolPath)
129+
#check the heart beat log
130+
Timestamp=$(date +%s)
131+
CurrentTime=$(date +"%Y-%m-%d %H:%M:%S")
132+
133+
if [ -n "$RbdPoolName" ]; then
134+
getHbTime=$(rbd -p $RbdPoolName --id $RbdPoolAuthUserName image-meta get MOLD-HB-$HostIP-$poolPath $HostIP-$poolPath)
123135
if [ $? -gt 0 ] || [ -z "$getHbTime" ]; then
124136
return 1
125137
fi
126-
diff=$(expr $now - $getHbTime)
127-
elif [ -n "$GfsPoolPath" ] ; then
138+
diff=$(expr $Timestamp - $getHbTime)
139+
getHbTimeFmt=$(date -d @${getHbTime} '+%Y-%m-%d %H:%M:%S')
140+
logger -p user.info -t MOLD-HA-HB "[Checking] 호스트:$HostIP | HB 파일 체크(CLVM with RBD, 스토리지:$poolPath) > [현 시간:$CurrentTime | HB 파일 시간:$getHbTimeFmt | 시간 차이:$diff초]"
141+
142+
elif [ -n "$GfsPoolPath" ]; then
128143
getHbTime=$(cat $hbFile)
129-
diff=$(expr $now - $getHbTime)
144+
diff=$(expr $Timestamp - $getHbTime)
145+
getHbTimeFmt=$(date -d @${getHbTime} '+%Y-%m-%d %H:%M:%S')
146+
logger -p user.info -t MOLD-HA-HB "[Checking] 호스트:$HostIP | HB 파일 체크(CLVM with GFS, 스토리지:$poolPath) > [현 시간:$CurrentTime | HB 파일 시간:$getHbTimeFmt | 시간 차이:$diff초]"
130147
else
148+
logger -p user.info -t MOLD-HA-HB "[Checking] 호스트:$HostIP | HB 파일 체크(CLVM with RBD, 스토리지:$poolPath) 실패!!! > RBD 또는 GFS 형식의 스토리지가 존재하지 않습니다."
131149
printf "There is no storage information of type RBD or SharedMountPoint."
132150
return 0
133151
fi
134152

135153
if [ $diff -gt $interval ]; then
136-
return $diff
154+
logger -p user.info -t MOLD-HA-HB "[Result] 호스트:$HostIP | HB 체크 결과(CLVM, 스토리지:$poolPath) > [HOST STATE : DEAD]"
155+
echo "### [HOST STATE : DEAD] Set maximum interval: ($interval seconds), Actual difference: ($diff seconds) => Considered host down in [PoolType : CLVM] ###"
156+
else
157+
logger -p user.info -t MOLD-HA-HB "[Result] 호스트:$HostIP | HB 체크 결과(CLVM, 스토리지:$poolPath) > [HOST STATE : ALIVE]"
158+
echo "### [HOST STATE : ALIVE] in [PoolType : CLVM] ###"
137159
fi
138160
return 0
139161
}
140162

141163
if [ "$rflag" == "1" ]; then
142164
check_hbLog
143-
diff=$?
144-
if [ $diff == 0 ]; then
145-
echo "### [HOST STATE : ALIVE] in [PoolType : CLVM] ###"
146-
else
147-
echo "### [HOST STATE : DEAD] Set maximum interval: ($interval seconds), Actual difference: ($diff seconds) => Considered host down in [PoolType : CLVM] ###"
148-
fi
149165
exit 0
150166
elif [ "$cflag" == "1" ]; then
151167
/usr/bin/logger -t heartbeat "kvmheartbeat_clvm.sh will reboot system because it was unable to write the heartbeat to the storage."
152168
sync &
153169
sleep 5
154-
echo b > /proc/sysrq-trigger
170+
echo b >/proc/sysrq-trigger
155171
exit $?
156172
else
157173
write_hbLog

0 commit comments

Comments
 (0)