Skip to content

Commit be66eb2

Browse files
authored
Auto Enable/Disable KVM hosts (#7170)
* Auto Enable Disable KVM hosts
* Improve health check result
* Fix corner cases
* Script path refactor
* Fix sonar cloud reports
* Fix last code smells
* Add marvin tests
* Fix new line on agent.properties to prevent host add failures
* Send alert on auto-enable-disable and add annotations when the setting is enabled
* Address reviews
* Add a reason for enabling or disabling a host when the automatic feature is enabled
* Fix comment on the marvin test description
* Fix for disabling the feature if the admin has manually updated the host resource state before any health check result
1 parent 82a6a1f commit be66eb2

File tree

15 files changed

+688
-62
lines changed

15 files changed

+688
-62
lines changed

agent/conf/agent.properties

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,7 @@ iscsi.session.cleanup.enabled=false
398398

399399
# The number of iothreads. There should be only 1 or 2 IOThreads per VM CPU (default is 1). The recommended number of iothreads is 1
400400
# iothreads=1
401+
402+
# The path of an executable file/script for host health check for CloudStack to Auto Disable/Enable the host
403+
# depending on the exit code of the file/script (0: enable the host, 1: disable the host, any other exit code: ignored)
404+
# agent.health.check.script.path=

agent/src/main/java/com/cloud/agent/properties/AgentProperties.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,9 @@ public class AgentProperties{
312312
*/
313313
public static final Property<String> OPENVSWITCH_DPDK_OVS_PATH = new Property<>("openvswitch.dpdk.ovs.path", null, String.class);
314314

315+
public static final Property<String> HEALTH_CHECK_SCRIPT_PATH =
316+
new Property<>("agent.health.check.script.path", null, String.class);
317+
315318
/**
316319
* Sets the hypervisor type.<br>
317320
* Possible values: kvm | lxc <br>

api/src/main/java/com/cloud/resource/ResourceService.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ public interface ResourceService {
4949
*/
5050
Host updateHost(UpdateHostCmd cmd) throws NoTransitionException;
5151

52+
Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException;
53+
5254
Host cancelMaintenance(CancelMaintenanceCmd cmd);
5355

5456
Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException;

api/src/main/java/org/apache/cloudstack/api/ApiConstants.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ public class ApiConstants {
10201020
public static final String PUBLIC_MTU = "publicmtu";
10211021
public static final String PRIVATE_MTU = "privatemtu";
10221022
public static final String MTU = "mtu";
1023+
public static final String AUTO_ENABLE_KVM_HOST = "autoenablekvmhost";
10231024
public static final String LIST_APIS = "listApis";
10241025

10251026
/**

api/src/main/java/org/apache/cloudstack/api/command/admin/host/UpdateHostCmd.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import com.cloud.host.Host;
2020
import com.cloud.user.Account;
2121
import org.apache.cloudstack.acl.RoleType;
22-
import org.apache.cloudstack.annotation.AnnotationService;
2322
import org.apache.cloudstack.api.APICommand;
2423
import org.apache.cloudstack.api.ApiConstants;
2524
import org.apache.cloudstack.api.ApiErrorCode;
@@ -117,9 +116,6 @@ public void execute() {
117116
Host result;
118117
try {
119118
result = _resourceService.updateHost(this);
120-
if(getAnnotation() != null) {
121-
annotationService.addAnnotation(getAnnotation(), AnnotationService.EntityType.HOST, result.getUuid(), true);
122-
}
123119
HostResponse hostResponse = _responseGenerator.createHostResponse(result);
124120
hostResponse.setResponseName(getCommandName());
125121
this.setResponseObject(hostResponse);

core/src/main/java/com/cloud/agent/api/PingRoutingCommand.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public class PingRoutingCommand extends PingCommand {
2929

3030
boolean _gatewayAccessible = true;
3131
boolean _vnetAccessible = true;
32+
private Boolean hostHealthCheckResult;
3233

3334
protected PingRoutingCommand() {
3435
}
@@ -57,4 +58,12 @@ public boolean isVnetAccessible() {
5758
public void setVnetAccessible(boolean vnetAccessible) {
5859
_vnetAccessible = vnetAccessible;
5960
}
61+
62+
public Boolean getHostHealthCheckResult() {
63+
return hostHealthCheckResult;
64+
}
65+
66+
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
67+
this.hostHealthCheckResult = hostHealthCheckResult;
68+
}
6069
}

core/src/main/java/com/cloud/agent/api/StartupRoutingCommand.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ public class StartupRoutingCommand extends StartupCommand {
4444
List<String> hostTags = new ArrayList<String>();
4545
String hypervisorVersion;
4646
HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = new HashMap<String, HashMap<String, VgpuTypesInfo>>();
47+
private Boolean hostHealthCheckResult;
4748

4849
public StartupRoutingCommand() {
4950
super(Host.Type.Routing);
@@ -188,4 +189,12 @@ public boolean getSupportsClonedVolumes() {
188189
public void setSupportsClonedVolumes(boolean supportsClonedVolumes) {
189190
this.supportsClonedVolumes = supportsClonedVolumes;
190191
}
192+
193+
public Boolean getHostHealthCheckResult() {
194+
return hostHealthCheckResult;
195+
}
196+
197+
public void setHostHealthCheckResult(Boolean hostHealthCheckResult) {
198+
this.hostHealthCheckResult = hostHealthCheckResult;
199+
}
191200
}

engine/components-api/src/main/java/com/cloud/agent/AgentManager.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@
3939
public interface AgentManager {
4040
static final ConfigKey<Integer> Wait = new ConfigKey<Integer>("Advanced", Integer.class, "wait", "1800", "Time in seconds to wait for control commands to return",
4141
true);
42+
ConfigKey<Boolean> EnableKVMAutoEnableDisable = new ConfigKey<>(Boolean.class,
43+
"enable.kvm.host.auto.enable.disable",
44+
"Advanced",
45+
"false",
46+
"(KVM only) Enable Auto Disable/Enable KVM hosts in the cluster " +
47+
"according to the hosts health check results",
48+
true, ConfigKey.Scope.Cluster, null);
4249

4350
public enum TapAgentsAction {
4451
Add, Del, Contains,

engine/orchestration/src/main/java/com/cloud/agent/manager/AgentManagerImpl.java

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
5252
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
5353
import org.apache.cloudstack.utils.identity.ManagementServerNode;
54+
import org.apache.commons.lang3.BooleanUtils;
5455
import org.apache.log4j.Logger;
5556
import org.apache.log4j.MDC;
5657

@@ -1250,6 +1251,52 @@ public AgentHandler(final Task.Type type, final Link link, final byte[] data) {
12501251
super(type, link, data);
12511252
}
12521253

1254+
private void processHostHealthCheckResult(Boolean hostHealthCheckResult, long hostId) {
1255+
if (hostHealthCheckResult == null) {
1256+
return;
1257+
}
1258+
HostVO host = _hostDao.findById(hostId);
1259+
if (host == null) {
1260+
s_logger.error(String.format("Unable to find host with ID: %s", hostId));
1261+
return;
1262+
}
1263+
if (!BooleanUtils.toBoolean(EnableKVMAutoEnableDisable.valueIn(host.getClusterId()))) {
1264+
s_logger.debug(String.format("%s is disabled for the cluster %s, cannot process the health check result " +
1265+
"received for the host %s", EnableKVMAutoEnableDisable.key(), host.getClusterId(), host.getName()));
1266+
return;
1267+
}
1268+
1269+
ResourceState.Event resourceEvent = hostHealthCheckResult ? ResourceState.Event.Enable : ResourceState.Event.Disable;
1270+
1271+
try {
1272+
s_logger.info(String.format("Host health check %s, auto %s KVM host: %s",
1273+
hostHealthCheckResult ? "succeeds" : "fails",
1274+
hostHealthCheckResult ? "enabling" : "disabling",
1275+
host.getName()));
1276+
_resourceMgr.autoUpdateHostAllocationState(hostId, resourceEvent);
1277+
} catch (NoTransitionException e) {
1278+
s_logger.error(String.format("Cannot Auto %s host: %s", resourceEvent, host.getName()), e);
1279+
}
1280+
}
1281+
1282+
private void processStartupRoutingCommand(StartupRoutingCommand startup, long hostId) {
1283+
if (startup == null) {
1284+
s_logger.error("Empty StartupRoutingCommand received");
1285+
return;
1286+
}
1287+
Boolean hostHealthCheckResult = startup.getHostHealthCheckResult();
1288+
processHostHealthCheckResult(hostHealthCheckResult, hostId);
1289+
}
1290+
1291+
private void processPingRoutingCommand(PingRoutingCommand pingRoutingCommand, long hostId) {
1292+
if (pingRoutingCommand == null) {
1293+
s_logger.error("Empty PingRoutingCommand received");
1294+
return;
1295+
}
1296+
Boolean hostHealthCheckResult = pingRoutingCommand.getHostHealthCheckResult();
1297+
processHostHealthCheckResult(hostHealthCheckResult, hostId);
1298+
}
1299+
12531300
protected void processRequest(final Link link, final Request request) {
12541301
final AgentAttache attache = (AgentAttache)link.attachment();
12551302
final Command[] cmds = request.getCommands();
@@ -1291,6 +1338,7 @@ protected void processRequest(final Link link, final Request request) {
12911338
try {
12921339
if (cmd instanceof StartupRoutingCommand) {
12931340
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
1341+
processStartupRoutingCommand(startup, hostId);
12941342
answer = new StartupAnswer(startup, attache.getId(), mgmtServiceConf.getPingInterval());
12951343
} else if (cmd instanceof StartupProxyCommand) {
12961344
final StartupProxyCommand startup = (StartupProxyCommand) cmd;
@@ -1322,6 +1370,7 @@ protected void processRequest(final Link link, final Request request) {
13221370
// if the router is sending a ping, verify the
13231371
// gateway was pingable
13241372
if (cmd instanceof PingRoutingCommand) {
1373+
processPingRoutingCommand((PingRoutingCommand) cmd, hostId);
13251374
final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible();
13261375
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
13271376

@@ -1748,8 +1797,8 @@ public String getConfigComponentName() {
17481797

17491798
@Override
17501799
public ConfigKey<?>[] getConfigKeys() {
1751-
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize,
1752-
DirectAgentThreadCap };
1800+
return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, Wait, AlertWait, DirectAgentLoadSize,
1801+
DirectAgentPoolSize, DirectAgentThreadCap, EnableKVMAutoEnableDisable };
17531802
}
17541803

17551804
protected class SetHostParamsListener implements Listener {

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
322322
private String _dcId;
323323
private String _clusterId;
324324
private final Properties _uefiProperties = new Properties();
325+
private String hostHealthCheckScriptPath;
325326

326327
private long _hvVersion;
327328
private Duration _timeout;
@@ -717,6 +718,10 @@ protected enum BridgeType {
717718
NATIVE, OPENVSWITCH, TUNGSTEN
718719
}
719720

721+
protected enum HealthCheckResult {
722+
SUCCESS, FAILURE, IGNORE
723+
}
724+
720725
protected BridgeType _bridgeType;
721726

722727
protected StorageSubsystemCommandHandler storageHandler;
@@ -943,6 +948,12 @@ public boolean configure(final String name, final Map<String, Object> params) th
943948
throw new ConfigurationException("Unable to find the ovs-pvlan-kvm-vm.sh");
944949
}
945950

951+
hostHealthCheckScriptPath = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEALTH_CHECK_SCRIPT_PATH);
952+
if (StringUtils.isNotBlank(hostHealthCheckScriptPath) && !new File(hostHealthCheckScriptPath).exists()) {
953+
s_logger.info(String.format("Unable to find the host health check script at: %s, " +
954+
"discarding it", hostHealthCheckScriptPath));
955+
}
956+
946957
setupTungstenVrouterPath = Script.findScript(tungstenScriptsDir, "setup_tungsten_vrouter.sh");
947958
if (setupTungstenVrouterPath == null) {
948959
throw new ConfigurationException("Unable to find the setup_tungsten_vrouter.sh");
@@ -3436,13 +3447,54 @@ protected synchronized String attachOrDetachDevice(final Connect conn, final boo
34363447

34373448
@Override
34383449
public PingCommand getCurrentStatus(final long id) {
3439-
3450+
PingRoutingCommand pingRoutingCommand;
34403451
if (!_canBridgeFirewall) {
3441-
return new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
3452+
pingRoutingCommand = new PingRoutingCommand(com.cloud.host.Host.Type.Routing, id, this.getHostVmStateReport());
34423453
} else {
34433454
final HashMap<String, Pair<Long, Long>> nwGrpStates = syncNetworkGroups(id);
3444-
return new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
3455+
pingRoutingCommand = new PingRoutingWithNwGroupsCommand(getType(), id, this.getHostVmStateReport(), nwGrpStates);
34453456
}
3457+
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
3458+
if (healthCheckResult != HealthCheckResult.IGNORE) {
3459+
pingRoutingCommand.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
3460+
}
3461+
return pingRoutingCommand;
3462+
}
3463+
3464+
/**
3465+
* The health check result is SUCCESS, if the script is executed successfully and the exit code is 0
3466+
* The health check result is FAILURE, if the script is executed successfully and the exit code is 1
3467+
* The health check result is IGNORE, if
3468+
* - Script file is not specified, or
3469+
* - Script file does not exist, or
3470+
* - Script file is not accessible by the user of the cloudstack-agent process, or
3471+
* - Script file is not executable, or
3472+
* - There are errors when the script is executed (exit codes other than 0 or 1)
3473+
*/
3474+
private HealthCheckResult getHostHealthCheckResult() {
3475+
if (StringUtils.isBlank(hostHealthCheckScriptPath)) {
3476+
s_logger.debug("Host health check script path is not specified");
3477+
return HealthCheckResult.IGNORE;
3478+
}
3479+
File script = new File(hostHealthCheckScriptPath);
3480+
if (!script.exists() || !script.isFile() || !script.canExecute()) {
3481+
s_logger.warn(String.format("The host health check script file set at: %s cannot be executed, " +
3482+
"reason: %s", hostHealthCheckScriptPath,
3483+
!script.exists() ? "file does not exist" : "please check file permissions to execute this file"));
3484+
return HealthCheckResult.IGNORE;
3485+
}
3486+
int exitCode = executeBashScriptAndRetrieveExitValue(hostHealthCheckScriptPath);
3487+
if (s_logger.isDebugEnabled()) {
3488+
s_logger.debug(String.format("Host health check script exit code: %s", exitCode));
3489+
}
3490+
return retrieveHealthCheckResultFromExitCode(exitCode);
3491+
}
3492+
3493+
private HealthCheckResult retrieveHealthCheckResultFromExitCode(int exitCode) {
3494+
if (exitCode != 0 && exitCode != 1) {
3495+
return HealthCheckResult.IGNORE;
3496+
}
3497+
return exitCode == 0 ? HealthCheckResult.SUCCESS : HealthCheckResult.FAILURE;
34463498
}
34473499

34483500
@Override
@@ -3484,6 +3536,10 @@ public StartupCommand[] initialize() {
34843536
cmd.setGatewayIpAddress(_localGateway);
34853537
cmd.setIqn(getIqn());
34863538
cmd.getHostDetails().put(HOST_VOLUME_ENCRYPTION, String.valueOf(hostSupportsVolumeEncryption()));
3539+
HealthCheckResult healthCheckResult = getHostHealthCheckResult();
3540+
if (healthCheckResult != HealthCheckResult.IGNORE) {
3541+
cmd.setHostHealthCheckResult(healthCheckResult == HealthCheckResult.SUCCESS);
3542+
}
34873543

34883544
if (cmd.getHostDetails().containsKey("Host.OS")) {
34893545
_hostDistro = cmd.getHostDetails().get("Host.OS");

0 commit comments

Comments
 (0)