Skip to content

Commit 3694667

Browse files
Marcus SorensenMarcus Sorensen
andauthored
Trigger out of band VM state update via libvirt event when VM stops (#7963)
* Trigger out of band VM state update via libvirt event when VM stops * Add License headers, refactor nested try --------- Co-authored-by: Marcus Sorensen <mls@apple.com>
1 parent 348a63d commit 3694667

8 files changed

Lines changed: 165 additions & 10 deletions

File tree

agent/src/main/java/com/cloud/agent/Agent.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@
4040

4141
import javax.naming.ConfigurationException;
4242

43+
import com.cloud.resource.AgentStatusUpdater;
44+
import com.cloud.resource.ResourceStatusUpdater;
4345
import com.cloud.utils.NumbersUtil;
4446
import org.apache.cloudstack.agent.lb.SetupMSListAnswer;
4547
import org.apache.cloudstack.agent.lb.SetupMSListCommand;
@@ -100,7 +102,7 @@
100102
* For more configuration options, see the individual types.
101103
*
102104
**/
103-
public class Agent implements HandlerFactory, IAgentControl {
105+
public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater {
104106
protected static Logger s_logger = Logger.getLogger(Agent.class);
105107

106108
public enum ExitStatus {
@@ -409,6 +411,20 @@ public void scheduleWatch(final Link link, final Request request, final long del
409411
}
410412
}
411413

414+
public void triggerUpdate() {
415+
PingCommand command = _resource.getCurrentStatus(getId());
416+
command.setOutOfBand(true);
417+
s_logger.debug("Sending out of band ping");
418+
419+
final Request request = new Request(_id, -1, command, false);
420+
request.setSequence(getNextSequence());
421+
try {
422+
_link.send(request.toBytes());
423+
} catch (final ClosedChannelException e) {
424+
s_logger.warn("Unable to send ping update: " + request.toString());
425+
}
426+
}
427+
412428
protected void cancelTasks() {
413429
synchronized (_watchList) {
414430
for (final WatchTask task : _watchList) {
@@ -461,6 +477,10 @@ public void sendStartup(final Link link) {
461477
} catch (final ClosedChannelException e) {
462478
s_logger.warn("Unable to send request: " + request.toString());
463479
}
480+
481+
if (_resource instanceof ResourceStatusUpdater) {
482+
((ResourceStatusUpdater) _resource).registerStatusUpdater(this);
483+
}
464484
}
465485
}
466486

core/src/main/java/com/cloud/agent/api/PingCommand.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
public class PingCommand extends Command {
2525
Host.Type hostType;
2626
long hostId;
27+
boolean outOfBand;
2728

2829
protected PingCommand() {
2930
}
@@ -33,6 +34,12 @@ public PingCommand(Host.Type type, long id) {
3334
hostId = id;
3435
}
3536

37+
public PingCommand(Host.Type type, long id, boolean oob) {
38+
hostType = type;
39+
hostId = id;
40+
outOfBand = oob;
41+
}
42+
3643
public Host.Type getHostType() {
3744
return hostType;
3845
}
@@ -41,6 +48,10 @@ public long getHostId() {
4148
return hostId;
4249
}
4350

51+
public boolean getOutOfBand() { return outOfBand; }
52+
53+
public void setOutOfBand(boolean oob) { this.outOfBand = oob; }
54+
4455
@Override
4556
public boolean executeInSequence() {
4657
return false;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
package com.cloud.resource;
18+
19+
/**
20+
* AgentStatusUpdater is an agent with triggerable update functionality
21+
*/
22+
public interface AgentStatusUpdater {
23+
/**
24+
* Trigger the sending of an update (Ping).
25+
*/
26+
void triggerUpdate();
27+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
package com.cloud.resource;
18+
19+
/**
20+
* ResourceStatusUpdater is a resource that can trigger out of band status updates
21+
*/
22+
public interface ResourceStatusUpdater {
23+
/**
24+
* Register an AgentStatusUpdater to use for triggering out of band updates.
25+
*
26+
* @param updater The object to call triggerUpdate() on
27+
*/
28+
void registerStatusUpdater(AgentStatusUpdater updater);
29+
}

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3727,7 +3727,7 @@ public boolean processCommands(final long agentId, final long seq, final Command
37273727
if (cmd instanceof PingRoutingCommand) {
37283728
final PingRoutingCommand ping = (PingRoutingCommand)cmd;
37293729
if (ping.getHostVmStateReport() != null) {
3730-
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport());
3730+
_syncMgr.processHostVmStatePingReport(agentId, ping.getHostVmStateReport(), ping.getOutOfBand());
37313731
}
37323732

37333733
scanStalledVMInTransitionStateOnUpHost(agentId);

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSync.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ public interface VirtualMachinePowerStateSync {
2727
void processHostVmStateReport(long hostId, Map<String, HostVmStateReportEntry> report);
2828

2929
// to adapt legacy ping report
30-
void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report);
30+
void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force);
3131

3232
Map<Long, VirtualMachine.PowerState> convertVmStateReport(Map<String, HostVmStateReportEntry> states);
3333
}

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachinePowerStateSyncImpl.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,19 @@ public void processHostVmStateReport(long hostId, Map<String, HostVmStateReportE
5555
s_logger.debug("Process host VM state report. host: " + hostId);
5656

5757
Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
58-
processReport(hostId, translatedInfo);
58+
processReport(hostId, translatedInfo, false);
5959
}
6060

6161
@Override
62-
public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report) {
62+
public void processHostVmStatePingReport(long hostId, Map<String, HostVmStateReportEntry> report, boolean force) {
6363
if (s_logger.isDebugEnabled())
6464
s_logger.debug("Process host VM state report from ping process. host: " + hostId);
6565

6666
Map<Long, VirtualMachine.PowerState> translatedInfo = convertVmStateReport(report);
67-
processReport(hostId, translatedInfo);
67+
processReport(hostId, translatedInfo, force);
6868
}
6969

70-
private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo) {
70+
private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> translatedInfo, boolean force) {
7171

7272
if (s_logger.isDebugEnabled()) {
7373
s_logger.debug("Process VM state report. host: " + hostId + ", number of records in report: " + translatedInfo.size());
@@ -117,7 +117,7 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra
117117

118118
// Make sure powerState is up to date for missing VMs
119119
try {
120-
if (!_instanceDao.isPowerStateUpToDate(instance.getId())) {
120+
if (!force && !_instanceDao.isPowerStateUpToDate(instance.getId())) {
121121
s_logger.warn("Detected missing VM but power state is outdated, wait for another process report run for VM id: " + instance.getId());
122122
_instanceDao.resetVmPowerStateTracking(instance.getId());
123123
continue;
@@ -150,7 +150,7 @@ private void processReport(long hostId, Map<Long, VirtualMachine.PowerState> tra
150150

151151
long milliSecondsSinceLastStateUpdate = currentTime.getTime() - vmStateUpdateTime.getTime();
152152

153-
if (milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
153+
if (force || milliSecondsSinceLastStateUpdate > milliSecondsGracefullPeriod) {
154154
s_logger.debug("vm id: " + instance.getId() + " - time since last state update(" + milliSecondsSinceLastStateUpdate + "ms) has passed graceful period");
155155

156156
// this is were a race condition might have happened if we don't re-fetch the instance;

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java

Lines changed: 69 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,20 +83,25 @@
8383
import org.libvirt.DomainInfo.DomainState;
8484
import org.libvirt.DomainInterfaceStats;
8585
import org.libvirt.DomainSnapshot;
86+
import org.libvirt.Library;
8687
import org.libvirt.LibvirtException;
8788
import org.libvirt.MemoryStatistic;
8889
import org.libvirt.Network;
8990
import org.libvirt.SchedParameter;
9091
import org.libvirt.SchedUlongParameter;
9192
import org.libvirt.Secret;
9293
import org.libvirt.VcpuInfo;
94+
import org.libvirt.event.DomainEvent;
95+
import org.libvirt.event.DomainEventDetail;
96+
import org.libvirt.event.StoppedDetail;
9397
import org.w3c.dom.Document;
9498
import org.w3c.dom.Element;
9599
import org.w3c.dom.Node;
96100
import org.w3c.dom.NodeList;
97101
import org.xml.sax.InputSource;
98102
import org.xml.sax.SAXException;
99103

104+
100105
import com.cloud.agent.api.Answer;
101106
import com.cloud.agent.api.Command;
102107
import com.cloud.agent.api.HostVmStateReportEntry;
@@ -175,6 +180,8 @@
175180
import com.cloud.network.Networks.IsolationType;
176181
import com.cloud.network.Networks.RouterPrivateIpStrategy;
177182
import com.cloud.network.Networks.TrafficType;
183+
import com.cloud.resource.AgentStatusUpdater;
184+
import com.cloud.resource.ResourceStatusUpdater;
178185
import com.cloud.resource.RequestWrapper;
179186
import com.cloud.resource.ServerResource;
180187
import com.cloud.resource.ServerResourceBase;
@@ -224,11 +231,12 @@
224231
* private mac addresses for domrs | mac address | start + 126 || ||
225232
* pool | the parent of the storage pool hierarchy * }
226233
**/
227-
public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer {
234+
public class LibvirtComputingResource extends ServerResourceBase implements ServerResource, VirtualRouterDeployer, ResourceStatusUpdater {
228235
protected static Logger s_logger = Logger.getLogger(LibvirtComputingResource.class);
229236

230237
private static final String CONFIG_VALUES_SEPARATOR = ",";
231238

239+
232240
private static final String LEGACY = "legacy";
233241
private static final String SECURE = "secure";
234242

@@ -457,6 +465,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
457465
protected CPUStat cpuStat = new CPUStat();
458466
protected MemStat memStat = new MemStat(dom0MinMem, dom0OvercommitMem);
459467
private final LibvirtUtilitiesHelper libvirtUtilitiesHelper = new LibvirtUtilitiesHelper();
468+
private AgentStatusUpdater _agentStatusUpdater;
460469

461470
protected Boolean enableManuallySettingCpuTopologyOnKvmVm = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.ENABLE_MANUALLY_SETTING_CPU_TOPOLOGY_ON_KVM_VM);
462471

@@ -481,6 +490,11 @@ protected long getHypervisorQemuVersion() {
481490
return hypervisorQemuVersion;
482491
}
483492

493+
@Override
494+
public void registerStatusUpdater(AgentStatusUpdater updater) {
495+
_agentStatusUpdater = updater;
496+
}
497+
484498
@Override
485499
public ExecutionResult executeInVR(final String routerIp, final String script, final String args) {
486500
return executeInVR(routerIp, script, args, timeout);
@@ -3590,9 +3604,63 @@ private StartupStorageCommand createLocalStoragePool(String localStoragePath, St
35903604
} catch (final CloudRuntimeException e) {
35913605
s_logger.debug("Unable to initialize local storage pool: " + e);
35923606
}
3607+
setupLibvirtEventListener();
35933608
return sscmd;
35943609
}
35953610

3611+
private void setupLibvirtEventListener() {
3612+
final Thread libvirtListenerThread = new Thread(() -> {
3613+
try {
3614+
Library.runEventLoop();
3615+
} catch (LibvirtException e) {
3616+
s_logger.error("LibvirtException was thrown in event loop: ", e);
3617+
} catch (InterruptedException e) {
3618+
s_logger.error("Libvirt event loop was interrupted: ", e);
3619+
}
3620+
});
3621+
3622+
try {
3623+
libvirtListenerThread.setDaemon(true);
3624+
libvirtListenerThread.start();
3625+
3626+
Connect conn = LibvirtConnection.getConnection();
3627+
conn.addLifecycleListener(this::onDomainLifecycleChange);
3628+
3629+
s_logger.debug("Set up the libvirt domain event lifecycle listener");
3630+
} catch (LibvirtException e) {
3631+
s_logger.error("Failed to get libvirt connection for domain event lifecycle", e);
3632+
}
3633+
}
3634+
3635+
private int onDomainLifecycleChange(Domain domain, DomainEvent domainEvent) {
3636+
try {
3637+
s_logger.debug(String.format("Got event lifecycle change on Domain %s, event %s", domain.getName(), domainEvent));
3638+
if (domainEvent != null) {
3639+
switch (domainEvent.getType()) {
3640+
case STOPPED:
3641+
/* libvirt-destroyed VMs have detail StoppedDetail.DESTROYED, self shutdown guests are StoppedDetail.SHUTDOWN
3642+
* Checking for this helps us differentiate between events where cloudstack or admin stopped the VM vs guest
3643+
* initiated, and avoid pushing extra updates for actions we are initiating without a need for extra tracking */
3644+
DomainEventDetail detail = domainEvent.getDetail();
3645+
if (StoppedDetail.SHUTDOWN.equals(detail) || StoppedDetail.CRASHED.equals(detail)) {
3646+
s_logger.info("Triggering out of band status update due to completed self-shutdown or crash of VM");
3647+
_agentStatusUpdater.triggerUpdate();
3648+
} else {
3649+
s_logger.debug("Event detail: " + detail);
3650+
}
3651+
break;
3652+
default:
3653+
s_logger.debug(String.format("No handling for event %s", domainEvent));
3654+
}
3655+
}
3656+
} catch (LibvirtException e) {
3657+
s_logger.error("Libvirt exception while processing lifecycle event", e);
3658+
} catch (Throwable e) {
3659+
s_logger.error("Error during lifecycle", e);
3660+
}
3661+
return 0;
3662+
}
3663+
35963664
public String diskUuidToSerial(String uuid) {
35973665
String uuidWithoutHyphen = uuid.replace("-","");
35983666
return uuidWithoutHyphen.substring(0, Math.min(uuidWithoutHyphen.length(), 20));

0 commit comments

Comments
 (0)