Skip to content

Commit ca511f1

Browse files
sureshanapartiLocharla, Sandeep
authored andcommitted
Mark VMs in error state when expunge fails during destroy operation (apache#12749)
* Mark VMs in error state when expunge fails during destroy operation * fetch volume by external id (used by external plugins) * review comments * Update reorder hosts log to DEBUG, log line is too verbose to have on as INFO
1 parent d04e499 commit ca511f1

File tree

5 files changed

+54
-4
lines changed

5 files changed

+54
-4
lines changed

api/src/main/java/com/cloud/vm/VirtualMachine.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,9 @@ public static StateMachine2<State, VirtualMachine.Event, VirtualMachine> getStat
124124
s_fsm.addTransition(new Transition<State, Event>(State.Stopping, VirtualMachine.Event.StopRequested, State.Stopping, null));
125125
s_fsm.addTransition(new Transition<State, Event>(State.Stopping, VirtualMachine.Event.AgentReportShutdowned, State.Stopped, Arrays.asList(new Impact[]{Impact.USAGE})));
126126
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.OperationFailed, State.Expunging,null));
127+
// Note: In addition to the Stopped -> Error transition for failed VM creation,
128+
// a VM can also transition from Expunging to Error on OperationFailedToError.
129+
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.OperationFailedToError, State.Error, null));
127130
s_fsm.addTransition(new Transition<State, Event>(State.Expunging, VirtualMachine.Event.ExpungeOperation, State.Expunging,null));
128131
s_fsm.addTransition(new Transition<State, Event>(State.Error, VirtualMachine.Event.DestroyRequested, State.Expunging, null));
129132
s_fsm.addTransition(new Transition<State, Event>(State.Error, VirtualMachine.Event.ExpungeOperation, State.Expunging, null));

engine/schema/src/main/java/com/cloud/storage/dao/VolumeDao.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,4 +166,12 @@ public interface VolumeDao extends GenericDao<VolumeVO, Long>, StateDao<Volume.S
166166
int getVolumeCountByOfferingId(long diskOfferingId);
167167

168168
VolumeVO findByLastIdAndState(long lastVolumeId, Volume.State...states);
169+
170+
/**
171+
* Retrieves volume by its externalId
172+
*
173+
* @param externalUuid
174+
* @return Volume Object of matching search criteria
175+
*/
176+
VolumeVO findByExternalUuid(String externalUuid);
169177
}

engine/schema/src/main/java/com/cloud/storage/dao/VolumeDaoImpl.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ public class VolumeDaoImpl extends GenericDaoBase<VolumeVO, Long> implements Vol
7474
private final SearchBuilder<VolumeVO> storeAndInstallPathSearch;
7575
private final SearchBuilder<VolumeVO> volumeIdSearch;
7676
protected GenericSearchBuilder<VolumeVO, Long> CountByAccount;
77+
protected final SearchBuilder<VolumeVO> ExternalUuidSearch;
7778
protected GenericSearchBuilder<VolumeVO, SumCount> primaryStorageSearch;
7879
protected GenericSearchBuilder<VolumeVO, SumCount> primaryStorageSearch2;
7980
protected GenericSearchBuilder<VolumeVO, SumCount> secondaryStorageSearch;
@@ -459,6 +460,10 @@ public VolumeDaoImpl() {
459460
CountByAccount.and("idNIN", CountByAccount.entity().getId(), Op.NIN);
460461
CountByAccount.done();
461462

463+
ExternalUuidSearch = createSearchBuilder();
464+
ExternalUuidSearch.and("externalUuid", ExternalUuidSearch.entity().getExternalUuid(), Op.EQ);
465+
ExternalUuidSearch.done();
466+
462467
primaryStorageSearch = createSearchBuilder(SumCount.class);
463468
primaryStorageSearch.select("sum", Func.SUM, primaryStorageSearch.entity().getSize());
464469
primaryStorageSearch.and("accountId", primaryStorageSearch.entity().getAccountId(), Op.EQ);
@@ -934,4 +939,11 @@ public VolumeVO findByLastIdAndState(long lastVolumeId, State ...states) {
934939
sc.and(sc.entity().getState(), SearchCriteria.Op.IN, (Object[]) states);
935940
return sc.find();
936941
}
942+
943+
@Override
944+
public VolumeVO findByExternalUuid(String externalUuid) {
945+
SearchCriteria<VolumeVO> sc = ExternalUuidSearch.create();
946+
sc.setParameters("externalUuid", externalUuid);
947+
return findOneBy(sc);
948+
}
937949
}

server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,7 +1714,7 @@ protected List<Host> findSuitableHosts(VirtualMachineProfile vmProfile, Deployme
17141714

17151715
@Override
17161716
public void reorderHostsByPriority(Map<Long, Integer> priorities, List<Host> hosts) {
1717-
logger.info("Re-ordering hosts {} by priorities {}", hosts, priorities);
1717+
logger.debug("Re-ordering hosts {} by priorities {}", hosts, priorities);
17181718

17191719
hosts.removeIf(host -> DataCenterDeployment.PROHIBITED_HOST_PRIORITY.equals(getHostPriority(priorities, host.getId())));
17201720

@@ -1727,7 +1727,7 @@ public int compare(Host host1, Host host2) {
17271727
}
17281728
);
17291729

1730-
logger.info("Hosts after re-ordering are: {}", hosts);
1730+
logger.debug("Hosts after re-ordering are: {}", hosts);
17311731
}
17321732

17331733
private Integer getHostPriority(Map<Long, Integer> priorities, Long hostId) {

server/src/main/java/com/cloud/vm/UserVmManagerImpl.java

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2578,6 +2578,22 @@ public boolean expunge(UserVmVO vm) {
25782578
}
25792579
}
25802580

2581+
private void transitionExpungingToError(long vmId) {
2582+
UserVmVO vm = _vmDao.findById(vmId);
2583+
if (vm != null && vm.getState() == State.Expunging) {
2584+
try {
2585+
boolean transitioned = _itMgr.stateTransitTo(vm, VirtualMachine.Event.OperationFailedToError, null);
2586+
if (transitioned) {
2587+
logger.info("Transitioned VM [{}] from Expunging to Error after failed expunge", vm.getUuid());
2588+
} else {
2589+
logger.warn("Failed to persist transition of VM [{}] from Expunging to Error after failed expunge, possibly due to concurrent update", vm.getUuid());
2590+
}
2591+
} catch (NoTransitionException e) {
2592+
logger.warn("Failed to transition VM {} to Error state: {}", vm, e.getMessage());
2593+
}
2594+
}
2595+
}
2596+
25812597
/**
25822598
* Release network resources, it was done on vm stop previously.
25832599
* @param id vm id
@@ -3555,8 +3571,19 @@ public UserVm destroyVm(DestroyVMCmd cmd) throws ResourceUnavailableException, C
35553571
detachVolumesFromVm(vm, dataVols);
35563572

35573573
UserVm destroyedVm = destroyVm(vmId, expunge);
3558-
if (expunge && !expunge(vm)) {
3559-
throw new CloudRuntimeException("Failed to expunge vm " + destroyedVm);
3574+
if (expunge) {
3575+
boolean expunged = false;
3576+
String errorMsg = "";
3577+
try {
3578+
expunged = expunge(vm);
3579+
} catch (RuntimeException e) {
3580+
logger.error("Failed to expunge VM [{}] due to: {}", vm, e.getMessage(), e);
3581+
errorMsg = e.getMessage();
3582+
}
3583+
if (!expunged) {
3584+
transitionExpungingToError(vm.getId());
3585+
throw new CloudRuntimeException("Failed to expunge VM " + vm.getUuid() + (StringUtils.isNotBlank(errorMsg) ? " due to: " + errorMsg : ""));
3586+
}
35603587
}
35613588

35623589
autoScaleManager.removeVmFromVmGroup(vmId);

0 commit comments

Comments
 (0)