Skip to content

Commit d3de5d8

Browse files
VM start error handling improvements, and config to expose error to user
1 parent d6c3977 commit d3de5d8

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 34 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717

1818
package com.cloud.vm;
1919

20+
import static com.cloud.configuration.ConfigurationManagerImpl.EXPOSE_ERRORS_TO_USER;
2021
import static com.cloud.configuration.ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS;
2122

2223
import java.lang.reflect.Field;
@@ -931,7 +932,14 @@ public void start(final String vmUuid, final Map<VirtualMachineProfile.Param, Ob
931932
public void start(final String vmUuid, final Map<VirtualMachineProfile.Param, Object> params, final DeploymentPlan planToDeploy, final DeploymentPlanner planner) {
932933
try {
933934
advanceStart(vmUuid, params, planToDeploy, planner);
934-
} catch (ConcurrentOperationException | InsufficientCapacityException e) {
935+
} catch (ConcurrentOperationException e) {
936+
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
937+
} catch (final InsufficientCapacityException e) {
938+
final CallContext cctxt = CallContext.current();
939+
final Account account = cctxt.getCallingAccount();
940+
if (account.getType() == Account.Type.ADMIN) {
941+
throw new CloudRuntimeException("Unable to start a VM due to insufficient capacity: " + e.getMessage(), e).add(VirtualMachine.class, vmUuid);
942+
}
935943
throw new CloudRuntimeException(String.format("Unable to start a VM [%s] due to [%s].", vmUuid, e.getMessage()), e).add(VirtualMachine.class, vmUuid);
936944
} catch (final ResourceUnavailableException e) {
937945
if (e.getScope() != null && e.getScope().equals(VirtualRouter.class)){
@@ -1361,6 +1369,7 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
13611369

13621370
final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
13631371

1372+
Throwable lastKnownError = null;
13641373
boolean canRetry = true;
13651374
ExcludeList avoids = null;
13661375
try {
@@ -1384,7 +1393,8 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
13841393

13851394
int retry = StartRetry.value();
13861395
while (retry-- != 0) {
1387-
logger.debug("Instance start attempt #{}", (StartRetry.value() - retry));
1396+
int attemptNumber = StartRetry.value() - retry;
1397+
logger.debug("Instance start attempt #{}", attemptNumber);
13881398

13891399
if (reuseVolume) {
13901400
final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId());
@@ -1450,8 +1460,13 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
14501460
reuseVolume = false;
14511461
continue;
14521462
}
1453-
throw new InsufficientServerCapacityException("Unable to create a deployment for " + vmProfile, DataCenter.class, plan.getDataCenterId(),
1454-
areAffinityGroupsAssociated(vmProfile));
1463+
String message = String.format("Unable to create a deployment for %s after %s attempts", vmProfile, attemptNumber);
1464+
if ((account.getType() == Account.Type.ADMIN || Boolean.TRUE.equals(EXPOSE_ERRORS_TO_USER.value())) && lastKnownError != null) {
1465+
message += String.format(" Last known error: %s", lastKnownError.getMessage());
1466+
throw new CloudRuntimeException(message, lastKnownError);
1467+
} else {
1468+
throw new InsufficientServerCapacityException(message, DataCenter.class, plan.getDataCenterId(), areAffinityGroupsAssociated(vmProfile));
1469+
}
14551470
}
14561471

14571472
avoids.addHost(dest.getHost().getId());
@@ -1619,11 +1634,15 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
16191634
throw new ExecutionException("Unable to start VM:" + vm.getUuid() + " due to error in finalizeStart, not retrying");
16201635
}
16211636
}
1622-
logger.info("Unable to start VM on {} due to {}", dest.getHost(), (startAnswer == null ? " no start answer" : startAnswer.getDetails()));
1637+
String msg = String.format("Unable to start VM on %s due to %s", dest.getHost(), startAnswer == null ? "no start command answer" : startAnswer.getDetails());
1638+
lastKnownError = new ExecutionException(msg);
1639+
16231640
if (startAnswer != null && startAnswer.getContextParam("stopRetry") != null) {
1641+
logger.error(msg, lastKnownError);
16241642
break;
16251643
}
16261644

1645+
logger.debug(msg, lastKnownError);
16271646
} catch (OperationTimedoutException e) {
16281647
logger.debug("Unable to send the start command to host {} failed to start VM: {}", dest.getHost(), vm);
16291648
if (e.isActive()) {
@@ -1633,6 +1652,7 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
16331652
throw new AgentUnavailableException("Unable to start " + vm.getHostName(), destHostId, e);
16341653
} catch (final ResourceUnavailableException e) {
16351654
logger.warn("Unable to contact resource.", e);
1655+
lastKnownError = e;
16361656
if (!avoids.add(e)) {
16371657
if (e.getScope() == Volume.class || e.getScope() == Nic.class) {
16381658
throw e;
@@ -1689,7 +1709,15 @@ public void orchestrateStart(final String vmUuid, final Map<VirtualMachineProfil
16891709
}
16901710

16911711
if (startedVm == null) {
1692-
throw new CloudRuntimeException("Unable to start Instance '" + vm.getHostName() + "' (" + vm.getUuid() + "), see management server log for details");
1712+
String messageTmpl = "Unable to start Instance '%s' (%s)%s";
1713+
String details;
1714+
if ((account.getType() == Account.Type.ADMIN || Boolean.TRUE.equals(EXPOSE_ERRORS_TO_USER.value())) && lastKnownError != null) {
1715+
details = ": " + lastKnownError.getMessage();
1716+
} else {
1717+
details = ", see management server log for details";
1718+
}
1719+
String message = String.format(messageTmpl, vm.getHostName(), vm.getUuid(), details);
1720+
throw new CloudRuntimeException(message, lastKnownError);
16931721
}
16941722
}
16951723

server/src/main/java/com/cloud/configuration/ConfigurationManagerImpl.java

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -536,6 +536,9 @@ public class ConfigurationManagerImpl extends ManagerBase implements Configurati
536536
public static final ConfigKey<Boolean> ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS = new ConfigKey<>(Boolean.class, "allow.domain.admins.to.create.tagged.offerings", "Advanced",
537537
"false", "Allow domain admins to create offerings with tags.", true, ConfigKey.Scope.Account, null);
538538

539+
public static final ConfigKey<Boolean> EXPOSE_ERRORS_TO_USER = new ConfigKey<>(Boolean.class, "expose.errors.to.user", ConfigKey.CATEGORY_ADVANCED,
540+
"false", "If set to true, detailed error messages will be returned to all user roles. If false, detailed errors are only shown to admin users", true, ConfigKey.Scope.Global, null);
541+
539542
public static final ConfigKey<Long> DELETE_QUERY_BATCH_SIZE = new ConfigKey<>("Advanced", Long.class, "delete.query.batch.size", "0",
540543
"Indicates the limit applied while deleting entries in bulk. With this, the delete query will apply the limit as many times as necessary," +
541544
" to delete all the entries. This is advised when retaining several days of records, which can lead to slowness. <= 0 means that no limit will " +
@@ -8494,11 +8497,10 @@ public ConfigKey<?>[] getConfigKeys() {
84948497
BYTES_MAX_READ_LENGTH, BYTES_MAX_WRITE_LENGTH, ADD_HOST_ON_SERVICE_RESTART_KVM, SET_HOST_DOWN_TO_MAINTENANCE,
84958498
VM_SERVICE_OFFERING_MAX_CPU_CORES, VM_SERVICE_OFFERING_MAX_RAM_SIZE, MIGRATE_VM_ACROSS_CLUSTERS,
84968499
ENABLE_ACCOUNT_SETTINGS_FOR_DOMAIN, ENABLE_DOMAIN_SETTINGS_FOR_CHILD_DOMAIN,
8497-
ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS, DELETE_QUERY_BATCH_SIZE, AllowNonRFC1918CompliantIPs, HostCapacityTypeCpuMemoryWeight
8500+
ALLOW_DOMAIN_ADMINS_TO_CREATE_TAGGED_OFFERINGS, EXPOSE_ERRORS_TO_USER, DELETE_QUERY_BATCH_SIZE, AllowNonRFC1918CompliantIPs, HostCapacityTypeCpuMemoryWeight
84988501
};
84998502
}
85008503

8501-
85028504
/**
85038505
* Returns a string representing the specified configuration's type.
85048506
* @param configName name of the configuration.

0 commit comments

Comments
 (0)