Skip to content

Commit 799578e

Browse files
nvazquezdhslove
authored andcommitted
Improve logs for VM migrations (apache#12332)
1 parent dd50ca5 commit 799578e

4 files changed

Lines changed: 35 additions & 9 deletions

File tree

engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3065,7 +3065,7 @@ private void orchestrateMigrate(final String vmUuid, final long srcHostId, final
30653065
}
30663066

30673067
protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException {
3068-
logger.info("Migrating {} to {}", vm, dest);
3068+
logger.info("Start preparing migration of the VM: {} to {}", vm, dest);
30693069
final long dstHostId = dest.getHost().getId();
30703070
final Host fromHost = _hostDao.findById(srcHostId);
30713071
String provider = "";
@@ -3131,9 +3131,11 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31313131
if (pfma == null || !pfma.getResult()) {
31323132
final String details = pfma != null ? pfma.getDetails() : "null answer returned";
31333133
final String msg = "Unable to prepare for migration due to " + details;
3134+
logger.error("Failed to prepare destination host {} for migration of VM {} : {}", dstHostId, vm.getInstanceName(), details);
31343135
pfma = null;
31353136
throw new AgentUnavailableException(msg, dstHostId);
31363137
}
3138+
logger.debug("Successfully prepared destination host {} for migration of VM {} ", dstHostId, vm.getInstanceName());
31373139
} catch (final OperationTimedoutException e1) {
31383140
throw new AgentUnavailableException("Operation timed out", dstHostId);
31393141
} finally {
@@ -3154,18 +3156,23 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31543156
volumeMgr.release(vm.getId(), dstHostId);
31553157
}
31563158

3157-
logger.info("Migration cancelled because state has changed: {}", vm);
3158-
throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm);
3159+
String msg = "Migration cancelled because state has changed: " + vm;
3160+
logger.warn(msg);
3161+
throw new ConcurrentOperationException(msg);
31593162
}
31603163
} catch (final NoTransitionException e1) {
31613164
_networkMgr.rollbackNicForMigration(vmSrc, profile);
31623165
volumeMgr.release(vm.getId(), dstHostId);
3163-
logger.info("Migration cancelled because {}", e1.getMessage());
3166+
String msg = String.format("Migration cancelled for VM %s due to state transition failure: %s",
3167+
vm.getInstanceName(), e1.getMessage());
3168+
logger.warn(msg, e1);
31643169
throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage());
31653170
} catch (final CloudRuntimeException e2) {
31663171
_networkMgr.rollbackNicForMigration(vmSrc, profile);
31673172
volumeMgr.release(vm.getId(), dstHostId);
3168-
logger.info("Migration cancelled because {}", e2.getMessage());
3173+
String msg = String.format("Migration cancelled for VM %s due to runtime exception: %s",
3174+
vm.getInstanceName(), e2.getMessage());
3175+
logger.error(msg, e2);
31693176
work.setStep(Step.Done);
31703177
_workDao.update(work.getId(), work);
31713178
try {
@@ -3185,8 +3192,12 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
31853192
final Answer ma = _agentMgr.send(vm.getLastHostId(), mc);
31863193
if (ma == null || !ma.getResult()) {
31873194
final String details = ma != null ? ma.getDetails() : "null answer returned";
3195+
String msg = String.format("Migration command failed for VM %s on source host id=%s to destination host %s: %s",
3196+
vm.getInstanceName(), vm.getLastHostId(), dstHostId, details);
3197+
logger.error(msg);
31883198
throw new CloudRuntimeException(details);
31893199
}
3200+
logger.info("Migration command successful for VM {}", vm.getInstanceName());
31903201
} catch (final OperationTimedoutException e) {
31913202
boolean success = false;
31923203
if (HypervisorType.KVM.equals(vm.getHypervisorType())) {
@@ -3223,7 +3234,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32233234

32243235
try {
32253236
if (!checkVmOnHost(vm, dstHostId)) {
3226-
logger.error("Unable to complete migration for {}", vm);
3237+
logger.error("Migration verification failed for VM {} : VM not found on destination host {} ", vm.getInstanceName(), dstHostId);
32273238
try {
32283239
_agentMgr.send(srcHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
32293240
} catch (final AgentUnavailableException e) {
@@ -3238,7 +3249,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32383249
migrated = true;
32393250
} finally {
32403251
if (!migrated) {
3241-
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
3252+
logger.info("Migration was unsuccessful. Cleaning up: {}", vm);
32423253
_networkMgr.rollbackNicForMigration(vmSrc, profile);
32433254
volumeMgr.release(vm.getId(), dstHostId);
32443255
// deallocate GPU devices for the VM on the destination host
@@ -3250,7 +3261,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32503261
try {
32513262
_agentMgr.send(dstHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null);
32523263
} catch (final AgentUnavailableException ae) {
3253-
logger.warn("Looks like the destination Host is unavailable for cleanup", ae);
3264+
logger.warn("Destination host {} unavailable for cleanup after failed migration of VM {}", dstHostId, vm.getInstanceName(), ae);
32543265
}
32553266
_networkMgr.setHypervisorHostname(profile, dest, false);
32563267
try {
@@ -3259,6 +3270,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy
32593270
logger.warn(e.getMessage());
32603271
}
32613272
} else {
3273+
logger.info("Migration completed successfully for VM %s" + vm);
32623274
_networkMgr.commitNicForMigration(vmSrc, profile);
32633275
volumeMgr.release(vm.getId(), srcHostId);
32643276
// deallocate GPU devices for the VM on the src host after migration is complete
@@ -3294,6 +3306,7 @@ protected MigrateCommand buildMigrateCommand(VMInstanceVO vmInstance, VirtualMac
32943306
migrateCommand.setVlanToPersistenceMap(vlanToPersistenceMap);
32953307
}
32963308

3309+
logger.debug("Setting auto convergence to: {}", StorageManager.KvmAutoConvergence.value());
32973310
migrateCommand.setAutoConvergence(StorageManager.KvmAutoConvergence.value());
32983311
migrateCommand.setHostGuid(destination.getHost().getGuid());
32993312

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,17 +278,20 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
278278

279279
// abort the vm migration if the job is executed more than vm.migrate.wait
280280
final int migrateWait = libvirtComputingResource.getMigrateWait();
281+
logger.info("vm.migrate.wait value set to: {}for VM: {}", migrateWait, vmName);
281282
if (migrateWait > 0 && sleeptime > migrateWait * 1000) {
282283
DomainState state = null;
283284
try {
284285
state = dm.getInfo().state;
286+
logger.info("VM domain state when trying to abort migration : {}", state);
285287
} catch (final LibvirtException e) {
286288
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
287289
}
288290
if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) {
289291
try {
290292
DomainJobInfo job = dm.getJobInfo();
291-
logger.info(String.format("Aborting migration of VM [%s] with domain job [%s] due to time out after %d seconds.", vmName, job, migrateWait));
293+
logger.warn("Aborting migration of VM {} with domain job [{}] due to timeout after {} seconds. " +
294+
"Job stats: data processed={} bytes, data remaining={} bytes", vmName, job, migrateWait, job.getDataProcessed(), job.getDataRemaining());
292295
dm.abortJob();
293296
result = String.format("Migration of VM [%s] was cancelled by CloudStack due to time out after %d seconds.", vmName, migrateWait);
294297
commandState = Command.State.FAILED;
@@ -303,10 +306,12 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
303306

304307
// pause vm if we meet the vm.migrate.pauseafter threshold and not already paused
305308
final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter();
309+
logger.info("vm.migrate.pauseafter value set to: {} for VM: {}", migratePauseAfter, vmName);
306310
if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) {
307311
DomainState state = null;
308312
try {
309313
state = dm.getInfo().state;
314+
logger.info("VM domain state when trying to pause VM for migration: {}", state);
310315
} catch (final LibvirtException e) {
311316
logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage());
312317
}
@@ -381,6 +386,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
381386
}
382387

383388
if (result == null) {
389+
logger.info("Post-migration cleanup for VM {}: ", vmName);
384390
libvirtComputingResource.destroyNetworkRulesForVM(conn, vmName);
385391
for (final InterfaceDef iface : ifaces) {
386392
String vlanId = libvirtComputingResource.getVlanIdFromBridgeName(iface.getBrName());
@@ -394,6 +400,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0.
394400
commandState = Command.State.COMPLETED;
395401
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
396402
} else if (commandState == null) {
403+
logger.error("Migration of VM {} failed with result: {}", vmName, result);
397404
commandState = Command.State.FAILED;
398405
libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState);
399406
}

plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
5858
final VirtualMachineTO vm = command.getVirtualMachine();
5959

6060
if (command.isRollback()) {
61+
logger.info("Handling rollback for PrepareForMigration of VM {}", vm.getName());
6162
return handleRollback(command, libvirtComputingResource);
6263
}
6364

@@ -85,6 +86,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
8586
if (interfaceDef != null && interfaceDef.getNetType() == GuestNetType.VHOSTUSER) {
8687
DpdkTO to = new DpdkTO(interfaceDef.getDpdkOvsPath(), interfaceDef.getDpdkSourcePort(), interfaceDef.getInterfaceMode());
8788
dpdkInterfaceMapping.put(nic.getMac(), to);
89+
logger.debug("Configured DPDK interface for VM {}", vm.getName());
8890
}
8991
}
9092

@@ -138,6 +140,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom
138140
return new PrepareForMigrationAnswer(command, "failed to connect physical disks to host");
139141
}
140142

143+
logger.info("Successfully prepared destination host for migration of VM {}", vm.getName());
141144
return createPrepareForMigrationAnswer(command, dpdkInterfaceMapping, libvirtComputingResource, vm);
142145
} catch (final LibvirtException | CloudRuntimeException | InternalErrorException | URISyntaxException e) {
143146
if (MapUtils.isNotEmpty(dpdkInterfaceMapping)) {
@@ -173,6 +176,7 @@ private Answer handleRollback(PrepareForMigrationCommand command, LibvirtComputi
173176
KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr();
174177
VirtualMachineTO vmTO = command.getVirtualMachine();
175178

179+
logger.info("Rolling back PrepareForMigration for VM {}: disconnecting physical disks", vmTO.getName());
176180
if (!storagePoolMgr.disconnectPhysicalDisksViaVmSpec(vmTO)) {
177181
return new PrepareForMigrationAnswer(command, "failed to disconnect physical disks from host");
178182
}

server/src/main/java/com/cloud/vm/UserVmManagerImpl.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7427,6 +7427,7 @@ public VirtualMachine migrateVirtualMachine(Long vmId, Host destinationHost) thr
74277427
throw new CloudRuntimeException("Unable to find suitable destination to migrate VM " + vm.getInstanceName());
74287428
}
74297429

7430+
logger.info("Starting migration of VM {} from host {} to host {} ", vm.getInstanceName(), srcHostId, dest.getHost().getId());
74307431
collectVmDiskAndNetworkStatistics(vmId, State.Running);
74317432
_itMgr.migrate(vm.getUuid(), srcHostId, dest);
74327433
return findMigratedVm(vm.getId(), vm.getType());
@@ -7498,6 +7499,7 @@ protected void validateStrictHostTagCheck(VMInstanceVO vm, HostVO host) {
74987499

74997500
private DeployDestination checkVmMigrationDestination(VMInstanceVO vm, Host srcHost, Host destinationHost) throws VirtualMachineMigrationException {
75007501
if (destinationHost == null) {
7502+
logger.error("Destination host is null for migration of VM: {}", vm.getInstanceName());
75017503
return null;
75027504
}
75037505
if (destinationHost.getId() == srcHost.getId()) {

0 commit comments

Comments
 (0)