From 28791e2bace9fc37ce296ff9d23a001e08d19d73 Mon Sep 17 00:00:00 2001 From: nvazquez Date: Wed, 24 Dec 2025 00:26:58 -0300 Subject: [PATCH] Improve logs for VM migrations --- .../cloud/vm/VirtualMachineManagerImpl.java | 29 ++++++++++++++----- .../wrapper/LibvirtMigrateCommandWrapper.java | 9 +++++- ...virtPrepareForMigrationCommandWrapper.java | 6 ++++ .../java/com/cloud/vm/UserVmManagerImpl.java | 4 +++ 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index f9238fa0e717..86f456306110 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -3053,7 +3053,7 @@ private void orchestrateMigrate(final String vmUuid, final long srcHostId, final } protected void migrate(final VMInstanceVO vm, final long srcHostId, final DeployDestination dest) throws ResourceUnavailableException, ConcurrentOperationException { - logger.info("Migrating {} to {}", vm, dest); + logger.info("Start preparing migration of the VM: {} to {}", vm, dest); final long dstHostId = dest.getHost().getId(); final Host fromHost = _hostDao.findById(srcHostId); if (fromHost == null) { @@ -3118,9 +3118,11 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy if (pfma == null || !pfma.getResult()) { final String details = pfma != null ? pfma.getDetails() : "null answer returned"; final String msg = "Unable to prepare for migration due to " + details; + logger.error("Failed to prepare destination host {} for migration of VM {} : {}", dstHostId, vm.getInstanceName(), details); pfma = null; throw new AgentUnavailableException(msg, dstHostId); } + logger.debug("Successfully prepared destination host {} for migration of VM {} ", dstHostId, vm.getInstanceName()); } catch (final OperationTimedoutException e1) { throw new AgentUnavailableException("Operation timed out", dstHostId); } finally { @@ -3141,18 +3143,23 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy volumeMgr.release(vm.getId(), dstHostId); } - logger.info("Migration cancelled because state has changed: {}", vm); - throw new ConcurrentOperationException("Migration cancelled because state has changed: " + vm); + String msg = "Migration cancelled because state has changed: " + vm; + logger.warn(msg); + throw new ConcurrentOperationException(msg); } } catch (final NoTransitionException e1) { _networkMgr.rollbackNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), dstHostId); - logger.info("Migration cancelled because {}", e1.getMessage()); + String msg = String.format("Migration cancelled for VM %s due to state transition failure: %s", + vm.getInstanceName(), e1.getMessage()); + logger.warn(msg, e1); throw new ConcurrentOperationException("Migration cancelled because " + e1.getMessage()); } catch (final CloudRuntimeException e2) { _networkMgr.rollbackNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), dstHostId); - logger.info("Migration cancelled because {}", e2.getMessage()); + String msg = String.format("Migration cancelled for VM %s due to runtime exception: %s", + vm.getInstanceName(), e2.getMessage()); + logger.error(msg, e2); work.setStep(Step.Done); _workDao.update(work.getId(), work); try { @@ -3172,8 +3179,12 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy final Answer ma = _agentMgr.send(vm.getLastHostId(), mc); if (ma == null || !ma.getResult()) { final String details = ma != null ? ma.getDetails() : "null answer returned"; + String msg = String.format("Migration command failed for VM %s on source host id=%s to destination host %s: %s", + vm.getInstanceName(), vm.getLastHostId(), dstHostId, details); + logger.error(msg); throw new CloudRuntimeException(details); } + logger.info("Migration command successful for VM {}", vm.getInstanceName()); } catch (final OperationTimedoutException e) { boolean success = false; if (HypervisorType.KVM.equals(vm.getHypervisorType())) { @@ -3210,7 +3221,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy try { if (!checkVmOnHost(vm, dstHostId)) { - logger.error("Unable to complete migration for {}", vm); + logger.error("Migration verification failed for VM {} : VM not found on destination host {} ", vm.getInstanceName(), dstHostId); try { _agentMgr.send(srcHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null); } catch (final AgentUnavailableException e) { @@ -3225,7 +3236,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy migrated = true; } finally { if (!migrated) { - logger.info("Migration was unsuccessful. Cleaning up: {}", vm); + logger.info("Migration was unsuccessful. Cleaning up: {}", vm); _networkMgr.rollbackNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), dstHostId); // deallocate GPU devices for the VM on the destination host @@ -3237,7 +3248,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy try { _agentMgr.send(dstHostId, new Commands(cleanup(vm, dpdkInterfaceMapping)), null); } catch (final AgentUnavailableException ae) { - logger.warn("Looks like the destination Host is unavailable for cleanup", ae); + logger.warn("Destination host {} unavailable for cleanup after failed migration of VM {}", dstHostId, vm.getInstanceName(), ae); } _networkMgr.setHypervisorHostname(profile, dest, false); try { @@ -3246,6 +3257,7 @@ protected void migrate(final VMInstanceVO vm, final long srcHostId, final Deploy logger.warn(e.getMessage()); } } else { + logger.info("Migration completed successfully for VM %s" + vm); _networkMgr.commitNicForMigration(vmSrc, profile); volumeMgr.release(vm.getId(), srcHostId); // deallocate GPU devices for the VM on the src host after migration is complete @@ -3276,6 +3288,7 @@ protected MigrateCommand buildMigrateCommand(VMInstanceVO vmInstance, VirtualMac migrateCommand.setVlanToPersistenceMap(vlanToPersistenceMap); } + logger.debug("Setting auto convergence to: {}", StorageManager.KvmAutoConvergence.value()); migrateCommand.setAutoConvergence(StorageManager.KvmAutoConvergence.value()); migrateCommand.setHostGuid(destination.getHost().getGuid()); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java index 859de5143f9b..81328d6ffb9d 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtMigrateCommandWrapper.java @@ -278,17 +278,20 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0. // abort the vm migration if the job is executed more than vm.migrate.wait final int migrateWait = libvirtComputingResource.getMigrateWait(); + logger.info("vm.migrate.wait value set to: {}for VM: {}", migrateWait, vmName); if (migrateWait > 0 && sleeptime > migrateWait * 1000) { DomainState state = null; try { state = dm.getInfo().state; + logger.info("VM domain state when trying to abort migration : {}", state); } catch (final LibvirtException e) { logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage()); } if (state != null && state == DomainState.VIR_DOMAIN_RUNNING) { try { DomainJobInfo job = dm.getJobInfo(); - logger.info(String.format("Aborting migration of VM [%s] with domain job [%s] due to time out after %d seconds.", vmName, job, migrateWait)); + logger.warn("Aborting migration of VM {} with domain job [{}] due to timeout after {} seconds. " + + "Job stats: data processed={} bytes, data remaining={} bytes", vmName, job, migrateWait, job.getDataProcessed(), job.getDataRemaining()); dm.abortJob(); result = String.format("Migration of VM [%s] was cancelled by CloudStack due to time out after %d seconds.", vmName, migrateWait); commandState = Command.State.FAILED; @@ -303,10 +306,12 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0. // pause vm if we meet the vm.migrate.pauseafter threshold and not already paused final int migratePauseAfter = libvirtComputingResource.getMigratePauseAfter(); + logger.info("vm.migrate.pauseafter value set to: {} for VM: {}", migratePauseAfter, vmName); if (migratePauseAfter > 0 && sleeptime > migratePauseAfter) { DomainState state = null; try { state = dm.getInfo().state; + logger.info("VM domain state when trying to pause VM for migration: {}", state); } catch (final LibvirtException e) { logger.info("Couldn't get VM domain state after " + sleeptime + "ms: " + e.getMessage()); } @@ -381,6 +386,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0. } if (result == null) { + logger.info("Post-migration cleanup for VM {}: ", vmName); libvirtComputingResource.destroyNetworkRulesForVM(conn, vmName); for (final InterfaceDef iface : ifaces) { String vlanId = libvirtComputingResource.getVlanIdFromBridgeName(iface.getBrName()); @@ -394,6 +400,7 @@ Use VIR_DOMAIN_XML_SECURE (value = 1) prior to v1.0.0. commandState = Command.State.COMPLETED; libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState); } else if (commandState == null) { + logger.error("Migration of VM {} failed with result: {}", vmName, result); commandState = Command.State.FAILED; libvirtComputingResource.createOrUpdateLogFileForCommand(command, commandState); } diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java index 8d7ee14dc13f..ccaf52e513c6 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtPrepareForMigrationCommandWrapper.java @@ -56,6 +56,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom final VirtualMachineTO vm = command.getVirtualMachine(); if (command.isRollback()) { + logger.info("Handling rollback for PrepareForMigration of VM {}", vm.getName()); return handleRollback(command, libvirtComputingResource); } @@ -83,6 +84,9 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom if (interfaceDef != null && interfaceDef.getNetType() == GuestNetType.VHOSTUSER) { DpdkTO to = new DpdkTO(interfaceDef.getDpdkOvsPath(), interfaceDef.getDpdkSourcePort(), interfaceDef.getInterfaceMode()); dpdkInterfaceMapping.put(nic.getMac(), to); + if (logger.isDebugEnabled()) { + logger.debug("Configured DPDK interface for VM {}", vm.getName()); + } } } @@ -122,6 +126,7 @@ public Answer execute(final PrepareForMigrationCommand command, final LibvirtCom return new PrepareForMigrationAnswer(command, "failed to connect physical disks to host"); } + logger.info("Successfully prepared destination host for migration of VM {}", vm.getName()); return createPrepareForMigrationAnswer(command, dpdkInterfaceMapping, libvirtComputingResource, vm); } catch (final LibvirtException | CloudRuntimeException | InternalErrorException | URISyntaxException e) { if (MapUtils.isNotEmpty(dpdkInterfaceMapping)) { @@ -157,6 +162,7 @@ private Answer handleRollback(PrepareForMigrationCommand command, LibvirtComputi KVMStoragePoolManager storagePoolMgr = libvirtComputingResource.getStoragePoolMgr(); VirtualMachineTO vmTO = command.getVirtualMachine(); + logger.info("Rolling back PrepareForMigration for VM {}: disconnecting physical disks", vmTO.getName()); if (!storagePoolMgr.disconnectPhysicalDisksViaVmSpec(vmTO)) { return new PrepareForMigrationAnswer(command, "failed to disconnect physical disks from host"); } diff --git a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java index 1ae609c7961b..d6c1a6c57bb6 100644 --- a/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java +++ b/server/src/main/java/com/cloud/vm/UserVmManagerImpl.java @@ -7189,6 +7189,9 @@ public VirtualMachine migrateVirtualMachine(Long vmId, Host destinationHost) thr throw new CloudRuntimeException("Unable to find suitable destination to migrate VM " + vm.getInstanceName()); } + if (logger.isInfoEnabled()) { + logger.info("Starting migration of VM {} from host {} to host {} ", vm.getInstanceName(), srcHostId, dest.getHost().getId()); + } collectVmDiskAndNetworkStatistics(vmId, State.Running); _itMgr.migrate(vm.getUuid(), srcHostId, dest); return findMigratedVm(vm.getId(), vm.getType()); @@ -7260,6 +7263,7 @@ protected void validateStrictHostTagCheck(VMInstanceVO vm, HostVO host) { private DeployDestination checkVmMigrationDestination(VMInstanceVO vm, Host srcHost, Host destinationHost) throws VirtualMachineMigrationException { if (destinationHost == null) { + logger.error("Destination host is null for migration of VM: {}", vm.getInstanceName()); return null; } if (destinationHost.getId() == srcHost.getId()) {