// NOTE(review): truncated fragment of HighAvailabilityManagerImpl.scheduleRestart — braces are unbalanced and
// statements after the early `return;` appear spliced in from elsewhere in the method; do not treat control flow
// here as authoritative. Visible behavior: when the VM has no host id it is force-stopped via advanceStop(uuid, true);
// HA is skipped for VMware/Hyperv VMs; an alert type is chosen by VM type (router/CPVM/SSVM) and, presumably when
// HA is not enforced, an "stopped unexpectedly" alert is sent before force-stopping — TODO confirm against full source.
@Override public void scheduleRestart(VMInstanceVO vm, boolean investigate) { Long hostId = vm.getHostId(); if (hostId == null) { try { s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm); _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; if (vm.getHypervisorType() == HypervisorType.VMware || vm.getHypervisorType() == HypervisorType.Hyperv) { s_logger.info("Skip HA for VMware VM or Hyperv VM" + vm.getInstanceName()); return; s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_SSVM; if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodIdToDeployIn(); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); _itMgr.advanceStop(vm.getUuid(), true);
protected List<Long> getAffectedVmsForVmStart(VMInstanceVO vm) { List<Long> affectedVms = new ArrayList<Long>(); affectedVms.add(vm.getId()); List<SecurityGroupVMMapVO> groupsForVm = _securityGroupVMMapDao.listByInstanceId(vm.getId()); // For each group, find the security rules that allow the group for (SecurityGroupVMMapVO mapVO : groupsForVm) {// FIXME: use custom sql in the dao //Add usage events for security group assign UsageEventUtils.publishUsageEvent(EventTypes.EVENT_SECURITY_GROUP_ASSIGN, vm.getAccountId(), vm.getDataCenterId(), vm.getId(), mapVO.getSecurityGroupId(), vm .getClass().getName(), vm.getUuid()); List<SecurityGroupRuleVO> allowingRules = _securityGroupRuleDao.listByAllowedSecurityGroupId(mapVO.getSecurityGroupId()); // For each security rule that allows a group that the vm belongs to, find the group it belongs to affectedVms.addAll(getAffectedVmsForSecurityRules(allowingRules)); } return affectedVms; }
// NOTE(review): truncated fragment (the `else if` is orphaned — an enclosing brace is missing from this view).
// Visible behavior: for each non-removed VM in the (presumably anti-affinity) group, its current host is added to
// the avoid set; a Stopped group VM with a last host still blocks that host while its reserved capacity has not yet
// been released (i.e. seconds since last update < _vmCapacityReleaseInterval) — TODO confirm against full source.
if (groupVM != null && !groupVM.isRemoved()) { if (groupVM.getHostId() != null) { avoid.addHost(groupVM.getHostId()); if (s_logger.isDebugEnabled()) { s_logger.debug("Added host " + groupVM.getHostId() + " to avoid set, since VM " + groupVM.getId() + " is present on the host"); } else if (VirtualMachine.State.Stopped.equals(groupVM.getState()) && groupVM.getLastHostId() != null) { long secondsSinceLastUpdate = (DateUtil.currentGMTTime().getTime() - groupVM.getUpdateTime().getTime()) / 1000; if (secondsSinceLastUpdate < _vmCapacityReleaseInterval) { avoid.addHost(groupVM.getLastHostId()); if (s_logger.isDebugEnabled()) { s_logger.debug("Added host " + groupVM.getLastHostId() + " to avoid set, since VM " + groupVM.getId() + " is present on the host, in Stopped state but has reserved capacity");
/**
 * Builds a model for the given VM instance. The uuid is always recorded; the
 * instance id and name are copied only when a VM record is supplied.
 */
public VirtualMachineModel(VMInstanceVO vm, String uuid) {
    _uuid = uuid;
    if (vm == null) {
        return; // uuid-only model; no backing VM record available
    }
    _instanceId = vm.getId();
    _instanceName = vm.getInstanceName();
}
/**
 * Queues an HA migration work item for the VM and wakes the worker threads.
 * A VM with no current host has nothing to migrate; either way the method
 * reports success.
 */
@Override
public boolean scheduleMigration(final VMInstanceVO vm) {
    final Long currentHostId = vm.getHostId();
    if (currentHostId != null) {
        final HaWorkVO migrationWork =
                new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, currentHostId, vm.getState(), 0, vm.getUpdated());
        _haDao.persist(migrationWork);
        // Nudge the HA workers so the new item is picked up promptly.
        wakeupWorkers();
    }
    return true;
}
/**
 * Picks the host to run a snapshot operation for the volume's VM: the VM's
 * current host when it has one, otherwise the last host it ran on, otherwise
 * null (also null when the volume has no VM record).
 */
@Override
public Long getHostIdForSnapshotOperation(Volume vol) {
    final VMInstanceVO instance = _vmDao.findById(vol.getInstanceId());
    if (instance == null) {
        return null;
    }
    final Long currentHost = instance.getHostId();
    // Fall back to the last known host when the VM is not currently placed.
    return currentHost != null ? currentHost : instance.getLastHostId();
}
// NOTE(review): fragment of a DAO constructor/init block that wires up reusable SearchBuilder instances
// (the enclosing method signature and the builders' declarations are outside this view).
// IdStatesSearch: id equality + state IN-list. VMClusterSearch: inner-join to host on hostId, filtered by clusterId.
// LHVMClusterSearch: inner-join to host on lastHostId, restricted to VMs with a NULL hostId (presumably stopped VMs
// still holding capacity on their last host — TODO confirm). AllFieldsSearch: one EQ condition per common column.
// ZoneTemplateNonExpungedSearch / TemplateNonExpungedSearch: template lookups excluding a state via NEQ
// (presumably State.Expunging — verify against the callers that set the "state" parameter).
IdStatesSearch.and("id", IdStatesSearch.entity().getId(), Op.EQ); IdStatesSearch.and("states", IdStatesSearch.entity().getState(), Op.IN); IdStatesSearch.done(); VMClusterSearch.join("hostSearch", hostSearch, hostSearch.entity().getId(), VMClusterSearch.entity().getHostId(), JoinType.INNER); hostSearch.and("clusterId", hostSearch.entity().getClusterId(), SearchCriteria.Op.EQ); VMClusterSearch.done(); LHVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), LHVMClusterSearch.entity().getLastHostId(), JoinType.INNER); LHVMClusterSearch.and("hostid", LHVMClusterSearch.entity().getHostId(), Op.NULL); hostSearch1.and("clusterId", hostSearch1.entity().getClusterId(), SearchCriteria.Op.EQ); LHVMClusterSearch.done(); AllFieldsSearch.and("host", AllFieldsSearch.entity().getHostId(), Op.EQ); AllFieldsSearch.and("lastHost", AllFieldsSearch.entity().getLastHostId(), Op.EQ); AllFieldsSearch.and("state", AllFieldsSearch.entity().getState(), Op.EQ); AllFieldsSearch.and("zone", AllFieldsSearch.entity().getDataCenterId(), Op.EQ); AllFieldsSearch.and("pod", AllFieldsSearch.entity().getPodIdToDeployIn(), Op.EQ); AllFieldsSearch.and("type", AllFieldsSearch.entity().getType(), Op.EQ); AllFieldsSearch.and("account", AllFieldsSearch.entity().getAccountId(), Op.EQ); AllFieldsSearch.done(); ZoneTemplateNonExpungedSearch.and("zone", ZoneTemplateNonExpungedSearch.entity().getDataCenterId(), Op.EQ); ZoneTemplateNonExpungedSearch.and("template", ZoneTemplateNonExpungedSearch.entity().getTemplateId(), Op.EQ); ZoneTemplateNonExpungedSearch.and("state", ZoneTemplateNonExpungedSearch.entity().getState(), Op.NEQ); ZoneTemplateNonExpungedSearch.done(); TemplateNonExpungedSearch.and("template", TemplateNonExpungedSearch.entity().getTemplateId(), Op.EQ); TemplateNonExpungedSearch.and("state", TemplateNonExpungedSearch.entity().getState(), Op.NEQ);
// NOTE(review): truncated fragment of the power-on report handler — try/catch and case boundaries are cut mid-body,
// so the exact switch structure cannot be read from here. Visible behavior: on a power-on report with no pending job,
// the VM is transitioned via FollowAgentPowerOnReport to the reporting host; a NoTransitionException is logged as a
// suspected race rather than rethrown; when the recorded hostId differs from getPowerHostId() an out-of-band migration
// is logged; an ALERT_TYPE_SYNC alert warns that the VM's network environment may need a reset — TODO confirm which
// states the later cases belong to against the full source.
switch (vm.getState()) { case Starting: s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it"); stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (final NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Running state according to power-on report from hypervisor"); _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (Starting -> Running) from out-of-context transition. VM network environment may need to be reset"); break; if (vm.getHostId() != null && vm.getHostId().longValue() != vm.getPowerHostId().longValue()) { s_logger.info("Detected out of band VM migration from host " + vm.getHostId() + " to host " + vm.getPowerHostId()); stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (final NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-on report while there is no pending jobs on it"); stateTransitTo(vm, VirtualMachine.Event.FollowAgentPowerOnReport, vm.getPowerHostId()); } catch (final NoTransitionException e) { s_logger.warn("Unexpected VM state transition exception, race-condition?", e); _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(),
// NOTE(review): truncated fragment of a VM start/deploy orchestration method (the enclosing signature and several
// intervening statements are outside this view; `dest` is dereferenced right after being set to null, so assignments
// between these lines were clearly dropped in extraction). Visible behavior: builds a DataCenterDeployment plan from
// the VM's zone/pod, assembles a VirtualMachineProfileImpl from offering/template/owner, records the chosen pod on the
// VM, and — when the destination cluster has a cpu or memory overcommit ratio > 1 — stores cpuOvercommitRatio and
// memoryOvercommitRatio as VM details (refreshing them if a cpuOvercommitRatio detail already exists).
final ServiceOfferingVO offering = _offeringDao.findById(vm.getId(), vm.getServiceOfferingId()); final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, vm.getTemplateId()); DataCenterDeployment plan = new DataCenterDeployment(vm.getDataCenterId(), vm.getPodIdToDeployIn(), null, null, null, null, ctx); if (planToDeploy != null && planToDeploy.getDataCenterId() != 0) { if (s_logger.isDebugEnabled()) { final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); final List<VolumeVO> vols = _volsDao.findReadyRootVolumesByInstance(vm.getId()); for (final VolumeVO vol : vols) { final Account owner = _entityMgr.findById(Account.class, vm.getAccountId()); final VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(vm, template, offering, owner, params); DeployDestination dest = null; vm.setPodIdToDeployIn(dest.getPod().getId()); final Long cluster_id = dest.getCluster().getId(); final ClusterDetailsVO cluster_detail_cpu = _clusterDetailsDao.findDetail(cluster_id, "cpuOvercommitRatio"); final ClusterDetailsVO cluster_detail_ram = _clusterDetailsDao.findDetail(cluster_id, "memoryOvercommitRatio"); if (userVmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") == null && (Float.parseFloat(cluster_detail_cpu.getValue()) > 1f || Float.parseFloat(cluster_detail_ram.getValue()) > 1f)) { userVmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true); userVmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true); } else if (userVmDetailsDao.findDetail(vm.getId(), "cpuOvercommitRatio") != null) { userVmDetailsDao.addDetail(vm.getId(), "cpuOvercommitRatio", cluster_detail_cpu.getValue(), true); userVmDetailsDao.addDetail(vm.getId(), "memoryOvercommitRatio", cluster_detail_ram.getValue(), true);
// NOTE(review): truncated fragment of the DAO's compare-and-swap style updateState (enclosing signature and the
// actual update/rollback branches are outside this view). Visible behavior: snapshots the old host/updated/updateTime,
// and for an idempotent transition (same state, same host) builds a conditional update keyed on id + old state + host
// + update counter, bumping the counter and the update time. On failure a diagnostic string contrasting DB data, new
// data, and stale data is assembled; if another thread already moved the row to the requested state the update is
// treated as a success. `vo` vs `vmi` vs `vm` all appear — presumably re-reads of the same row; verify in full source.
Long oldHostId = vmi.getHostId(); Long oldUpdated = vmi.getUpdated(); Date oldUpdateDate = vmi.getUpdateTime(); if (newState.equals(oldState) && newHostId != null && newHostId.equals(oldHostId)) { sc.setParameters("id", vmi.getId()); sc.setParameters("states", oldState); sc.setParameters("host", vmi.getHostId()); sc.setParameters("update", vmi.getUpdated()); vmi.incrUpdated(); UpdateBuilder ub = getUpdateBuilder(vmi); ub.set(vmi, "podIdToDeployIn", vmi.getPodIdToDeployIn()); ub.set(vmi, _updateTimeAttr, new Date()); StringBuilder str = new StringBuilder("Unable to update ").append(vo.toString()); str.append(": DB Data={Host=").append(vo.getHostId()).append("; State=").append(vo.getState().toString()).append("; updated=").append(vo.getUpdated()) .append("; time=").append(vo.getUpdateTime()); str.append("} New Data: {Host=").append(vm.getHostId()).append("; State=").append(vm.getState().toString()).append("; updated=").append(vmi.getUpdated()) .append("; time=").append(vo.getUpdateTime()); str.append("} Stale Data: {Host=").append(oldHostId).append("; State=").append(oldState).append("; updated=").append(oldUpdated).append("; time=") .append(oldUpdateDate).append("}"); if (vo != null && vo.getState() == newState) { s_logger.debug("VM " + vo.getInstanceName() + " state has been already been updated to " + newState); return true;
// NOTE(review): truncated fragment of host-down HA handling (braces unbalanced; loop header outside this view).
// Visible behavior: system VMs are moved to the front of the restart list so they recover before user VMs; for each
// HA-enabled VM the service offering is checked for local storage, the VM row is re-read by uuid, and if it has
// already moved to a host other than the down one, HA is skipped for it ("VM HA is done"). The `continue` implies
// this runs inside a loop over the down host's VMs — TODO confirm against full source.
if (vm.getType() == VirtualMachine.Type.User) { reorderedVMList.add(vm); } else { reorderedVMList.add(0, vm); if (vm.isHaEnabled()) { sb.append(" " + vm.getHostName()); ServiceOfferingVO vmOffering = _serviceOfferingDao.findById(vm.getServiceOfferingId()); if (vmOffering.isUseLocalStorage()) { if (s_logger.isDebugEnabled()){ s_logger.debug("Notifying HA Mgr of to restart vm " + vm.getId() + "-" + vm.getInstanceName()); vm = _instanceDao.findByUuid(vm.getUuid()); Long hostId = vm.getHostId(); if (hostId != null && !hostId.equals(host.getId())) { s_logger.debug("VM " + vm.getInstanceName() + " is not on down host " + host.getId() + " it is on other host " + hostId + " VM HA is done"); continue;
// NOTE(review): truncated fragment of the power-off report handler (case bodies cut mid-way; the fall-through of
// Starting/Stopping/Stopped/Migrating into the HA check cannot be confirmed from this view). Visible behavior: for a
// Running, HA-enabled VM on a non-VMware/non-Hyperv hypervisor with HaVmRestartHostUp set, an out-of-band stop
// schedules an HA restart unless a pending HA work item already exists; an ALERT_TYPE_SYNC alert records the
// out-of-context transition to Stopped.
switch (vm.getState()) { case Starting: case Stopping: case Stopped: case Migrating: s_logger.info("VM " + vm.getInstanceName() + " is at " + vm.getState() + " and we received a power-off report while there is no pending jobs on it"); if(vm.isHaEnabled() && vm.getState() == State.Running && HaVmRestartHostUp.value() && vm.getHypervisorType() != HypervisorType.VMware && vm.getHypervisorType() != HypervisorType.Hyperv) { s_logger.info("Detected out-of-band stop of a HA enabled VM " + vm.getInstanceName() + ", will schedule restart"); if(!_haMgr.hasPendingHaWork(vm.getId())) { _haMgr.scheduleRestart(vm, true); } else { s_logger.info("VM " + vm.getInstanceName() + " already has an pending HA task working on it"); _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") state is sync-ed (" + vm.getState() + " -> Stopped) from out-of-context transition."); s_logger.info("VM " + vm.getInstanceName() + " is sync-ed to at Stopped state according to power-off report from hypervisor");
// NOTE(review): fragment of the baremetal provision-done notification handler (enclosing signature and the `if`
// body's closing brace are outside this view). Visible behavior: looks the VM up by instance name, requires it to be
// in Starting state (otherwise throws), then marks it Running, pins lastHostId to the current host, persists the row,
// and logs the provisioning completion with the host's private MAC/IP.
vmq.and(vmq.entity().getInstanceName(), SearchCriteria.Op.EQ, vmName); VMInstanceVO vm = vmq.find(); if (State.Starting != vm.getState()) { throw new CloudRuntimeException(String.format("baremetal instance[name:%s, state:%s] is not in state of Starting", vmName, vm.getState())); vm.setState(State.Running); vm.setLastHostId(vm.getHostId()); vmDao.update(vm.getId(), vm); s_logger.debug(String.format("received baremetal provision done notification for vm[id:%s name:%s] running on host[mac:%s, ip:%s]", vm.getId(), vm.getInstanceName(), host.getPrivateMacAddress(), host.getPrivateIpAddress()));
// NOTE(review): fragment of a list-VMs query builder (volumeSearch is declared outside this view, and sb.done() /
// the criteria-population code do not appear here). Builds a SearchBuilder over VMInstanceVO with optional filters on
// id, hostName (LIKE), state, zone, pod, host, and type (both EQ and an IN-based "nulltype" variant — presumably used
// to exclude system VM types; verify against the caller), inner-joined to volumes by instance id.
final SearchBuilder<VMInstanceVO> sb = _vmInstanceDao.createSearchBuilder(); sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ); sb.and("hostName", sb.entity().getHostName(), SearchCriteria.Op.LIKE); sb.and("state", sb.entity().getState(), SearchCriteria.Op.EQ); sb.and("dataCenterId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ); sb.and("podId", sb.entity().getPodIdToDeployIn(), SearchCriteria.Op.EQ); sb.and("hostId", sb.entity().getHostId(), SearchCriteria.Op.EQ); sb.and("type", sb.entity().getType(), SearchCriteria.Op.EQ); sb.and("nulltype", sb.entity().getType(), SearchCriteria.Op.IN); sb.join("volumeSearch", volumeSearch, sb.entity().getId(), volumeSearch.entity().getInstanceId(), JoinBuilder.JoinType.INNER);
// NOTE(review): truncated fragment of migration preparation (the else-chain and several try/if bodies are cut; the
// userdata/config-drive section presumably sits inside a conditional not visible here — TODO confirm). Visible
// behavior: chooses a migrate alert type by VM type, builds the hypervisor-specific VirtualMachineTO, creates a
// Migrating ItWorkVO work entry at Step.Prepare, records lastHostId/destination pod on the VM, moves it to the
// Migrating state, and regenerates VM userdata (ssh key, password, Windows flag) for the config drive.
if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER_MIGRATE; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY_MIGRATE; final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); final VirtualMachineTO to = hvGuru.implement(profile); ItWorkVO work = new ItWorkVO(UUID.randomUUID().toString(), _nodeId, State.Migrating, vm.getType(), vm.getId()); work.setStep(Step.Prepare); work.setResourceType(ItWorkVO.ResourceType.Host); vm.setLastHostId(srcHostId); vm.setPodIdToDeployIn(destHost.getPodId()); moveVmToMigratingState(vm, destHostId, work); Nic defaultNic = _networkModel.getDefaultNic(vm.getId()); UserVmVO userVm = _userVmDao.findById(vm.getId()); Map<String, String> details = userVmDetailsDao.listDetailsKeyPairs(vm.getId()); userVm.setDetails(details); final String serviceOffering = _serviceOfferingDao.findByIdIncludingRemoved(vm.getId(), vm.getServiceOfferingId()).getDisplayText(); boolean isWindows = _guestOSCategoryDao.findById(_guestOSDao.findById(vm.getGuestOSId()).getCategoryId()).getName().equalsIgnoreCase("Windows"); vmData = _networkModel.generateVmData(userVm.getUserData(), serviceOffering, vm.getDataCenterId(), vm.getInstanceName(), vm.getHostName(), vm.getId(), vm.getUuid(), defaultNic.getMacAddress(), userVm.getDetail("SSH.PublicKey"), (String) profile.getParameter(VirtualMachineProfile.Param.VmPassword), isWindows); String vmName = vm.getInstanceName(); String configDriveIsoRootFolder = "/tmp";
// NOTE(review): truncated fragment of advanceExpunge (braces unbalanced; the early-return for an already-removed VM
// and the cleanup ordering cannot be fully read from this view). Visible behavior: a null/removed VM is only logged;
// otherwise the VM is stopped, re-read by uuid, transitioned via ExpungeOperation (throwing CloudRuntimeException if
// the state does not allow it), storage access is revoked from its current-or-last host, its volumes are cleaned up,
// the guru finalizes the expunge, and the VM's detail rows are removed.
protected void advanceExpunge(VMInstanceVO vm) throws ResourceUnavailableException, OperationTimedoutException, ConcurrentOperationException { if (vm == null || vm.getRemoved() != null) { if (s_logger.isDebugEnabled()) { s_logger.debug("Unable to find vm or vm is destroyed: " + vm); advanceStop(vm.getUuid(), false); vm = _vmDao.findByUuid(vm.getUuid()); if (!stateTransitTo(vm, VirtualMachine.Event.ExpungeOperation, vm.getHostId())) { s_logger.debug("Unable to destroy the vm because it is not in the correct state: " + vm); throw new CloudRuntimeException("Unable to destroy " + vm); final HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType()); final Long hostId = vm.getHostId() != null ? vm.getHostId() : vm.getLastHostId(); List<Map<String, String>> targets = getTargets(hostId, vm.getId()); volumeMgr.revokeAccess(vm.getId(), hostId); volumeMgr.cleanupVolumes(vm.getId()); guru.finalizeExpunge(vm); userVmDetailsDao.removeDetails(vm.getId());
// NOTE(review): fragment of volume detach (the enclosing signature and the code between these statements are outside
// this view). As written, hostId is assigned from getHostId() and then unconditionally overwritten with
// getLastHostId() — presumably the full source guards the second assignment with `if (hostId == null)`; verify before
// relying on this fragment. A DettachCommand [sic — upstream spelling] is built for the volume's DiskTO and sent for
// the VM; the command is only needed when the VM is Running (sendCommand).
VMInstanceVO vm = _vmInstanceDao.findById(vmId); String errorMsg = "Failed to detach volume " + volume.getName() + " from VM " + vm.getHostName(); boolean sendCommand = vm.getState() == State.Running; Long hostId = vm.getHostId(); hostId = vm.getLastHostId(); HostVO host = _hostDao.findById(hostId); DiskTO disk = new DiskTO(volTO, volume.getDeviceId(), volume.getPath(), volume.getVolumeType()); DettachCommand cmd = new DettachCommand(disk, vm.getInstanceName());
/**
 * Refreshes security-group rules after a VM migrates. No-op when the VM is not
 * security-group enabled. System VMs get a NetworkRulesSystemVmCommand pushed
 * directly to their new host (agent failures are logged and swallowed); user
 * VMs get a ruleset update scheduled for just that instance.
 */
protected void handleVmMigrated(VMInstanceVO vm) {
    if (!isVmSecurityGroupEnabled(vm.getId())) {
        return;
    }
    if (vm.getType() == VirtualMachine.Type.User) {
        // User VM: schedule a ruleset refresh for this single instance.
        final List<Long> affectedVms = new ArrayList<Long>();
        affectedVms.add(vm.getId());
        scheduleRulesetUpdateToHosts(affectedVms, true, null);
        return;
    }
    // System VM: send the system-VM network rules directly to its host.
    final NetworkRulesSystemVmCommand ruleCmd = new NetworkRulesSystemVmCommand(vm.getInstanceName(), vm.getType());
    final Commands cmds = new Commands(ruleCmd);
    try {
        _agentMgr.send(vm.getHostId(), cmds);
    } catch (AgentUnavailableException e) {
        // Best-effort push; the periodic ruleset sync will catch up.
        s_logger.debug(e.toString());
    } catch (OperationTimedoutException e) {
        s_logger.debug(e.toString());
    }
}
/**
 * Deletes a bare-metal routing host. Non-bare-metal or non-routing hosts are
 * not handled here (returns null so another handler can take over). VM records
 * whose last host was this one are removed; if any such VM is still Running or
 * still placed on a host, deletion is refused with a CloudRuntimeException.
 *
 * @param isForced             unused by this handler
 * @param isForceDeleteStorage unused by this handler
 */
@Override
public DeleteHostAnswer deleteHost(HostVO host, boolean isForced, boolean isForceDeleteStorage) throws UnableDeleteHostException {
    if (host.getType() != Host.Type.Routing || host.getHypervisorType() != HypervisorType.BareMetal) {
        return null;
    }
    List<VMInstanceVO> deadVms = _vmDao.listByLastHostId(host.getId());
    for (VMInstanceVO vm : deadVms) {
        if (vm.getState() == State.Running || vm.getHostId() != null) {
            // Fixed message: original concatenation produced "...<id>is still running..." (missing space).
            throw new CloudRuntimeException("VM " + vm.getId() + " is still running on host " + host.getId());
        }
        _vmDao.remove(vm.getId());
    }
    return new DeleteHostAnswer(true);
}
private void scanStalledVMInTransitionStateOnDisconnectedHosts() { final Date cutTime = new Date(DateUtil.currentGMTTime().getTime() - VmOpWaitInterval.value() * 1000); final List<Long> stuckAndUncontrollableVMs = listStalledVMInTransitionStateOnDisconnectedHosts(cutTime); for (final Long vmId : stuckAndUncontrollableVMs) { final VMInstanceVO vm = _vmDao.findById(vmId); // We now only alert administrator about this situation _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_SYNC, vm.getDataCenterId(), vm.getPodIdToDeployIn(), VM_SYNC_ALERT_SUBJECT, "VM " + vm.getHostName() + "(" + vm.getInstanceName() + ") is stuck in " + vm.getState() + " state and its host is unreachable for too long"); } }