/**
 * Checks whether this host has been decommissioned.
 *
 * @return {@code true} if any slave in the DECOMMISSIONED state reported by the
 *         Singularity API has a host equal to {@code hostname}, {@code false} otherwise
 */
private boolean isDecommissioned() {
  Collection<SingularitySlave> slaves = singularityClient.getSlaves(Optional.of(MachineState.DECOMMISSIONED));
  for (SingularitySlave slave : slaves) {
    if (slave.getHost().equals(hostname)) {
      // Short-circuit on the first match instead of scanning the remaining slaves
      return true;
    }
  }
  return false;
}
/** Human-readable name in the form {@code "host (id)"}; excluded from JSON serialization. */
@JsonIgnore
@Override
public String getName() {
  return getHost() + " (" + getId() + ")";
}
/**
 * Collects the active tasks running on the given slave. A task matches when its
 * sanitized hostname equals the slave's sanitized hostname AND its agent id equals
 * the slave's id.
 */
public List<SingularityTask> getTasksOnSlave(Collection<SingularityTaskId> activeTaskIds, SingularitySlave slave) {
  final String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(slave.getHost());
  final List<SingularityTask> matching = Lists.newArrayList();
  for (SingularityTaskId taskId : activeTaskIds) {
    if (!taskId.getSanitizedHost().equals(sanitizedHost)) {
      continue; // different host, cannot be on this slave
    }
    Optional<SingularityTask> maybeTask = getTask(taskId);
    if (maybeTask.isPresent() && slave.getId().equals(maybeTask.get().getAgentId().getValue())) {
      matching.add(maybeTask.get());
    }
  }
  return matching;
}
/**
 * Inspects an incoming Mesos offer and records the slave and rack it came from.
 * If the slave is new but its host was previously marked inactive, the slave is
 * immediately moved toward decommission instead of being allowed to rejoin.
 *
 * @param offer the Mesos offer to examine
 * @return the check result for the slave (e.g. NEW when first seen)
 */
public CheckResult checkOffer(Offer offer) {
  final String slaveId = offer.getAgentId().getValue();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
  final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);
  final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());
  CheckResult result = check(slave, slaveManager);
  if (result == CheckResult.NEW) {
    // A slave on an inactive host must not silently rejoin the cluster
    if (inactiveSlaveManager.isInactive(slave.getHost())) {
      LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", slave, host);
      slaveManager.changeState(slave, MachineState.STARTING_DECOMMISSION, Optional.of(String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host)), Optional.absent());
    } else {
      LOG.info("Offer revealed a new slave {}", slave);
    }
  }
  // Rack bookkeeping is independent of the slave result; new racks are only logged
  final SingularityRack rack = new SingularityRack(rackId);
  if (check(rack, rackManager) == CheckResult.NEW) {
    LOG.info("Offer revealed a new rack {}", rack);
  }
  return result;
}
/**
 * Processes expiring machine-state actions for slaves. For each action that is due,
 * reverts the slave's state via {@code handleExpiringObject} and then deletes the
 * expiring record (whether or not the revert succeeded, so it is not retried forever).
 */
@Override
protected void checkExpiringObjects() {
  for (SingularityExpiringMachineState expiringObject : slaveManager.getExpiringObjects()) {
    if (!isExpiringDue(expiringObject)) {
      continue;
    }
    Optional<SingularitySlave> slave = slaveManager.getObject(expiringObject.getMachineId());
    if (!slave.isPresent()) {
      LOG.warn("Slave {} not present, discarding {}", expiringObject.getMachineId(), expiringObject);
    } else {
      try {
        handleExpiringObject(expiringObject, slave.get(), getMessage(expiringObject));
      } catch (Exception e) {
        // Pass the exception to the logger so the stack trace is not silently dropped
        LOG.error("Could not return slave {} to state {}", slave.get().getHost(), expiringObject.getRevertToState(), e);
      }
    }
    slaveManager.deleteExpiringObject(expiringObject.getMachineId());
  }
}
}
/**
 * Reverts a slave's machine state when its expiring action fires. If the revert target
 * is DECOMMISSIONED and the action requested killing tasks on decommission timeout,
 * schedules a cleanup for every active task whose sanitized host matches this slave's.
 */
@Override
protected void handleExpiringObject(SingularityExpiringMachineState expiringObject, SingularityMachineAbstraction machine, String message) {
  SingularitySlave slave = (SingularitySlave) machine;
  slaveManager.changeState(slave, expiringObject.getRevertToState(), Optional.of("Updated due to expiring action"), expiringObject.getUser());

  boolean shouldKillTasks = expiringObject.isKillTasksOnDecommissionTimeout()
      && expiringObject.getRevertToState() == MachineState.DECOMMISSIONED;
  if (!shouldKillTasks) {
    return;
  }

  String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(slave.getHost());
  long now = System.currentTimeMillis();
  // getActiveTaskIds returns ALL active tasks; filter down to this slave's host below
  for (SingularityTaskId activeTaskId : taskManager.getActiveTaskIds()) {
    if (activeTaskId.getSanitizedHost().equals(sanitizedHost)) {
      taskManager.saveTaskCleanup(new SingularityTaskCleanup(
          expiringObject.getUser(),
          TaskCleanupType.DECOMMISSION_TIMEOUT,
          now,
          activeTaskId,
          Optional.of(String.format("Slave decommission (started by: %s) timed out after %sms", expiringObject.getUser(), now - expiringObject.getStartMillis())),
          Optional.<String> absent(),
          Optional.<SingularityTaskShellCommandRequestId> absent()));
    }
  }
}
List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost(), useShortTimeout); MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost()); double systemMemTotalBytes = 0; double systemMemFreeBytes = 0; LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage); usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage); } catch (Throwable t) { String message = String.format("Could not get slave usage for host %s", slave.getHost()); LOG.error(message, t); exceptionNotifier.notify(message, t);
@Test public void testUsageCleaner() { initRequest(); initFirstDeploy(); saveAndSchedule(request.toBuilder().setInstances(Optional.of(2))); resourceOffers(1); List<SingularityTaskId> taskIds = taskManager.getActiveTaskIds(); String t1 = taskIds.get(0).getId(); String t2 = taskIds.get(1).getId(); String slaveId = slaveManager.getObjectIds().get(0); String host = slaveManager.getObjects().get(0).getHost(); MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 2, 5, 100); MesosTaskMonitorObject t2u1 = getTaskMonitor(t2, 10, 5, 1000); mesosClient.setSlaveResourceUsage(host, Arrays.asList(t1u1, t2u1)); usagePoller.runActionOnPoll(); cleaner.runActionOnPoll(); Assert.assertEquals(2, usageManager.getTasksWithUsage().size()); Assert.assertEquals(1, usageManager.getSlavesWithUsage().size()); Assert.assertEquals(1100, usageManager.getAllCurrentSlaveUsage().get(0).getMemoryBytesUsed(), 0); // kill task one statusUpdate(taskManager.getActiveTasks().get(0), TaskState.TASK_KILLED); killKilledTasks(); cleaner.runActionOnPoll(); Assert.assertEquals(1, usageManager.getTasksWithUsage().size()); Assert.assertEquals(1, usageManager.getSlavesWithUsage().size()); slaveManager.changeState(slaveId, MachineState.DEAD, Optional.absent(), Optional.absent()); cleaner.runActionOnPoll(); Assert.assertEquals(1, usageManager.getTasksWithUsage().size()); Assert.assertEquals(0, usageManager.getSlavesWithUsage().size()); }
@Test public void itTracksClusterUtilizationSimple() { initRequest(); double cpuReserved = 10; double memMbReserved = .001; initFirstDeployWithResources(cpuReserved, memMbReserved); saveAndSchedule(request.toBuilder().setInstances(Optional.of(1))); resourceOffers(1); SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0); String t1 = taskId.getId(); String host = slaveManager.getObjects().get(0).getHost(); // used 8 cpu MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 40, getTimestampSeconds(taskId, 5), 800); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u1)); usagePoller.runActionOnPoll(); // used 8 cpu MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 80, getTimestampSeconds(taskId, 10), 850); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u2)); usagePoller.runActionOnPoll(); Assert.assertTrue("Couldn't find cluster utilization", usageManager.getClusterUtilization().isPresent()); SingularityClusterUtilization utilization = usageManager.getClusterUtilization().get(); int taskUsages = usageManager.getTaskUsage(t1).size(); testUtilization(utilization, 2, taskUsages, cpuReserved, memMbReserved, 0, 1, 1, 0, 2, 223, 0, 2, 223, 0, 2, 223); Assert.assertEquals(requestId, utilization.getMaxUnderUtilizedCpuRequestId()); Assert.assertEquals(requestId, utilization.getMaxUnderUtilizedMemBytesRequestId()); }
SingularityTaskId t1 = taskIds.get(0); SingularityTaskId t2 = taskIds.get(1); String host = slaveManager.getObjects().get(0).getHost();
@Test public void itTracksOverusedCpuInClusterUtilization() { initRequest(); double cpuReserved = 2; double memMbReserved = .0009; initFirstDeployWithResources(cpuReserved, memMbReserved); saveAndSchedule(request.toBuilder().setInstances(Optional.of(1))); resourceOffers(1); SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0); String t1 = taskId.getId(); String host = slaveManager.getObjects().get(0).getHost(); // 4 cpus used MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 20, getTimestampSeconds(taskId, 5), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u1)); usagePoller.runActionOnPoll(); // 4 cpus used MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 40, getTimestampSeconds(taskId, 10), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u2)); usagePoller.runActionOnPoll(); Assert.assertTrue("Couldn't find cluster utilization", usageManager.getClusterUtilization().isPresent()); SingularityClusterUtilization utilization = usageManager.getClusterUtilization().get(); int taskUsages = usageManager.getTaskUsage(t1).size(); testUtilization(utilization, 2, taskUsages, cpuReserved, memMbReserved, 1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0); Assert.assertEquals(requestId, utilization.getMaxOverUtilizedCpuRequestId()); }
@Test public void itDoesntIncludePerfectlyUtilizedRequestsInClusterUtilization() { initRequest(); double cpuReserved = 2; double memMbReserved = .001; initFirstDeployWithResources(cpuReserved, memMbReserved); saveAndSchedule(request.toBuilder().setInstances(Optional.of(1))); resourceOffers(1); SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0); String t1 = taskId.getId(); String host = slaveManager.getObjects().get(0).getHost(); // 2 cpus used MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, getTimestampSeconds(taskId, 5), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u1)); usagePoller.runActionOnPoll(); // 2 cpus used MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 20, getTimestampSeconds(taskId, 10), 900); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u2)); usagePoller.runActionOnPoll(); Assert.assertTrue("Couldn't find cluster utilization", usageManager.getClusterUtilization().isPresent()); SingularityClusterUtilization utilization = usageManager.getClusterUtilization().get(); int taskUsages = usageManager.getTaskUsage(t1).size(); testUtilization(utilization, 2, taskUsages, cpuReserved, memMbReserved, 0, 0, 1, 0, 0, 86, 0, 0, 86, 0, 0, 86); Assert.assertEquals(requestId, utilization.getMaxUnderUtilizedMemBytesRequestId()); }
String host = slaveManager.getObjects().get(0).getHost();
SingularityTaskId t1 = taskIds.get(0); SingularityTaskId t2 = taskIds.get(1); String host = slaveManager.getObjects().get(0).getHost();
/**
 * Collects the active tasks running on the given slave. A task matches when its
 * sanitized hostname equals the slave's sanitized hostname AND its agent id equals
 * the slave's id.
 */
public List<SingularityTask> getTasksOnSlave(Collection<SingularityTaskId> activeTaskIds, SingularitySlave slave) {
  final String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(slave.getHost());
  final List<SingularityTask> matching = Lists.newArrayList();
  for (SingularityTaskId taskId : activeTaskIds) {
    if (!taskId.getSanitizedHost().equals(sanitizedHost)) {
      continue; // different host, cannot be on this slave
    }
    Optional<SingularityTask> maybeTask = getTask(taskId);
    if (maybeTask.isPresent() && slave.getId().equals(maybeTask.get().getAgentId().getValue())) {
      matching.add(maybeTask.get());
    }
  }
  return matching;
}
/**
 * Reverts a slave's machine state when its expiring action fires. If the revert target
 * is DECOMMISSIONED and the action requested killing tasks on decommission timeout,
 * schedules a cleanup for every active task whose sanitized host matches this slave's.
 */
@Override
protected void handleExpiringObject(SingularityExpiringMachineState expiringObject, SingularityMachineAbstraction machine, String message) {
  SingularitySlave slave = (SingularitySlave) machine;
  slaveManager.changeState(slave, expiringObject.getRevertToState(), Optional.of("Updated due to expiring action"), expiringObject.getUser());

  boolean shouldKillTasks = expiringObject.isKillTasksOnDecommissionTimeout()
      && expiringObject.getRevertToState() == MachineState.DECOMMISSIONED;
  if (!shouldKillTasks) {
    return;
  }

  String sanitizedHost = JavaUtils.getReplaceHyphensWithUnderscores(slave.getHost());
  long now = System.currentTimeMillis();
  // getActiveTaskIds returns ALL active tasks; filter down to this slave's host below
  for (SingularityTaskId activeTaskId : taskManager.getActiveTaskIds()) {
    if (activeTaskId.getSanitizedHost().equals(sanitizedHost)) {
      taskManager.saveTaskCleanup(new SingularityTaskCleanup(
          expiringObject.getUser(),
          TaskCleanupType.DECOMMISSION_TIMEOUT,
          now,
          activeTaskId,
          Optional.of(String.format("Slave decommission (started by: %s) timed out after %sms", expiringObject.getUser(), now - expiringObject.getStartMillis())),
          Optional.<String> absent(),
          Optional.<SingularityTaskShellCommandRequestId> absent()));
    }
  }
}
/**
 * Inspects an incoming Mesos offer and records the slave and rack it came from.
 * If the slave is new but its host was previously marked inactive, the slave is
 * immediately moved toward decommission instead of being allowed to rejoin.
 *
 * @param offer the Mesos offer to examine
 * @return the check result for the slave (e.g. NEW when first seen)
 */
public CheckResult checkOffer(Offer offer) {
  final String slaveId = offer.getAgentId().getValue();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
  final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);
  final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());
  CheckResult result = check(slave, slaveManager);
  if (result == CheckResult.NEW) {
    // A slave on an inactive host must not silently rejoin the cluster
    if (inactiveSlaveManager.isInactive(slave.getHost())) {
      LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", slave, host);
      slaveManager.changeState(slave, MachineState.STARTING_DECOMMISSION, Optional.of(String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host)), Optional.absent());
    } else {
      LOG.info("Offer revealed a new slave {}", slave);
    }
  }
  // Rack bookkeeping is independent of the slave result; new racks are only logged
  final SingularityRack rack = new SingularityRack(rackId);
  if (check(rack, rackManager) == CheckResult.NEW) {
    LOG.info("Offer revealed a new rack {}", rack);
  }
  return result;
}
/**
 * Processes expiring machine-state actions for slaves. For each action that is due,
 * reverts the slave's state via {@code handleExpiringObject} and then deletes the
 * expiring record (whether or not the revert succeeded, so it is not retried forever).
 */
@Override
protected void checkExpiringObjects() {
  for (SingularityExpiringMachineState expiringObject : slaveManager.getExpiringObjects()) {
    if (!isExpiringDue(expiringObject)) {
      continue;
    }
    Optional<SingularitySlave> slave = slaveManager.getObject(expiringObject.getMachineId());
    if (!slave.isPresent()) {
      LOG.warn("Slave {} not present, discarding {}", expiringObject.getMachineId(), expiringObject);
    } else {
      try {
        handleExpiringObject(expiringObject, slave.get(), getMessage(expiringObject));
      } catch (Exception e) {
        // Pass the exception to the logger so the stack trace is not silently dropped
        LOG.error("Could not return slave {} to state {}", slave.get().getHost(), expiringObject.getRevertToState(), e);
      }
    }
    slaveManager.deleteExpiringObject(expiringObject.getMachineId());
  }
}
}
@Test public void itDoesntIncludePerfectlyUtilizedRequestsInClusterUtilization() { initRequest(); double cpuReserved = 2; double memMbReserved = .001; initFirstDeployWithResources(cpuReserved, memMbReserved); saveAndSchedule(request.toBuilder().setInstances(Optional.of(1))); resourceOffers(1); SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0); String t1 = taskId.getId(); String host = slaveManager.getObjects().get(0).getHost(); // 2 cpus used MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 10, getTimestampSeconds(taskId, 5), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u1)); usagePoller.runActionOnPoll(); // 2 cpus used MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 20, getTimestampSeconds(taskId, 10), 900); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u2)); usagePoller.runActionOnPoll(); Assert.assertTrue("Couldn't find cluster utilization", usageManager.getClusterUtilization().isPresent()); SingularityClusterUtilization utilization = usageManager.getClusterUtilization().get(); int taskUsages = usageManager.getTaskUsage(t1).size(); testUtilization(utilization, 2, taskUsages, cpuReserved, memMbReserved, 0, 0, 1, 0, 0, 86, 0, 0, 86, 0, 0, 86); Assert.assertEquals(requestId, utilization.getMaxUnderUtilizedMemBytesRequestId()); }
@Test public void itTracksOverusedCpuInClusterUtilization() { initRequest(); double cpuReserved = 2; double memMbReserved = .0009; initFirstDeployWithResources(cpuReserved, memMbReserved); saveAndSchedule(request.toBuilder().setInstances(Optional.of(1))); resourceOffers(1); SingularityTaskId taskId = taskManager.getActiveTaskIds().get(0); String t1 = taskId.getId(); String host = slaveManager.getObjects().get(0).getHost(); // 4 cpus used MesosTaskMonitorObject t1u1 = getTaskMonitor(t1, 20, getTimestampSeconds(taskId, 5), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u1)); usagePoller.runActionOnPoll(); // 4 cpus used MesosTaskMonitorObject t1u2 = getTaskMonitor(t1, 40, getTimestampSeconds(taskId, 10), 1024); mesosClient.setSlaveResourceUsage(host, Collections.singletonList(t1u2)); usagePoller.runActionOnPoll(); Assert.assertTrue("Couldn't find cluster utilization", usageManager.getClusterUtilization().isPresent()); SingularityClusterUtilization utilization = usageManager.getClusterUtilization().get(); int taskUsages = usageManager.getTaskUsage(t1).size(); testUtilization(utilization, 2, taskUsages, cpuReserved, memMbReserved, 1, 0, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0); Assert.assertEquals(requestId, utilization.getMaxOverUtilizedCpuRequestId()); }