// NOTE(review): elided excerpt — braces are unbalanced and several statements reference
// variables (ass, entry, hostSlots, slot, num, executorSet, usedSlots, leftOverTopologies)
// declared outside the visible span; do not edit logic from this view alone.
// Visible flow appears to be: snapshot the current blacklist, find isolated topologies,
// free slots belonging to them, assign their executors onto reserved hosts and blacklist
// those hosts, free non-blacklisted used slots, run DefaultScheduler on the remaining
// topologies, then restore the original blacklist — TODO confirm against the full source.
@Override public void schedule(Topologies topologies, Cluster cluster) { Set<String> origBlacklist = cluster.getBlacklistedHosts(); List<TopologyDetails> isoTopologies = isolatedTopologies(topologies.getTopologies()); Set<String> isoIds = extractTopologyIds(isoTopologies); workerSpecs.remove(ass.getExecutors()); cluster.blacklistHost(entry.getKey()); } else { for (AssignmentInfo ass : assignments) { if (isoIds.contains(ass.getTopologyId())) { cluster.freeSlot(ass.getWorkerSlot()); cluster.freeSlots(hostUsedSlots.get(hostSlots.getHostName())); for (WorkerSlot tmpSlot : slot.subList(0, num)) { Set<ExecutorDetails> executor = removeElemFromExecutorsSet(executorSet); cluster.assign(tmpSlot, topologyId, executor); cluster.blacklistHost(hostSlots.getHostName()); Set<Map.Entry<String, Set<WorkerSlot>>> entries = usedSlots.entrySet(); for (Map.Entry<String, Set<WorkerSlot>> entry : entries) { if (!cluster.isBlacklistedHost(entry.getKey())) { cluster.freeSlots(entry.getValue()); DefaultScheduler.defaultSchedule(leftOverTopologies, cluster); cluster.setBlacklistedHosts(origBlacklist);
// NOTE(review): elided excerpt (dangling `try {`, `slots` is initialized to 0 but its real
// assignment is not visible, and the loop binding `supervisorId` is outside this span).
// Intent appears to be: when nodes are blacklisted, total up the slots and normalized
// resources still needed by unschedulable topologies, compare against what is available
// outside the blacklist, and mark blacklisted supervisors for release until the shortage
// (both slot count and resources) is covered — TODO confirm against the full source.
Set<String> readyToRemove = new HashSet<>(); if (blacklistedNodeIds.size() > 0) { int availableSlots = cluster.getNonBlacklistedAvailableSlots(blacklistedNodeIds).size(); int neededSlots = 0; NormalizedResourceOffer available = cluster.getNonBlacklistedClusterAvailableResources(blacklistedNodeIds); NormalizedResourceOffer needed = new NormalizedResourceOffer(); for (TopologyDetails td : cluster.getTopologies()) { if (cluster.needsSchedulingRas(td)) { int slots = 0; try { int assignedSlots = cluster.getAssignedNumWorkers(td); int tdSlotsNeeded = slots - assignedSlots; neededSlots += tdSlotsNeeded; Map<String, SupervisorDetails> availableSupervisors = cluster.getSupervisors(); NormalizedResourceOffer shortage = new NormalizedResourceOffer(needed); shortage.remove(available, cluster.getResourceMetrics()); int shortageSlots = neededSlots - availableSlots; LOG.debug("Need {} and {} slots.", needed, neededSlots); SupervisorDetails sd = availableSupervisors.get(supervisorId); if (sd != null) { NormalizedResourcesWithMemory sdAvailable = cluster.getAvailableResources(sd); int sdAvailableSlots = cluster.getAvailablePorts(sd).size(); readyToRemove.add(supervisorId); shortage.remove(sdAvailable, cluster.getResourceMetrics()); shortageSlots -= sdAvailableSlots;
/**
 * Count the distinct worker slots currently assigned to a topology.
 *
 * @param topology the topology to inspect; may be null
 * @return the number of distinct slots in use, or 0 when the topology is null
 *     or has no assignment
 */
@Override
public int getAssignedNumWorkers(TopologyDetails topology) {
    if (topology == null) {
        return 0;
    }
    SchedulerAssignment assignment = this.getAssignmentById(topology.getId());
    if (assignment == null) {
        return 0;
    }
    // Many executors can share one slot, so dedupe the slot values before counting.
    return new HashSet<>(assignment.getExecutorToSlot().values()).size();
}
/**
 * Unassign everything for the given topology id.
 *
 * @param topoId the id of the topology to unassign
 */
// Validates first so an illegal modification fails before any slots are freed.
public void unassign(String topoId) { assertValidTopologyForModification(topoId); freeSlots(getUsedSlotsByTopologyId(topoId)); }
/**
 * Update the assignments and status from the other cluster.
 *
 * @param other the cluster to get the assignments and status from
 */
public void updateFrom(Cluster other) {
    // Validate every incoming topology up front so nothing is mutated on failure.
    other.getAssignments().values()
        .forEach(assignment -> assertValidTopologyForModification(assignment.getTopologyId()));
    setAssignments(other.getAssignments(), false);
    setStatusMap(other.getStatusMap());
}
}
/**
 * A topology needs scheduling when it wants more workers than it currently has,
 * or when some of its executors are not yet placed on a slot.
 */
@Override
public boolean needsScheduling(TopologyDetails topology) {
    if (topology.getNumWorkers() > this.getAssignedNumWorkers(topology)) {
        return true;
    }
    return !getUnassignedExecutors(topology).isEmpty();
}
private void updateChanges(Cluster cluster, Topologies topologies) { //Cannot simply set this.cluster=schedulingState.cluster since clojure is immutable cluster.setAssignments(schedulingState.cluster.getAssignments()); cluster.setBlacklistedHosts(schedulingState.cluster.getBlacklistedHosts()); cluster.setStatusMap(schedulingState.cluster.getStatusMap()); cluster.setSupervisorsResourcesMap(schedulingState.cluster.getSupervisorsResourcesMap()); cluster.setTopologyResourcesMap(schedulingState.cluster.getTopologyResourcesMap()); cluster.setWorkerResourcesMap(schedulingState.cluster.getWorkerResourcesMap()); //updating resources used by supervisor updateSupervisorsResources(cluster, topologies); }
// NOTE(review): elided excerpt — the two inner for-loops are never closed in this view and
// the trailing `return` belongs to an enclosing method whose signature is not visible.
// Visible flow: build a Cluster, seed its status map from the previous round, then publish
// the post-scheduling status / supervisor resources / topology resources / per-worker
// resources back into atomic holders before returning the computed assignments.
Cluster cluster = new Cluster(inimbus, resourceMetrics, supervisors, topoToSchedAssignment, topologies, conf); cluster.setStatusMap(idToSchedStatus.get()); idToSchedStatus.set(Utils.merge(idToSchedStatus.get(), cluster.getStatusMap())); nodeIdToResources.set(cluster.getSupervisorsResourcesMap()); Map<String, TopologyResources> resources = cluster.getTopologyResourcesMap(); idToResources.getAndAccumulate(resources, (orig, update) -> Utils.merge(orig, update)); for (Entry<String, Map<WorkerSlot, WorkerResources>> uglyWorkerResources : cluster.getWorkerResourcesMap().entrySet()) { Map<WorkerSlot, WorkerResources> slotToResources = new HashMap<>(); for (Entry<WorkerSlot, WorkerResources> uglySlotToResources : uglyWorkerResources.getValue().entrySet()) { return cluster.getAssignments();
// NOTE(review): test-body excerpt — the enclosing @Test method and the closing braces of
// the final for-loop are outside this view. Visible flow: schedule repeatedly while
// supervisor "sup-0" disappears and reappears, assert host-0 ends up blacklisted, then
// keep scheduling (300 / 10 - 2 iterations — presumably resume-timeout / tick-interval
// minus the rounds already run; verify against the test's config constants) until the
// blacklist is expected to clear.
StormMetricsRegistry metricsRegistry = new StormMetricsRegistry(); ResourceMetrics resourceMetrics = new ResourceMetrics(metricsRegistry); Cluster cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config); BlacklistScheduler bs = new BlacklistScheduler(new DefaultScheduler(), metricsRegistry); bs.prepare(config); bs.schedule(topologies, cluster); cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); bs.schedule(topologies, cluster); cluster = new Cluster(iNimbus, resourceMetrics, TestUtilsForBlacklistScheduler.removeSupervisorFromSupervisors(supMap, "sup-0"), TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments()), topologies, config); bs.schedule(topologies, cluster); cluster = new Cluster(iNimbus, resourceMetrics, supMap, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config); bs.schedule(topologies, cluster); Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts()); for (int i = 0; i < 300 / 10 - 2; i++) { bs.schedule(topologies, cluster); Assert.assertEquals("blacklist", Collections.singleton("host-0"), cluster.getBlacklistedHosts()); bs.schedule(topologies, cluster); Assert.assertEquals("blacklist", Collections.emptySet(), cluster.getBlacklistedHosts());
/**
 * Smoke test: a single 1-spout/1-bolt topology on one supervisor should be fully
 * scheduled into exactly one slot on one node, with both executors placed.
 */
@Test
public void sanityTestOfScheduling() {
    INimbus nimbus = new INimbusTest();
    Map<String, SupervisorDetails> supervisors = genSupervisors(1, 2, 400, 2000);
    Config conf = new Config();
    conf.putAll(defaultTopologyConf);

    TopologyDetails topology1 = genTopology("topology1", conf, 1, 1, 1, 1, 0, 0, "user");
    Topologies topologies = new Topologies(topology1);
    Cluster cluster = new Cluster(nimbus, new ResourceMetrics(new StormMetricsRegistry()),
        supervisors, new HashMap<>(), topologies, conf);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf);
    scheduler.schedule(topologies, cluster);

    SchedulerAssignment assignment = cluster.getAssignmentById(topology1.getId());
    Set<WorkerSlot> slots = assignment.getSlots();
    Set<String> nodeIds = new HashSet<>();
    for (WorkerSlot slot : slots) {
        nodeIds.add(slot.getNodeId());
    }
    Collection<ExecutorDetails> executors = assignment.getExecutors();

    assertEquals(1, slots.size());
    assertEquals(1, nodeIds.size());
    assertEquals(2, executors.size());
    assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy",
        cluster.getStatusMap().get(topology1.getId()));
}
// NOTE(review): test-body excerpt — both for-loops are unclosed in this view.
// NOTE(review): suspected copy-paste bug — the second assertion's message says
// "All executors in topo-2 scheduled" but it checks getUnassignedExecutors(topo1);
// it almost certainly should check topo2. Confirm against the full test before fixing.
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); cluster.setNetworkTopography(rackToNodes); SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId()); for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) { Assert.assertEquals("All executors in topo-1 scheduled", 0, cluster.getUnassignedExecutors(topo1).size()); assignment = cluster.getAssignmentById(topo2.getId()); for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) { Assert.assertEquals("All executors in topo-2 scheduled", 0, cluster.getUnassignedExecutors(topo1).size());
/**
 * Run blacklist bookkeeping around the underlying scheduler: record bad supervisors,
 * compute and apply the host blacklist to the cluster, then delegate the actual
 * scheduling. The statement order matters — the blacklist must be set on the cluster
 * before the underlying scheduler runs so it avoids blacklisted hosts.
 */
@Override public void schedule(Topologies topologies, Cluster cluster) { LOG.debug("running Black List scheduler"); LOG.debug("AssignableSlots: {}", cluster.getAssignableSlots()); LOG.debug("AvailableSlots: {}", cluster.getAvailableSlots()); LOG.debug("UsedSlots: {}", cluster.getUsedSlots()); Map<String, SupervisorDetails> supervisors = cluster.getSupervisors(); blacklistStrategy.resumeFromBlacklist(); badSupervisors(supervisors); Set<String> blacklistHosts = getBlacklistHosts(cluster, topologies); this.blacklistHost = blacklistHosts; cluster.setBlacklistedHosts(blacklistHosts); removeLongTimeDisappearFromCache(); underlyingScheduler.schedule(topologies, cluster); }
// NOTE(review): test-body excerpt — the enclosing @Test method and the setup of config1/
// config2, stormTopology2, and executorMap2 are outside this view. Visible flow:
// topology1 schedules fully into 4 workers; topology2 is then scheduled on a fresh
// cluster and is expected to fail with "Not enough resources", leaving all 5 of its
// executors unassigned.
ResourceAwareScheduler rs = new ResourceAwareScheduler(); Topologies topologies = new Topologies(topology1); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config1); rs.prepare(config1); rs.schedule(topologies, cluster); assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy", cluster.getStatusMap().get(topology1.getId())); assertEquals(4, cluster.getAssignedNumWorkers(topology1)); TopologyDetails topology2 = new TopologyDetails("topology2", config2, stormTopology2, 1, executorMap2, 0, "user"); topologies = new Topologies(topology2); cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config2); rs.prepare(config2); rs.schedule(topologies, cluster); String status = cluster.getStatusMap().get(topology2.getId()); assert status.startsWith("Not enough resources to schedule") : status; assert status.endsWith("5 executors not scheduled") : status; assertEquals(5, cluster.getUnassignedExecutors(topology2).size());
// NOTE(review): test-body excerpt — totalNumberOfTasks, memoryOffHeap, sharedOffHeapWorker,
// topo, and the assertions that consume totalExpectedWorkerOffHeap / nodeToTotalShared are
// outside this view. Visible flow: schedule, then walk the per-supervisor resource map and
// the assignment's slots / shared off-heap memory, presumably to verify accounting.
Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, conf); rs.schedule(topologies, cluster); for (Entry<String, SupervisorResources> entry: cluster.getSupervisorsResourcesMap().entrySet()) { String supervisorId = entry.getKey(); SupervisorResources resources = entry.getValue(); double totalExpectedWorkerOffHeap = (totalNumberOfTasks * memoryOffHeap) + sharedOffHeapWorker; SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); Set<WorkerSlot> slots = assignment.getSlots(); Map<String, Double> nodeToTotalShared = assignment.getNodeIdToTotalSharedOffHeapMemory();
/**
 * When the first topology failed to be scheduled make sure subsequent schedulings can still succeed
 */
@Test
public void testSchedulingAfterFailedScheduling() {
    INimbus nimbus = new INimbusTest();
    Map<String, SupervisorDetails> supervisors = genSupervisors(8, 4, 100, 1000);
    Config config = createClusterConfig(100, 500, 500, null);

    TopologyDetails topo1 = genTopology("topo-1", config, 8, 0, 2, 0, currentTime - 2, 10, "jerry");
    TopologyDetails topo2 = genTopology("topo-2", config, 2, 0, 2, 0, currentTime - 2, 20, "jerry");
    TopologyDetails topo3 = genTopology("topo-3", config, 1, 2, 1, 1, currentTime - 2, 20, "jerry");
    Topologies topologies = new Topologies(topo1, topo2, topo3);
    Cluster cluster = new Cluster(nimbus, new ResourceMetrics(new StormMetricsRegistry()),
        supervisors, new HashMap<String, SchedulerAssignmentImpl>(), topologies, config);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(config);
    scheduler.schedule(topologies, cluster);

    // topo-1 is expected to fail; topo-2 and topo-3 must still land completely.
    assertTrue("Topo-2 scheduled?", cluster.getAssignmentById(topo2.getId()) != null);
    assertEquals("Topo-2 all executors scheduled?", 4,
        cluster.getAssignmentById(topo2.getId()).getExecutorToSlot().size());
    assertTrue("Topo-3 scheduled?", cluster.getAssignmentById(topo3.getId()) != null);
    assertEquals("Topo-3 all executors scheduled?", 3,
        cluster.getAssignmentById(topo3.getId()).getExecutorToSlot().size());
}
// NOTE(review): elided excerpt — the enclosing loop/method and several closing braces are
// outside this view. Visible flow: register the submitter in userMap, then queue the
// topology as pending (has unassigned executors) or running (fully assigned), defaulting
// an empty/missing status to "Fully Scheduled".
userMap.put(topologySubmitter, new User(topologySubmitter, userResourcePools.get(topologySubmitter))); if (cluster.getUnassignedExecutors(td).size() > 0) { LOG.debug("adding td: {} to pending queue", td.getName()); userMap.get(topologySubmitter).addTopologyToPendingQueue(td); } else { LOG.debug("adding td: {} to running queue with existing status: {}", td.getName(), cluster.getStatusMap().get(td.getId())); userMap.get(topologySubmitter).addTopologyToRunningQueue(td); if (cluster.getStatusMap().get(td.getId()) == null || cluster.getStatusMap().get(td.getId()).equals("")) { cluster.setStatus(td.getId(), "Fully Scheduled");
/**
 * Build the next-round Cluster for a scheduling test, carrying forward the previous
 * round's assignments (or starting empty when there was no previous cluster).
 *
 * @param cluster     the previous cluster, or null for the first round
 * @param supervisors the supervisors for the new cluster
 * @param iNimbus     the INimbus implementation to use
 * @param config      the cluster configuration
 * @param topologies  the topologies to schedule
 * @return a fresh Cluster seeded with the carried-over assignments
 */
public static Cluster nextCluster(Cluster cluster, Map<String, SupervisorDetails> supervisors, INimbus iNimbus,
                                  Map<String, Object> config, Topologies topologies) {
    Map<String, SchedulerAssignmentImpl> assignment = cluster == null
        ? new HashMap<>()
        : TestUtilsForBlacklistScheduler.assignmentMapToImpl(cluster.getAssignments());
    return new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supervisors, assignment,
        topologies, config);
}
}
// NOTE(review): elided excerpt — topologyDetails, mesosWorkerSlots, executorsPerWorker,
// topologyId, iterator, and slotAssignmentStrings are declared outside this view, and
// workerSlots/info/slotsAssigned/slotsAvailable are unused within it. Visible flow:
// record a human-readable "(node:port, executors)" string, then assign the first Mesos
// worker slot to the topology's executor group.
List<WorkerSlot> workerSlots = cluster.getAvailableSlots(); String info = ""; int slotsAssigned = cluster.getAssignedNumWorkers(topologyDetails); int slotsAvailable = mesosWorkerSlots.size(); slotAssignmentStrings.add("(" + mesosWorkerSlots.get(0).getNodeId() + ":" + mesosWorkerSlots.get(0).getPort() + ", " + executorsPerWorker.toString() + ")"); iterator.remove(); cluster.assign(mesosWorkerSlots.remove(0), topologyId, executorsPerWorker);
// NOTE(review): elided excerpt — the loop binding `supervisorId` and several closing
// braces are outside this view. Slot-count-only variant of the blacklist-release logic
// (compare with the resource-aware variant elsewhere in this file): sum the worker slots
// still needed by topologies that need scheduling, subtract what is available outside the
// blacklist, and release blacklisted supervisors until the slot shortage is covered.
Set<String> readyToRemove = new HashSet<>(); if (blacklistedNodeIds.size() > 0) { int availableSlots = cluster.getNonBlacklistedAvailableSlots(blacklistedNodeIds).size(); int neededSlots = 0; for (TopologyDetails td : cluster.needsSchedulingTopologies()) { int slots = td.getNumWorkers(); int assignedSlots = cluster.getAssignedNumWorkers(td); int tdSlotsNeeded = slots - assignedSlots; neededSlots += tdSlotsNeeded; Map<String, SupervisorDetails> availableSupervisors = cluster.getSupervisors(); int shortageSlots = neededSlots - availableSlots; LOG.debug("Need {} slots.", neededSlots); SupervisorDetails sd = availableSupervisors.get(supervisorId); if (sd != null) { int sdAvailableSlots = cluster.getAvailablePorts(sd).size(); readyToRemove.add(supervisorId); shortageSlots -= sdAvailableSlots;
// NOTE(review): test-body excerpt — the enclosing @Test method, the re-scheduling call
// that should follow cluster.freeSlot(slot), and the origin of `slot` are outside this
// view. Visible flow: assert a full, constraint-valid schedule, free one slot, and assert
// the topology ends up fully scheduled and constraint-valid again.
Assert.assertEquals("topo all executors scheduled? " + cluster.getUnassignedExecutors(topo), 0, cluster.getUnassignedExecutors(topo).size()); Assert.assertTrue("Valid Scheduling?", ConstraintSolverStrategy.validateSolution(cluster, topo)); LOG.info("Slots Used {}", cluster.getAssignmentById(topo.getId()).getSlots()); LOG.info("Assignment {}", cluster.getAssignmentById(topo.getId()).getSlotToExecutors()); SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId()); cluster.freeSlot(slot); Assert.assertEquals("topo all executors scheduled?", 0, cluster.getUnassignedExecutors(topo).size()); Assert.assertTrue("Valid Scheduling?", ConstraintSolverStrategy.validateSolution(cluster, topo));