/**
 * Returns the per-user resource pools as reported by {@link #getUserResourcePools()}.
 *
 * @return the value produced by {@code getUserResourcePools()}, unchanged
 */
@Override
public Map<String, Map<String, Double>> config() {
    // No cast: other call sites in this class assign getUserResourcePools() directly to
    // Map<String, Map<String, Double>>, so the former raw (Map) cast only discarded
    // generic type checking and produced an unchecked conversion on return.
    return getUserResourcePools();
}
@Override public void schedule(Topologies topologies, Cluster cluster) { Map<String, User> userMap = getUsers(cluster); List<TopologyDetails> orderedTopologies = new ArrayList<>(schedulingPriorityStrategy.getOrderedTopologies(cluster, userMap)); if (LOG.isDebugEnabled()) { LOG.debug("Ordered list of topologies is: {}", orderedTopologies.stream().map((t) -> t.getId()).collect(Collectors.toList())); } for (TopologyDetails td : orderedTopologies) { if (!cluster.needsSchedulingRas(td)) { //cluster forgets about its previous status, so if it is scheduled just leave it. cluster.setStatusIfAbsent(td.getId(), "Fully Scheduled"); } else { User submitter = userMap.get(td.getTopologySubmitter()); scheduleTopology(td, cluster, submitter, orderedTopologies); } } }
raw = (Map<String, Map<String, Number>>) configLoader.load(DaemonConfig.RESOURCE_AWARE_SCHEDULER_USER_POOLS); if (raw != null) { return convertToDouble(raw); } else { LOG.warn("Config loader returned null. Will try to read from user-resource-pools.yaml"); raw = (Map<String, Map<String, Number>>) fromFile.get(DaemonConfig.RESOURCE_AWARE_SCHEDULER_USER_POOLS); if (raw != null) { return convertToDouble(raw); } else { LOG.warn("Reading from user-resource-pools.yaml returned null. This could because the file is not available. " return convertToDouble(raw);
rasStrategy.prepare(conf); } catch (DisallowedStrategyException e) { markFailedTopology(topologySubmitter, cluster, td, "Unsuccessful in scheduling - " + e.getAttemptedClass() + " is not an allowed strategy. Please make sure your " return; } catch (RuntimeException e) { markFailedTopology(topologySubmitter, cluster, td, "Unsuccessful in scheduling - failed to create instance of topology strategy " + strategyConf LOG.debug("scheduling result: {}", result); if (result == null) { markFailedTopology(topologySubmitter, cluster, td, "Internal scheduler error"); return; } else { SchedulerAssignment assignment = cluster.getAssignmentById(td.getId()); if (assignment != null) { cpuNeeded -= getCpuUsed(assignment); memoryNeeded -= getMemoryUsed(assignment); cpuNeeded -= getCpuUsed(evictAssignemnt); memoryNeeded -= getMemoryUsed(evictAssignemnt); evictedSomething = true; nodes.freeSlots(workersToEvict); markFailedTopology(topologySubmitter, cluster, td, message.toString());
/**
 * Smoke test: schedules a single topology ("topology1", submitted by "user") on a freshly
 * generated cluster and checks that it ends up on exactly one slot of one node with two
 * executors, and that the cluster status reports it as fully scheduled by
 * DefaultResourceAwareStrategy.
 */
@Test
public void sanityTestOfScheduling() {
    INimbus nimbus = new INimbusTest();
    Map<String, SupervisorDetails> supervisors = genSupervisors(1, 2, 400, 2000);

    Config conf = new Config();
    conf.putAll(defaultTopologyConf);

    TopologyDetails topo = genTopology("topology1", conf, 1, 1, 1, 1, 0, 0, "user");
    Topologies topologies = new Topologies(topo);
    Cluster cluster = new Cluster(
        nimbus,
        new ResourceMetrics(new StormMetricsRegistry()),
        supervisors,
        new HashMap<>(),
        topologies,
        conf);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf);
    scheduler.schedule(topologies, cluster);

    SchedulerAssignment assignment = cluster.getAssignmentById(topo.getId());
    Set<WorkerSlot> slots = assignment.getSlots();

    // Collapse the assigned slots to the distinct nodes that host them.
    Set<String> nodeIds = new HashSet<>();
    slots.forEach(slot -> nodeIds.add(slot.getNodeId()));

    Collection<ExecutorDetails> executors = assignment.getExecutors();

    assertEquals(1, slots.size());
    assertEquals(1, nodeIds.size());
    assertEquals(2, executors.size());
    assertEquals("Running - Fully Scheduled by DefaultResourceAwareStrategy",
        cluster.getStatusMap().get(topo.getId()));
}
LOG.debug("\n\n\nRerunning ResourceAwareScheduler..."); initialize(topologies, cluster); for (User user : getUserMap().values()) { LOG.info(user.getDetailedInfo()); break; scheduleTopology(td); updateChanges(cluster, topologies);
SchedulingState schedulingState = checkpointSchedulingState(); IStrategy rasStrategy = null; try { LOG.error("failed to create instance of IStrategy: {} with error: {}! Topology {} will not be scheduled.", td.getName(), td.getConf().get(Config.TOPOLOGY_SCHEDULER_STRATEGY), e.getMessage()); topologySubmitter = cleanup(schedulingState, td); topologySubmitter.moveTopoFromPendingToInvalid(td); this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - failed to create instance of topology strategy " LOG.error(String.format("Exception thrown when running strategy %s to schedule topology %s. Topology will not be scheduled!" , rasStrategy.getClass().getName(), td.getName()), ex); topologySubmitter = cleanup(schedulingState, td); topologySubmitter.moveTopoFromPendingToInvalid(td); this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - Exception thrown when running strategy {}" if (result.isSuccess()) { try { if (mkAssignment(td, result.getSchedulingResultMap())) { topologySubmitter.moveTopoFromPendingToRunning(td); this.schedulingState.cluster.setStatus(td.getId(), "Running - " + result.getMessage()); } else { topologySubmitter = this.cleanup(schedulingState, td); topologySubmitter.moveTopoFromPendingToAttempted(td); this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - Unable to assign executors to nodes. Please check logs for details"); topologySubmitter = cleanup(schedulingState, td); topologySubmitter.moveTopoFromPendingToAttempted(td); this.schedulingState.cluster.setStatus(td.getId(), "Unsuccessful in scheduling - IllegalStateException thrown when attempting to assign executors to nodes. Please check log for details.");
/**
 * Replaces {@code this.schedulingState} with a new {@code SchedulingState} built from the
 * users derived from the given topologies and cluster, the cluster and topologies
 * themselves, and this scheduler's {@code conf}.
 *
 * @param topologies the topologies passed to {@code getUsers} and to the new state
 * @param cluster    the cluster passed to {@code getUsers} and to the new state
 */
private void initialize(Topologies topologies, Cluster cluster) {
    // getUsers(...) is the first constructor argument, so it is still evaluated before
    // anything else is read, exactly as when its result was held in a local.
    this.schedulingState =
        new SchedulingState(getUsers(topologies, cluster), cluster, topologies, this.conf);
}
RAS_Node targetNode = this.schedulingState.nodes.getNodeById(targetSlot.getNodeId()); targetSlot = allocateResourceToSlot(td, execsNeedScheduling, targetSlot);
config.putAll(createGrasClusterConfig(88, 775, 25, null, null)); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); rs.schedule(topologies, cluster); rs.schedule(topologies, cluster);
config.put(Config.TOPOLOGY_SCHEDULER_STRATEGY, strategy); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); rs.schedule(topologies, cluster); long schedulingDuration = Time.currentTimeMillis() - startTime; LOG.info("Scheduling took " + schedulingDuration + " ms");
/** * Get User wrappers around cluster. * * @param cluster the cluster to get the users out of. */ private Map<String, User> getUsers(Cluster cluster) { Map<String, User> userMap = new HashMap<>(); Map<String, Map<String, Double>> userResourcePools = getUserResourcePools(); LOG.debug("userResourcePools: {}", userResourcePools); for (TopologyDetails td : cluster.getTopologies()) { String topologySubmitter = td.getTopologySubmitter(); //additional safety check to make sure that topologySubmitter is going to be a valid value if (topologySubmitter == null || topologySubmitter.equals("")) { LOG.error("Cannot determine user for topology {}. Will skip scheduling this topology", td.getName()); continue; } if (!userMap.containsKey(topologySubmitter)) { userMap.put(topologySubmitter, new User(topologySubmitter, userResourcePools.get(topologySubmitter))); } } return userMap; }
genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); LOG.info("\n\n\t\tScheduling topos 2 to 5..."); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone scheduling..."); assertTopologiesFullyScheduled(cluster, "topo-2", "topo-3", "topo-4", "topo-5"); cluster = new Cluster(cluster, topologies); LOG.info("\n\n\t\tScheduling topos 1 to 5"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone scheduling...");
Map<String, Map<String, Double>> userResourcePools = getUserResourcePools(); LOG.debug("userResourcePools: {}", userResourcePools);
TopologyDetails topology1 = new TopologyDetails("topology1", config, stormTopology1, 2, executorMap1, 0, "user"); ResourceAwareScheduler rs = new ResourceAwareScheduler(); Topologies topologies = new Topologies(topology1); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); rs.prepare(config); rs.schedule(topologies, cluster);
genTopology("topo-5", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); LOG.info("\n\n\t\tScheduling topos 1,3,4,5"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone scheduling..."); cluster = new Cluster(cluster, topologies); LOG.info("\n\n\t\tScheduling topos 1,3,4,5,6"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone scheduling..."); cluster = new Cluster(cluster, topologies); LOG.info("\n\n\t\tScheduling topos 1-6"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone scheduling...");
genTopology("topo-6", config, 1, 0, 1, 0, currentTime - 2, 29, "derek")); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); LOG.info("\n\n\t\tScheduling topos 1,2,5,6"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone Scheduling..."); cluster = new Cluster(cluster, topologies); LOG.info("\n\n\t\tScheduling topos 1,2,3,5,6"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone Scheduling..."); cluster = new Cluster(cluster, topologies); LOG.info("\n\n\t\tScheduling topos 1-6"); rs.schedule(topologies, cluster); LOG.info("\n\n\t\tDone Scheduling...");
/**
 * Submits five topologies from two users, where only "jerry" has an entry in the user
 * resource pool and "bobby" has none, then runs a single scheduling pass.
 *
 * <p>Expected outcome: topo-1 through topo-4 are fully scheduled and topo-5 (one of
 * bobby's topologies) is not scheduled.
 */
@Test
public void testSubmitUsersWithNoGuarantees() {
    INimbus nimbus = new INimbusTest();
    Map<String, SupervisorDetails> supervisors = genSupervisors(4, 4, 100, 1000);

    // The pool is built from a single userRes entry, for "jerry".
    Map<String, Map<String, Number>> jerryOnlyPool = userResourcePool(
        userRes("jerry", 200, 2000));
    Config conf = createClusterConfig(100, 500, 500, jerryOnlyPool);

    Topologies topologies = new Topologies(
        genTopology("topo-1", conf, 1, 0, 1, 0, currentTime - 2, 10, "jerry"),
        genTopology("topo-2", conf, 1, 0, 1, 0, currentTime - 2, 20, "jerry"),
        genTopology("topo-3", conf, 1, 0, 1, 0, currentTime - 2, 20, "jerry"),
        genTopology("topo-4", conf, 1, 0, 1, 0, currentTime - 2, 10, "bobby"),
        genTopology("topo-5", conf, 1, 0, 1, 0, currentTime - 2, 20, "bobby"));
    Cluster cluster = new Cluster(
        nimbus,
        new ResourceMetrics(new StormMetricsRegistry()),
        supervisors,
        new HashMap<>(),
        topologies,
        conf);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf);
    scheduler.schedule(topologies, cluster);

    assertTopologiesFullyScheduled(cluster, "topo-1", "topo-2", "topo-3", "topo-4");
    assertTopologiesNotScheduled(cluster, "topo-5");
}
genTopology("topo-2", config, 1, 0, 2, 0, currentTime - 2, 10, "user")); Cluster cluster = new Cluster(iNimbus, new ResourceMetrics(new StormMetricsRegistry()), supMap, new HashMap<>(), topologies, config); ResourceAwareScheduler rs = new ResourceAwareScheduler(); rs.prepare(config); rs.schedule(topologies, cluster);
/**
 * When the first topology failed to be scheduled, make sure subsequent schedulings can
 * still succeed.
 *
 * <p>Three topologies from "jerry" go through one scheduling pass. The test only inspects
 * topo-2 and topo-3: each must have an assignment, covering 4 and 3 executors
 * respectively. Nothing is asserted about topo-1 itself.
 */
@Test
public void testSchedulingAfterFailedScheduling() {
    INimbus nimbus = new INimbusTest();
    Map<String, SupervisorDetails> supervisors = genSupervisors(8, 4, 100, 1000);
    Config conf = createClusterConfig(100, 500, 500, null);

    TopologyDetails first = genTopology("topo-1", conf, 8, 0, 2, 0, currentTime - 2, 10, "jerry");
    TopologyDetails second = genTopology("topo-2", conf, 2, 0, 2, 0, currentTime - 2, 20, "jerry");
    TopologyDetails third = genTopology("topo-3", conf, 1, 2, 1, 1, currentTime - 2, 20, "jerry");
    Topologies topologies = new Topologies(first, second, third);

    Cluster cluster = new Cluster(
        nimbus,
        new ResourceMetrics(new StormMetricsRegistry()),
        supervisors,
        new HashMap<>(),
        topologies,
        conf);

    ResourceAwareScheduler scheduler = new ResourceAwareScheduler();
    scheduler.prepare(conf);
    scheduler.schedule(topologies, cluster);

    // Existence is asserted before the size check so a missing assignment fails with a
    // readable message instead of a NullPointerException.
    assertTrue("Topo-2 scheduled?", cluster.getAssignmentById(second.getId()) != null);
    assertEquals("Topo-2 all executors scheduled?", 4,
        cluster.getAssignmentById(second.getId()).getExecutorToSlot().size());

    assertTrue("Topo-3 scheduled?", cluster.getAssignmentById(third.getId()) != null);
    assertEquals("Topo-3 all executors scheduled?", 3,
        cluster.getAssignmentById(third.getId()).getExecutorToSlot().size());
}