@VisibleForTesting
void prepare(Cluster cluster) {
    this.cluster = cluster;
    nodes = new RAS_Nodes(cluster);
    networkTopography = cluster.getNetworkTopography();
    Map<String, String> hostToRack = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
        String rackId = entry.getKey();
        for (String hostName : entry.getValue()) {
            hostToRack.put(hostName, rackId);
        }
    }
    for (RAS_Node node : nodes.getNodes()) {
        String superId = node.getId();
        String hostName = node.getHostname();
        String rackId = hostToRack.getOrDefault(hostName, DNSToSwitchMapping.DEFAULT_RACK);
        superIdToHostname.put(superId, hostName);
        superIdToRack.put(superId, rackId);
        hostnameToNodes.computeIfAbsent(hostName, (hn) -> new ArrayList<>()).add(node);
        rackIdToNodes.computeIfAbsent(rackId, (hn) -> new ArrayList<>()).add(node);
    }
    logClusterInfo();
}
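The host-to-rack inversion that prepare() performs can be exercised in isolation with plain collections. The sketch below is illustrative only: the literal map contents and the "/default-rack" string stand in for cluster.getNetworkTopography() and DNSToSwitchMapping.DEFAULT_RACK.

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class HostToRackSketch {
    public static void main(String[] args) {
        // Rack id -> hostnames, as a network topography would report it (illustrative data).
        Map<String, List<String>> networkTopography = new HashMap<>();
        networkTopography.put("rack-0", List.of("host-a", "host-b"));
        networkTopography.put("rack-1", List.of("host-c"));

        // Invert to hostname -> rack id.
        Map<String, String> hostToRack = new HashMap<>();
        for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
            for (String host : entry.getValue()) {
                hostToRack.put(host, entry.getKey());
            }
        }

        // Hosts missing from the topography fall back to a default rack.
        String rackForUnknownHost = hostToRack.getOrDefault("host-x", "/default-rack");
        System.out.println(hostToRack);          // e.g. {host-a=rack-0, host-b=rack-0, host-c=rack-1}
        System.out.println(rackForUnknownHost);  // /default-rack
    }
}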
/**
 * Log the cluster topology and per-node resource availability for debugging.
 */
private void logClusterInfo() {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Cluster:");
        for (Map.Entry<String, List<String>> clusterEntry : networkTopography.entrySet()) {
            String rackId = clusterEntry.getKey();
            LOG.debug("Rack: {}", rackId);
            for (String nodeHostname : clusterEntry.getValue()) {
                for (RAS_Node node : hostnameToNodes(nodeHostname)) {
                    LOG.debug("-> Node: {} {}", node.getHostname(), node.getId());
                    LOG.debug(
                        "--> Avail Resources: {Mem {}, CPU {} Slots: {}}",
                        node.getAvailableMemoryResources(),
                        node.getAvailableCpuResources(),
                        node.totalSlotsFree());
                    LOG.debug(
                        "--> Total Resources: {Mem {}, CPU {} Slots: {}}",
                        node.getTotalMemoryResources(),
                        node.getTotalCpuResources(),
                        node.totalSlots());
                }
            }
        }
    }
}
+ "slot: {} on Rack: {}", exec, targetNode.getHostname(), targetNode.getAvailableMemoryResources(), targetNode.getAvailableCpuResources(),
for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
    Assert.assertEquals("assert worker scheduled on rack-0", "rack-0",
        resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()));
}

for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
    Assert.assertEquals("assert worker scheduled on rack-1", "rack-1",
        resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()));
}
SchedulerAssignment assignment = cluster.getAssignmentById(topo1.getId());
for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
    String hostName = rs.idToNode(ws.getNodeId()).getHostname();
    String rackId = resolvedSuperVisors.get(hostName);
    // Each slot must land either on a favored host or on the highest-priority rack (rack-0).
    Assert.assertTrue(ws + " is neither on a favored node " + t1FavoredHostNames
            + " nor the highest priority rack (rack-0)",
        t1FavoredHostNames.contains(hostName) || "rack-0".equals(rackId));
}
/**
 * Get the rack that a node is part of.
 *
 * @param node the node whose rack to look up
 * @return the rack id, or null if the node is not found in any rack
 */
private String nodeToRack(RAS_Node node) {
    for (Map.Entry<String, List<String>> entry : _clusterInfo.entrySet()) {
        if (entry.getValue().contains(node.getHostname())) {
            return entry.getKey();
        }
    }
    LOG.error("Node: {} not found in any racks", node.getHostname());
    return null;
}
/**
 * Convert a hostname to a node id.
 *
 * @param hostname the hostname to convert to a node id
 * @return the id of the node with that hostname, or null if no such node exists
 */
public String NodeHostnameToId(String hostname) {
    for (RAS_Node n : _nodes.getNodes()) {
        if (n.getHostname() == null) {
            continue;
        }
        if (n.getHostname().equals(hostname)) {
            return n.getId();
        }
    }
    LOG.error("Cannot find Node with hostname {}", hostname);
    return null;
}
private void freeMemory(double amount) {
    LOG.debug("freeing {} memory on node {}...avail mem: {}", amount, getHostname(), _availMemory);
    if ((_availMemory + amount) > getTotalMemoryResources()) {
        LOG.warn("Freeing more memory than there exists! Memory trying to free: {} Total memory on Node: {}",
                 amount, getTotalMemoryResources());
        return;
    }
    _availMemory += amount;
}
private void freeCPU(double amount) {
    LOG.debug("freeing {} CPU on node {}...avail CPU: {}", amount, getHostname(), _availCPU);
    if ((_availCPU + amount) > getTotalCpuResources()) {
        LOG.warn("Freeing more CPU than there exists! CPU trying to free: {} Total CPU on Node: {}",
                 amount, getTotalCpuResources());
        return;
    }
    _availCPU += amount;
}
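Both free methods share the same guard: refuse to release more than the node's configured total. Below is a minimal, self-contained sketch of that guard; the class and field names are illustrative and not part of Storm's API.

public class ResourcePoolSketch {
    private final double total;
    private double avail;

    ResourcePoolSketch(double total, double initiallyAvail) {
        this.total = total;
        this.avail = initiallyAvail;
    }

    /** Returns the released amount to the pool, unless that would exceed the configured total. */
    void free(double amount) {
        if (avail + amount > total) {
            // Freeing more than was ever allocated indicates a bookkeeping bug; leave state unchanged.
            System.err.printf("Refusing to free %.1f: would exceed total %.1f%n", amount, total);
            return;
        }
        avail += amount;
    }

    double available() {
        return avail;
    }

    public static void main(String[] args) {
        ResourcePoolSketch mem = new ResourcePoolSketch(100.0, 60.0);
        mem.free(30.0);  // accepted: 60 + 30 <= 100
        mem.free(20.0);  // refused: 90 + 20 > 100
        System.out.println(mem.available());  // 90.0
    }
}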
@Override
public int getNumExistingSchedule(String objectId) {
    String rackId = objectId;
    // Get execs already assigned in this rack by an existing assignment.
    Collection<ExecutorDetails> execs = new LinkedList<ExecutorDetails>();
    if (_cluster.getAssignmentById(topoId) != null) {
        for (Map.Entry<ExecutorDetails, WorkerSlot> entry
                : _cluster.getAssignmentById(topoId).getExecutorToSlot().entrySet()) {
            String nodeId = entry.getValue().getNodeId();
            String hostname = idToNode(nodeId).getHostname();
            ExecutorDetails exec = entry.getKey();
            // Note: nodeIdToRackId is keyed by hostname here.
            if (nodeIdToRackId.get(hostname) != null && nodeIdToRackId.get(hostname).equals(rackId)) {
                execs.add(exec);
            }
        }
    }
    // Get execs already scheduled in the current scheduling round.
    for (Map.Entry<WorkerSlot, Collection<ExecutorDetails>> entry : scheduleAssignmentMap.entrySet()) {
        WorkerSlot workerSlot = entry.getKey();
        String nodeId = workerSlot.getNodeId();
        String hostname = idToNode(nodeId).getHostname();
        // Null-safe comparison: hosts with no rack mapping never match.
        if (rackId.equals(nodeIdToRackId.get(hostname))) {
            execs.addAll(entry.getValue());
        }
    }
    return execs.size();
}
});
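The rack-level count above can be illustrated with plain maps: given executor-to-host assignments and a host-to-rack map, count the executors whose host resolves to the requested rack. All names in this sketch are illustrative.

import java.util.HashMap;
import java.util.Map;

public class RackCountSketch {
    /** Count assigned executors whose host maps to the given rack. */
    static int countOnRack(Map<String, String> execToHost, Map<String, String> hostToRack, String rackId) {
        int count = 0;
        for (String host : execToHost.values()) {
            if (rackId.equals(hostToRack.get(host))) {  // null-safe: unknown hosts never match
                count++;
            }
        }
        return count;
    }

    public static void main(String[] args) {
        Map<String, String> execToHost = new HashMap<>();
        execToHost.put("exec-1", "host-a");
        execToHost.put("exec-2", "host-b");
        Map<String, String> hostToRack = new HashMap<>();
        hostToRack.put("host-a", "rack-0");
        hostToRack.put("host-b", "rack-1");
        System.out.println(countOnRack(execToHost, hostToRack, "rack-0"));  // 1
    }
}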
/**
 * Schedule executor exec from topology td.
 *
 * @param exec the executor to schedule
 * @param td the topology that executor exec is a part of
 * @param schedulerAssignmentMap the assignments already calculated
 * @param scheduledTasks executors that have been scheduled
 */
private void scheduleExecutor(ExecutorDetails exec, TopologyDetails td,
                              Map<WorkerSlot, Collection<ExecutorDetails>> schedulerAssignmentMap,
                              Collection<ExecutorDetails> scheduledTasks) {
    WorkerSlot targetSlot = this.findWorkerForExec(exec, td, schedulerAssignmentMap);
    if (targetSlot != null) {
        RAS_Node targetNode = this.idToNode(targetSlot.getNodeId());
        if (!schedulerAssignmentMap.containsKey(targetSlot)) {
            schedulerAssignmentMap.put(targetSlot, new LinkedList<ExecutorDetails>());
        }
        schedulerAssignmentMap.get(targetSlot).add(exec);
        targetNode.consumeResourcesforTask(exec, td);
        scheduledTasks.add(exec);
        LOG.debug("TASK {} assigned to Node: {} avail [ mem: {} cpu: {} ] total [ mem: {} cpu: {} ] "
                + "on slot: {} on Rack: {}",
            exec, targetNode.getHostname(),
            targetNode.getAvailableMemoryResources(), targetNode.getAvailableCpuResources(),
            targetNode.getTotalMemoryResources(), targetNode.getTotalCpuResources(),
            targetSlot, nodeToRack(targetNode));
    } else {
        LOG.error("Not Enough Resources to schedule Task {}", exec);
    }
}
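The containsKey/put sequence that builds the per-slot executor list is the usual multimap-accumulation pattern. The standalone sketch below, with strings standing in for WorkerSlot and ExecutorDetails, shows the equivalent computeIfAbsent form already used in prepare() above.

import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

public class SlotAccumulationSketch {
    public static void main(String[] args) {
        // Stand-in for Map<WorkerSlot, Collection<ExecutorDetails>>.
        Map<String, Collection<String>> slotToExecs = new HashMap<>();

        // Equivalent to: if (!map.containsKey(slot)) map.put(slot, new LinkedList<>()); map.get(slot).add(exec);
        slotToExecs.computeIfAbsent("slot-1", s -> new LinkedList<>()).add("exec-1");
        slotToExecs.computeIfAbsent("slot-1", s -> new LinkedList<>()).add("exec-2");

        System.out.println(slotToExecs);  // {slot-1=[exec-1, exec-2]}
    }
}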
/**
 * Get the amount of resources available and total for each node.
 *
 * @return a String with cluster resource info for debug
 */
private String getClusterInfo() {
    String retVal = "Cluster info:\n";
    for (Map.Entry<String, List<String>> clusterEntry : _clusterInfo.entrySet()) {
        String clusterId = clusterEntry.getKey();
        retVal += "Rack: " + clusterId + "\n";
        for (String nodeHostname : clusterEntry.getValue()) {
            RAS_Node node = this.idToNode(this.NodeHostnameToId(nodeHostname));
            retVal += "-> Node: " + node.getHostname() + " " + node.getId() + "\n";
            retVal += "--> Avail Resources: {Mem " + node.getAvailableMemoryResources()
                    + ", CPU " + node.getAvailableCpuResources()
                    + " Slots: " + node.totalSlotsFree() + "}\n";
            retVal += "--> Total Resources: {Mem " + node.getTotalMemoryResources()
                    + ", CPU " + node.getTotalCpuResources()
                    + " Slots: " + node.totalSlots() + "}\n";
        }
    }
    return retVal;
}
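Because the report above is assembled by repeated string concatenation inside nested loops, a StringBuilder produces the same output without the intermediate String copies. The sketch below follows the same shape with per-node resource details elided; all names and map contents are illustrative.

import java.util.List;
import java.util.Map;

public class ClusterReportSketch {
    static String report(Map<String, List<String>> rackToHosts) {
        StringBuilder sb = new StringBuilder("Cluster info:\n");
        for (Map.Entry<String, List<String>> rack : rackToHosts.entrySet()) {
            sb.append("Rack: ").append(rack.getKey()).append('\n');
            for (String host : rack.getValue()) {
                sb.append("-> Node: ").append(host).append('\n');
            }
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        System.out.print(report(Map.of("rack-0", List.of("host-a", "host-b"))));
    }
}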