@Override
public NodeCapacity getCapacity() {
    // Reserve one core for system activity, but never advertise zero cores:
    // on a single-core host the unguarded "cores - 1" would report a capacity
    // of 0 and effectively exclude the node from scheduling.
    int allCores = Runtime.getRuntime().availableProcessors();
    return new NodeCapacity(Runtime.getRuntime().maxMemory(), allCores > 1 ? allCores - 1 : allCores);
}
@Override
public NodeCapacity getCapacity() {
    // Report max heap as the memory budget. Leave one core for system/OS
    // activity, but never advertise zero cores on a single-core host.
    final int totalCores = ManagementFactory.getOperatingSystemMXBean().getAvailableProcessors();
    final int usableCores;
    if (totalCores > 1) {
        usableCores = totalCores - 1;
    } else {
        usableCores = totalCores;
    }
    return new NodeCapacity(Runtime.getRuntime().maxMemory(), usableCores);
}
@Override
public NodeCapacity getCapacity() {
    // Memory budget comes from the configured job-execution budget (storage
    // properties), not the JVM max heap; all processors are advertised.
    final StorageProperties props = runtimeContext.getStorageProperties();
    final long memoryBudget = props.getJobExecutionMemoryBudget();
    return new NodeCapacity(memoryBudget, Runtime.getRuntime().availableProcessors());
}
@Override @Idempotent public synchronized void removeNode(String nodeId) throws HyracksException { NodeControllerState ncState = nodeRegistry.remove(nodeId); if (ncState == null) { LOGGER.warn("request to remove unknown node {}; ignoring", nodeId); } else { removeNodeFromIpAddressMap(nodeId, ncState); } // Updates the cluster capacity (idempotent) resourceManager.update(nodeId, new NodeCapacity(0L, 0)); }
private NodeCapacity getAdjustedNodeCapacity(NodeCapacity nodeCapacity) {
    // Scale the advertised core count by the configured multiplier;
    // the memory budget passes through unchanged.
    final long memory = nodeCapacity.getMemoryByteSize();
    final int scaledCores = nodeCapacity.getCores() * nodeCoresMultiplier;
    return new NodeCapacity(memory, scaledCores);
}
@Override
public synchronized Pair<Collection<String>, Collection<JobId>> removeDeadNodes() throws HyracksException {
    final Set<String> deadNodes = new HashSet<>();
    final Set<JobId> affectedJobIds = new HashSet<>();
    // A node is dead once it has missed the configured number of heartbeats.
    final long deadNodeNanosThreshold =
            TimeUnit.MILLISECONDS.toNanos(ccConfig.getHeartbeatMaxMisses() * ccConfig.getHeartbeatPeriodMillis());
    // Iterator-based loop so dead entries can be removed while iterating.
    for (Iterator<Map.Entry<String, NodeControllerState>> it = nodeRegistry.entrySet().iterator(); it.hasNext();) {
        final Map.Entry<String, NodeControllerState> entry = it.next();
        final String nodeId = entry.getKey();
        final NodeControllerState state = entry.getValue();
        final long nanosSinceLastHeartbeat = state.nanosSinceLastHeartbeat();
        if (nanosSinceLastHeartbeat < deadNodeNanosThreshold) {
            continue; // still alive
        }
        ensureNodeFailure(nodeId, state);
        deadNodes.add(nodeId);
        affectedJobIds.addAll(state.getActiveJobIds());
        it.remove();
        removeNodeFromIpAddressMap(nodeId, state);
        // Zero out the dead node's contribution to the cluster capacity.
        resourceManager.update(nodeId, new NodeCapacity(0L, 0));
        LOGGER.info("{} considered dead. Last heartbeat received {}ms ago. Max miss period: {}ms", nodeId,
                TimeUnit.NANOSECONDS.toMillis(nanosSinceLastHeartbeat),
                TimeUnit.NANOSECONDS.toMillis(deadNodeNanosThreshold));
    }
    return Pair.of(deadNodes, affectedJobIds);
}
public synchronized void failNode(String nodeId) throws HyracksException { NodeControllerState state = nodeRegistry.get(nodeId); Set<JobId> affectedJobIds = state.getActiveJobIds(); // Removes the node from node map. nodeRegistry.remove(nodeId); // Removes the node from IP map. removeNodeFromIpAddressMap(nodeId, state); // Updates the cluster capacity. resourceManager.update(nodeId, new NodeCapacity(0L, 0)); LOGGER.info(nodeId + " considered dead"); IJobManager jobManager = ccs.getJobManager(); Set<String> collection = Collections.singleton(nodeId); for (JobId jobId : affectedJobIds) { JobRun run = jobManager.get(jobId); if (run != null) { run.getExecutor().notifyNodeFailures(collection); } } ccs.getContext().notifyNodeFailure(collection); }
// Registering a node folds its capacity into the cluster aggregate.
capacity.update(nodeId, new NodeCapacity(1024L, 8));
// assertEquals(expected, actual) reports both values on failure,
// unlike assertTrue(a == b) which only reports "false".
Assert.assertEquals(1024L, capacity.getAggregatedMemoryByteSize());
Assert.assertEquals(8, capacity.getAggregatedCores());
// Re-registering the same node replaces (not adds to) its contribution.
capacity.update(nodeId, new NodeCapacity(-1L, -2));
capacity.update(nodeId, new NodeCapacity(1024L, 8));
capacity.update(nodeId, new NodeCapacity(4L, 0));