/**
 * Builds a node on "localhost" port 0 with capability
 * {@code Resource.newInstance(4096, 4)} and feeds it a started event
 * followed by a REBOOTING event.
 *
 * @return the node, after asserting it went through RUNNING and ended up
 *         in REBOOTED
 */
private RMNodeImpl getRebootedNode() {
  final NodeId localId = BuilderUtils.newNodeId("localhost", 0);
  final Resource totalResource = Resource.newInstance(4096, 4);
  final RMNodeImpl rebooted =
      new RMNodeImpl(localId, rmContext, null, 0, 0, null, totalResource,
          null);

  // Bring the node up first; the reboot is only exercised from RUNNING.
  final RMNodeStartedEvent started =
      new RMNodeStartedEvent(rebooted.getNodeID(), null, null);
  rebooted.handle(started);
  Assert.assertEquals(NodeState.RUNNING, rebooted.getState());

  final RMNodeEvent rebooting =
      new RMNodeEvent(rebooted.getNodeID(), RMNodeEventType.REBOOTING);
  rebooted.handle(rebooting);
  Assert.assertEquals(NodeState.REBOOTED, rebooted.getState());

  return rebooted;
}
/**
 * Checks a decommissioning node's total capability (4096 memory, 4
 * virtual cores), then sends it a RECOMMISSION event and verifies that
 * {@code getOriginalTotalCapability()} returns null afterwards.
 */
@Test
public void testResourceUpdateOnRecommissioningNode() {
  RMNodeImpl node = getDecommissioningNode();
  Resource oldCapacity = node.getTotalCapability();
  // JUnit expects (message, expected, actual); keeping the literal first
  // makes a failure report "expected:<4096> but was:<...>" rather than the
  // reverse.
  assertEquals("Memory resource is not match.", 4096,
      oldCapacity.getMemorySize());
  assertEquals("CPU resource is not match.", 4,
      oldCapacity.getVirtualCores());

  node.handle(new RMNodeEvent(node.getNodeID(),
      RMNodeEventType.RECOMMISSION));

  Resource originalCapacity = node.getOriginalTotalCapability();
  assertEquals("Original total capability not null after recommission",
      null, originalCapacity);
}
@Override public void transition(RMNodeImpl rmNode, RMNodeEvent event) { // The node is not usable, only log a warn message LOG.warn("Try to update resource on a "+ rmNode.getState().toString() + " node: "+rmNode.toString()); updateNodeResourceFromEvent(rmNode, (RMNodeResourceUpdateEvent)event); // No need to notify scheduler as schedulerNode is not function now // and can sync later from RMnode. } }
private static NodeHealthStatus updateRMNodeFromStatusEvents( RMNodeImpl rmNode, RMNodeStatusEvent statusEvent) { // Switch the last heartbeatresponse. NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime(remoteNodeHealthStatus .getLastHealthReportTime()); rmNode.setAggregatedContainersUtilization(statusEvent .getAggregatedContainersUtilization()); rmNode.setNodeUtilization(statusEvent.getNodeUtilization()); return remoteNodeHealthStatus; }
/** * Put a node in deactivated (decommissioned or shutdown) status. * @param rmNode * @param finalState */ public static void deactivateNode(RMNodeImpl rmNode, NodeState finalState) { if (rmNode.getNodeID().getPort() == -1) { rmNode.updateMetricsForDeactivatedNode(rmNode.getState(), finalState); return; } reportNodeUnusable(rmNode, finalState); // Deactivate the node rmNode.context.getRMNodes().remove(rmNode.nodeId); LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now " + finalState); rmNode.context.getInactiveRMNodes().put(rmNode.nodeId, rmNode); if (rmNode.context.getNodesListManager().isUntrackedNode(rmNode.hostName)) { rmNode.setUntrackedTimeStamp(Time.monotonicNow()); } }
/**
 * Takes a node from {@code getRunningNode()} and delivers a status event
 * whose health status is "not healthy" with the report text "sick".
 *
 * @return the node, after asserting it is in UNHEALTHY
 */
private RMNodeImpl getUnhealthyNode() {
  final RMNodeImpl sickNode = getRunningNode();

  final long reportTime = System.currentTimeMillis();
  final NodeHealthStatus sickReport =
      NodeHealthStatus.newInstance(false, "sick", reportTime);

  // No container statuses are reported; only the health status matters.
  final NodeStatus heartbeatStatus = NodeStatus.newInstance(
      sickNode.getNodeID(), 0, new ArrayList<ContainerStatus>(), null,
      sickReport, null, null, null);

  final RMNodeStatusEvent sickHeartbeat =
      new RMNodeStatusEvent(sickNode.getNodeID(), heartbeatStatus, null);
  sickNode.handle(sickHeartbeat);

  Assert.assertEquals(NodeState.UNHEALTHY, sickNode.getState());
  return sickNode;
}
/**
 * Decommissions a node built around the unknown-node id for "host1" and
 * asserts that {@code nodesListManagerEvent} is still null afterwards,
 * i.e. no NODE_UNUSABLE update was recorded.
 */
@Test
public void testUnknownNodeId() {
  final NodeId unknownId = NodesListManager.createUnknownNodeId("host1");
  final RMNodeImpl placeholder =
      new RMNodeImpl(unknownId, rmContext, null, 0, 0, null, null, null);

  // Register the placeholder as inactive before decommissioning it.
  rmContext.getInactiveRMNodes().putIfAbsent(unknownId, placeholder);

  final RMNodeEvent decommission = new RMNodeEvent(
      placeholder.getNodeID(), RMNodeEventType.DECOMMISSION);
  placeholder.handle(decommission);

  Assert.assertNull(
      "Must be null as there is no NODE_UNUSABLE update",
      nodesListManagerEvent);
}
int initialDecommissioned = cm.getNumDecommisionedNMs(); int initialRebooted = cm.getNumRebootedNMs(); node1.handle(new RMNodeEvent(node1.getNodeID(), RMNodeEventType.EXPIRE)); Assert.assertEquals("Active Nodes", initialActive - 1, cm.getNumActiveNMs()); Assert.assertEquals("Lost Nodes", initialLost + 1, cm.getNumLostNMs()); Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs()); Assert.assertEquals(NodeState.LOST, node1.getState()); Assert.assertTrue("Node " + node1.toString() + " should be inactive", rmContext.getInactiveRMNodes().containsKey(node1.getNodeID())); Assert.assertFalse("Node " + node2.toString() + " should not be inactive", rmContext.getInactiveRMNodes().containsKey(node2.getNodeID())); node2.handle(new RMNodeEvent(node1.getNodeID(), RMNodeEventType.EXPIRE)); Assert.assertEquals("Active Nodes", initialActive - 2, cm.getNumActiveNMs()); Assert.assertEquals("Lost Nodes", initialLost + 2, cm.getNumLostNMs()); Assert.assertEquals("Rebooted Nodes", initialRebooted, cm.getNumRebootedNMs()); Assert.assertEquals(NodeState.LOST, node2.getState()); Assert.assertTrue("Node " + node1.toString() + " should be inactive", rmContext.getInactiveRMNodes().containsKey(node1.getNodeID())); Assert.assertTrue("Node " + node2.toString() + " should be inactive", rmContext.getInactiveRMNodes().containsKey(node2.getNodeID()));
@Test public void testReconnectWithNewPortOnDecommissioningNode() { RMNodeImpl node = getDecommissioningNode(); Random r= new Random(); node.setHttpPort(r.nextInt(10000)); // Reconnect event with running app node.handle(new RMNodeReconnectEvent(node.getNodeID(), node, getAppIdList(), null)); // still decommissioning Assert.assertEquals(NodeState.DECOMMISSIONING, node.getState()); node.setHttpPort(r.nextInt(10000)); // Reconnect event without any running app node.handle(new RMNodeReconnectEvent(node.getNodeID(), node, null, null)); Assert.assertEquals(NodeState.DECOMMISSIONED, node.getState()); }
@Test public void testForHandlingDuplicatedCompltedContainers() { node.handle(new RMNodeStartedEvent(null, null, null)); node.setNextHeartBeat(false); node.handle(statusEvent1); verify(scheduler, times(1)).handle(any(NodeUpdateSchedulerEvent.class)); Assert.assertEquals(1, node.getQueueSize()); Assert.assertEquals(1, node.getCompletedContainers().size()); node.handle(statusEvent1); Assert.assertEquals(1, node.getQueueSize()); node.handle(new RMNodeFinishedContainersPulledByAMEvent(node.getNodeID(), Collections.singletonList(completedContainerId1))); node.setAndUpdateNodeHeartbeatResponse(hbrsp); Assert.assertEquals(0, node.getCompletedContainers().size());
new NodeRemovedSchedulerEvent(rmNode)); if (rmNode.getHttpPort() == newNode.getHttpPort()) { if (!rmNode.getTotalCapability().equals( newNode.getTotalCapability())) { rmNode.totalCapability = newNode.getTotalCapability(); if (rmNode.getState().equals(NodeState.RUNNING)) { switch (rmNode.getState()) { case RUNNING: ClusterMetrics.getMetrics().decrNumActiveNodes(); rmNode.httpAddress = newNode.getHttpAddress(); boolean isCapabilityChanged = false; if (!rmNode.getTotalCapability().equals( newNode.getTotalCapability())) { rmNode.totalCapability = newNode.getTotalCapability(); && rmNode.getState().equals(NodeState.RUNNING)) {
/**
 * For every host returned by {@code hostsReader.getExcludedHosts()},
 * creates a placeholder node under an unknown-node id, stores it in the
 * context's inactive node map and sends it a DECOMMISSION event.
 */
private void setDecomissionedNMs() {
  for (final String excludedHost : hostsReader.getExcludedHosts()) {
    final NodeId placeholderId = createUnknownNodeId(excludedHost);

    // Ports are -1 and the capability is empty: only the host name is
    // known for these nodes.
    final RMNodeImpl placeholder = new RMNodeImpl(
        placeholderId, rmContext, excludedHost, -1, -1,
        new UnknownNode(excludedHost), Resource.newInstance(0, 0),
        "unknown");

    rmContext.getInactiveRMNodes().put(placeholderId, placeholder);

    final RMNodeEvent decommission =
        new RMNodeEvent(placeholderId, RMNodeEventType.DECOMMISSION);
    placeholder.handle(decommission);
  }
}
/**
 * Creates a node on "localhost" port 0 with the given capability. No
 * events are delivered to it.
 *
 * @param capability total capability handed to the node's constructor
 * @return the freshly constructed node
 */
private RMNodeImpl getNewNode(Resource capability) {
  return new RMNodeImpl(
      BuilderUtils.newNodeId("localhost", 0),
      rmContext, null, 0, 0, null, capability, null);
}
@Test(timeout=20000) public void testUpdateHeartbeatResponseForCleanup() { RMNodeImpl node = getRunningNode(); NodeId nodeId = node.getNodeID(); // Expire a container ContainerId completedContainerId = BuilderUtils.newContainerId( BuilderUtils.newApplicationAttemptId( BuilderUtils.newApplicationId(0, 0), 0), 0); node.handle(new RMNodeCleanContainerEvent(nodeId, completedContainerId)); Assert.assertEquals(1, node.getContainersToCleanUp().size()); // Finish an application ApplicationId finishedAppId = BuilderUtils.newApplicationId(0, 1); node.handle(new RMNodeCleanAppEvent(nodeId, finishedAppId)); Assert.assertEquals(1, node.getAppsToCleanup().size()); // Verify status update does not clear containers/apps to cleanup // but updating heartbeat response for cleanup does RMNodeStatusEvent statusEvent = getMockRMNodeStatusEvent(null); node.handle(statusEvent); Assert.assertEquals(1, node.getContainersToCleanUp().size()); Assert.assertEquals(1, node.getAppsToCleanup().size()); NodeHeartbeatResponse hbrsp = Records.newRecord(NodeHeartbeatResponse.class); node.setAndUpdateNodeHeartbeatResponse(hbrsp); Assert.assertEquals(0, node.getContainersToCleanUp().size()); Assert.assertEquals(0, node.getAppsToCleanup().size()); Assert.assertEquals(1, hbrsp.getContainersToCleanup().size()); Assert.assertEquals(completedContainerId, hbrsp.getContainersToCleanup().get(0)); Assert.assertEquals(1, hbrsp.getApplicationsToCleanup().size()); Assert.assertEquals(finishedAppId, hbrsp.getApplicationsToCleanup().get(0)); }
/**
 * Verifies that a reconnect event carrying a node with a different
 * NodeManager version updates the version reported by the existing node.
 */
@Test
public void testReconnnectUpdate() {
  final String versionBefore = "nm version 1";
  final String versionAfter = "nm version 2";

  final RMNodeImpl existing = getRunningNode(versionBefore);
  Assert.assertEquals(versionBefore, existing.getNodeManagerVersion());

  // Reconnect using a second running node that reports the newer version.
  final RMNodeImpl reconnecting = getRunningNode(versionAfter);
  final RMNodeReconnectEvent reconnect = new RMNodeReconnectEvent(
      existing.getNodeID(), reconnecting, null, null);
  existing.handle(reconnect);

  Assert.assertEquals(versionAfter, existing.getNodeManagerVersion());
}
@Test (timeout = 5000) public void testContainerUpdate() throws InterruptedException{ node.handle(new RMNodeStartedEvent(null, null, null)); RMNodeImpl node2 = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null); node2.handle(new RMNodeStartedEvent(null, null, null)); doReturn(Collections.singletonList(containerStatusFromNode1)) .when(statusEventFromNode1).getContainers(); node.handle(statusEventFromNode1); Assert.assertEquals(1, completedContainers.size()); Assert.assertEquals(completedContainerIdFromNode1, .when(statusEventFromNode2_2).getContainers(); node2.setNextHeartBeat(false); node2.handle(statusEventFromNode2_1); node2.setNextHeartBeat(true); node2.handle(statusEventFromNode2_2);
@Test (timeout = 5000) public void testStatusChange(){ node.handle(new RMNodeStartedEvent(null, null, null)); node.setNextHeartBeat(false); node.handle(statusEvent1); node.handle(statusEvent2); verify(scheduler,times(1)).handle(any(NodeUpdateSchedulerEvent.class)); Assert.assertEquals(2, node.getQueueSize()); node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.EXPIRE)); Assert.assertEquals(0, node.getQueueSize());
if (rmNode.getState() == NodeState.DECOMMISSIONING) { deactivateNode(rmNode, NodeState.DECOMMISSIONED); return NodeState.DECOMMISSIONED; new NodeRemovedSchedulerEvent(rmNode)); if (rmNode.getHttpPort() == newNode.getHttpPort()) { if (!rmNode.getTotalCapability().equals( newNode.getTotalCapability())) { rmNode.totalCapability = newNode.getTotalCapability(); if (rmNode.getState().equals(NodeState.RUNNING)) { rmNode.httpAddress = newNode.getHttpAddress(); boolean isCapabilityChanged = false; if (!rmNode.getTotalCapability().equals( newNode.getTotalCapability())) { rmNode.totalCapability = newNode.getTotalCapability(); && rmNode.getState().equals(NodeState.RUNNING)) { return rmNode.getState();
rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime( remoteNodeHealthStatus.getLastHealthReportTime()); if (!remoteNodeHealthStatus.getIsNodeHealthy()) { NodesListManagerEventType.NODE_UNUSABLE, rmNode)); rmNode.updateMetricsForDeactivatedNode(rmNode.getState(), NodeState.UNHEALTHY); return NodeState.UNHEALTHY;
@Test(timeout=20000) public void testUpdateHeartbeatResponseForAppLifeCycle() { RMNodeImpl node = getRunningNode(); NodeId nodeId = node.getNodeID(); ApplicationId runningAppId = BuilderUtils.newApplicationId(0, 1); rmContext.getRMApps().put(runningAppId, Mockito.mock(RMApp.class)); // Create a running container ContainerId runningContainerId = BuilderUtils.newContainerId( BuilderUtils.newApplicationAttemptId( runningAppId, 0), 0); ContainerStatus status = ContainerStatus.newInstance(runningContainerId, ContainerState.RUNNING, "", 0); List<ContainerStatus> statusList = new ArrayList<ContainerStatus>(); statusList.add(status); NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(true, "", System.currentTimeMillis()); NodeStatus nodeStatus = NodeStatus.newInstance(nodeId, 0, statusList, null, nodeHealth, null, null, null); node.handle(new RMNodeStatusEvent(nodeId, nodeStatus, null)); Assert.assertEquals(1, node.getRunningApps().size()); // Finish an application ApplicationId finishedAppId = runningAppId; node.handle(new RMNodeCleanAppEvent(nodeId, finishedAppId)); Assert.assertEquals(1, node.getAppsToCleanup().size()); Assert.assertEquals(0, node.getRunningApps().size()); }