/**
 * Builds a {@link MockNM} wired to this instance's resource tracker service
 * and registers it.
 *
 * @param nodeIdStr node id string passed through to the MockNM constructor
 * @param memory memory value passed through to the MockNM constructor
 * @param vCores vcore count passed through to the MockNM constructor
 * @return the MockNM, after {@code registerNode()} has been invoked on it
 * @throws Exception if creating or registering the node fails
 */
public MockNM registerNode(String nodeIdStr, int memory, int vCores)
    throws Exception {
  final MockNM node =
      new MockNM(nodeIdStr, memory, vCores, getResourceTrackerService());
  node.registerNode();
  return node;
}
/**
 * Sends a heartbeat that carries every container status contained in the
 * given per-application map.
 *
 * <p>The per-application lists are concatenated, in the map's iteration
 * order, into a single list which is then handed to the list-based
 * {@code nodeHeartbeat} overload.
 *
 * @param conts container statuses grouped by application id
 * @param isHealthy health flag forwarded unchanged to the delegate
 * @param resId response id forwarded unchanged to the delegate
 * @return whatever the delegate overload returns
 * @throws Exception if the delegate overload throws
 */
public NodeHeartbeatResponse nodeHeartbeat(
    Map<ApplicationId, List<ContainerStatus>> conts, boolean isHealthy,
    int resId) throws Exception {
  // Static type stays ArrayList so the same overload is selected as before.
  ArrayList<ContainerStatus> flattened = new ArrayList<ContainerStatus>();
  for (Map.Entry<ApplicationId, List<ContainerStatus>> perApp
      : conts.entrySet()) {
    flattened.addAll(perApp.getValue());
  }
  return nodeHeartbeat(flattened, isHealthy, resId);
}
@SuppressWarnings("unchecked") @Test(timeout = 10000) public void testDecommissioningNodeReconnect() throws Exception { MockRM rm = new MockRM(); rm.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); nm1.registerNode(); rm.waitForState(nm1.getNodeId(), NodeState.RUNNING); rm.getRMContext().getDispatcher().getEventHandler().handle( new RMNodeEvent(nm1.getNodeId(), RMNodeEventType.GRACEFUL_DECOMMISSION)); rm.waitForState(nm1.getNodeId(), NodeState.DECOMMISSIONING); MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); RegisterNodeManagerResponse response = nm2.registerNode(); // not SHUTDOWN Assert.assertTrue(response.getNodeAction().equals(NodeAction.NORMAL)); rm.stop(); }
rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); nm1.registerNode(); RMApp app1 = rm1.submitApp(200); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); nm1.setResourceTrackerService(rm2.getResourceTrackerService()); nm1.registerNode(); ControlledClock clock = new ControlledClock(); long startTime = System.currentTimeMillis(); nm1.nodeHeartbeat(true); nm1.nodeHeartbeat(true);
MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); nm1.registerNode(); nm1.setResourceTrackerService(rm2.getResourceTrackerService()); ContainerState.COMPLETE); nm1.registerNode( Arrays.asList(amContainer, runningContainer, completedContainer), null); ((RMNodeImpl) rm2.getRMContext().getRMNodes().get(nm1.getNodeId())) .getLaunchedContainers(); assertTrue(launchedContainers.contains(amContainer.getContainerId())); SchedulerNode schedulerNode1 = scheduler.getSchedulerNode(nm1.getNodeId()); Resource nmResource = Resource.newInstance(nm1.getMemory(), nm1.getvCores());
/** * Test validateAndCreateResourceRequest fails on recovery, app should ignore * this Exception and continue */ @Test (timeout = 30000) public void testAppFailToValidateResourceRequestOnRecovery() throws Exception{ rm1 = new MockRM(conf); rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); nm1.registerNode(); RMApp app1 = rm1.submitApp(200); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); // Change the config so that validateAndCreateResourceRequest throws // exception on recovery conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 50); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 100); rm2 = new MockRM(conf, rm1.getRMStateStore()); nm1.setResourceTrackerService(rm2.getResourceTrackerService()); rm2.start(); }
nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(false); rm.drainEvents(); checkUnhealthyNMCount(rm, nm2, true, 1); NodeHeartbeatResponse response = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); rm.drainEvents(); response = nm2.nodeHeartbeat(false); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); rm.drainEvents(); response = nm2.nodeHeartbeat(true); response = nm2.nodeHeartbeat(true); rm.drainEvents(); Assert.assertEquals(5120 + 5120, metrics.getAvailableMB()); response = nm1.nodeHeartbeat(true); rm.drainEvents(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); runningApps.add(ApplicationId.newInstance(1, 0)); nm1 = rm.registerNode("host2:5678", 15360, 2, runningApps); response = nm1.nodeHeartbeat(true); rm.drainEvents(); Assert.assertTrue(NodeAction.NORMAL.equals(response.getNodeAction())); nm1 = new MockNM("host1:1234", 5120, rm.getResourceTrackerService());
@Test (timeout = 60000) public void testAppCleanupWhenNMReconnects() throws Exception { conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); // start RM MockRM rm1 = new MockRM(conf); rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService()); nm1.registerNode(); // create app and launch the AM RMApp app0 = rm1.submitApp(200); MockAM am0 = launchAM(app0, rm1, nm1); nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.COMPLETE); rm1.waitForState(app0.getApplicationId(), RMAppState.FAILED); // wait for application cleanup message received waitForAppCleanupMessageRecved(nm1, app0.getApplicationId()); // reconnect NM with application still active nm1.registerNode(Arrays.asList(app0.getApplicationId())); waitForAppCleanupMessageRecved(nm1, app0.getApplicationId()); rm1.stop(); }
nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); am1.registerAppAttempt(); SchedulerNodeReport report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId()); Assert.assertEquals(2 * GB, report_nm1.getUsedResource().getMemorySize()); nm2.nodeHeartbeat(true); RMAppAttempt attempt2 = app2.getCurrentAppAttempt(); MockAM am2 = rm.sendAMLaunched(attempt2.getAppAttemptId()); am2.registerAppAttempt(); SchedulerNodeReport report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId()); Assert.assertEquals(2 * GB, report_nm2.getUsedResource().getMemorySize()); nm1.nodeHeartbeat(true); while (alloc1Response.getAllocatedContainers().size() < 1) { LOG.info("Waiting for containers to be created for app 1..."); nm2.nodeHeartbeat(true); Assert.assertEquals(nm1.getNodeId(), allocated1.get(0).getNodeId()); Assert.assertEquals(nm1.getNodeId(), allocated2.get(0).getNodeId()); report_nm1 = rm.getResourceScheduler().getNodeReport(nm1.getNodeId()); report_nm2 = rm.getResourceScheduler().getNodeReport(nm2.getNodeId()); Assert.assertEquals(0, report_nm1.getAvailableResource().getMemorySize());
MockNM nm2 = rm1.registerNode("host2:1234", 8000); Resource expectedCapability = Resource.newInstance(nm1.getMemory(), nm1.getvCores()); String expectedVersion = nm1.getVersion(); Assert .assertEquals(0, NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert .assertTrue( NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue("The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction()));
/**
 * Logs, one line per node, the allocation tags and their counts that the
 * tags manager reports for that node, formatted as {@code tag(count),}.
 * Nodes for which the manager returns {@code null} are skipped.
 *
 * @param nodes nodes whose tags should be logged
 * @param atm tags manager queried via {@code getAllocationTagsWithCount}
 */
private static void printTags(Collection<MockNM> nodes,
    AllocationTagsManager atm) {
  for (MockNM nm : nodes) {
    Map<String, Long> nmTags =
        atm.getAllocationTagsWithCount(nm.getNodeId());
    if (nmTags == null) {
      continue;
    }
    // Method-local buffer: no synchronization needed, so StringBuilder.
    StringBuilder sb = new StringBuilder();
    nmTags.forEach((tag, count) ->
        sb.append(tag).append("(").append(count).append("),"));
    LOG.info("nm_" + nm.getNodeId() + ": " + sb.toString());
  }
}
@Test public void testResponseIdOverflow() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); // prepare the responseId that's about to overflow RMNode node = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); node.getLastNodeHeartBeatResponse().setResponseId(Integer.MAX_VALUE); nm1.setResponseId(Integer.MAX_VALUE); // heartbeat twice and check responseId nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); Assert.assertEquals(0, nodeHeartbeat.getResponseId()); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); Assert.assertEquals(1, nodeHeartbeat.getResponseId()); } }
@Test public void testUnhealthyNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm .getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); // node unhealthy nm1.nodeHeartbeat(false); checkUnhealthyNMCount(rm, nm1, true, 1); UnRegisterNodeManagerRequest request = Records .newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); }
nm.nodeHeartbeat(true); } catch (Exception ex) { fail("Should not get any exceptions"); Assert.assertEquals("<memory:4096, vCores:4>", resourceAfter.toString()); Assert.assertEquals(4096, nm.getMemory()); Assert.assertEquals(4, nm.getvCores());
nm.registerNode(); } catch (Exception ex) { fail("Should not get any exceptions"); Assert.assertEquals("<memory:4096, vCores:4>", resourceAfter.toString()); Assert.assertEquals(4096, nm.getMemory()); Assert.assertEquals(4, nm.getvCores());
/**
 * Registers this node while reporting the given running applications.
 *
 * <p>Delegates to the two-argument {@code registerNode} overload, passing
 * {@code null} as its first argument.
 *
 * @param runningApplications application ids forwarded to the delegate
 * @return the registration response produced by the delegate
 * @throws Exception if the delegate throws
 */
public RegisterNodeManagerResponse registerNode(
    List<ApplicationId> runningApplications)
    throws Exception {
  return registerNode(
      null,
      runningApplications);
}
/**
 * Reports the given AM container as COMPLETE with the supplied exit status
 * from {@code node}, then waits for the owning attempt to reach FAILED and
 * its application to reach ACCEPTED.
 *
 * @param rm RM on which the state transitions are awaited
 * @param amContainer id of the AM container to report as completed
 * @param node node that reports the container status
 * @param exitStatus exit status placed in the reported container status
 * @throws Exception if reporting the status or waiting for a state fails
 * @throws InterruptedException declared by the original signature
 */
private void makeAMContainerExit(MockRM rm, ContainerId amContainer,
    MockNM node, int exitStatus) throws Exception, InterruptedException {
  node.containerStatus(
      BuilderUtils.newContainerStatus(amContainer, ContainerState.COMPLETE,
          "", exitStatus, Resources.createResource(200)));

  final ApplicationAttemptId attemptId =
      amContainer.getApplicationAttemptId();
  rm.waitForState(attemptId, RMAppAttemptState.FAILED);
  rm.waitForState(attemptId.getApplicationId(), RMAppState.ACCEPTED);
}
@Test (timeout = 60000) public void testContainerCleanupWhenRMRestartedAppNotRegistered() throws Exception { conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); // start RM MockRM rm1 = new MockRM(conf); rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService()); nm1.registerNode(); // create app and launch the AM RMApp app0 = rm1.submitApp(200); MockAM am0 = launchAM(app0, rm1, nm1); nm1.nodeHeartbeat(am0.getApplicationAttemptId(), 1, ContainerState.RUNNING); rm1.waitForState(app0.getApplicationId(), RMAppState.RUNNING); // start new RM MockRM rm2 = new MockRM(conf, rm1.getRMStateStore()); rm2.start(); // nm1 register to rm2, and do a heartbeat nm1.setResourceTrackerService(rm2.getResourceTrackerService()); nm1.registerNode(Arrays.asList(app0.getApplicationId())); rm2.waitForState(app0.getApplicationId(), RMAppState.ACCEPTED); // Add unknown container for application unknown to scheduler NodeHeartbeatResponse response = nm1.nodeHeartbeat(am0 .getApplicationAttemptId(), 2, ContainerState.RUNNING); waitForContainerCleanup(rm2, nm1, response); rm1.stop(); rm2.stop(); }
rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); MockNM nm2 = new MockNM("127.1.1.1:4321", 8192, rm1.getResourceTrackerService()); nm1.registerNode(); nm2.registerNode(); RMApp app1_1 = rm1.submitApp(1024, "app1_1", USER_1, null, A); MockAM am1_1 = MockRM.launchAndRegisterAM(app1_1, rm1, nm1); nm1.setResourceTrackerService(rm2.getResourceTrackerService()); nm2.setResourceTrackerService(rm2.getResourceTrackerService()); createNMContainerStatusForApp(am1_2); am1_1Containers.addAll(am1_2Containers); nm1.registerNode(am1_1Containers, null); nm2.registerNode(am2Containers, null); Resource.newInstance(nm1.getMemory(), nm1.getvCores()); Resource clusterResource = Resources.multiply(nmResource, 2); Resource q1Resource = Resources.multiply(clusterResource, 0.5);
/** * Test validateAndCreateResourceRequest fails on recovery, app should ignore * this Exception and continue */ @Test (timeout = 30000) public void testAppFailToValidateResourceRequestOnRecovery() throws Exception{ MemoryRMStateStore memStore = new MemoryRMStateStore(); memStore.init(conf); rm1 = new MockRM(conf, memStore); rm1.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); nm1.registerNode(); RMApp app1 = rm1.submitApp(200); MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); // Change the config so that validateAndCreateResourceRequest throws // exception on recovery conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 50); conf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 100); rm2 = new MockRM(conf, memStore); nm1.setResourceTrackerService(rm2.getResourceTrackerService()); rm2.start(); }