/**
 * Drains pending events via {@link #drainEvents()} unless implicit draining
 * has been switched off through {@code disableDrainEventsImplicitly}.
 */
private void drainEventsImplicitly() {
  if (disableDrainEventsImplicitly) {
    // Implicit draining is disabled; callers are expected to drain themselves.
    return;
  }
  drainEvents();
}
/**
 * Sends one heartbeat from {@code nm} and then drains the events queued on
 * {@code rm}, so the heartbeat has been processed when this method returns.
 *
 * @param nm     node manager that sends the heartbeat
 * @param health health flag passed through to {@code nodeHeartbeat}
 * @throws Exception if the heartbeat fails
 */
private void syncNodeHeartbeat(MockNM nm, boolean health) throws Exception {
  nm.nodeHeartbeat(health);
  // Make the asynchronous handling of the heartbeat visible to the caller.
  rm.drainEvents();
}
/**
 * Polls heartbeat responses until at least one container is reported for
 * cleanup, then asserts that exactly one container was reported in total.
 *
 * <p>Starting from {@code resp}, each round drains the RM events, adds the
 * number of containers listed for cleanup to a running total and, if the
 * total is still zero, sleeps 100 ms and sends another healthy heartbeat.
 * Polling stops after 201 rounds at the latest.
 *
 * @param rm   resource manager whose events are drained each round
 * @param nm   node manager used to send follow-up heartbeats
 * @param resp heartbeat response inspected in the first round
 * @throws Exception if a heartbeat fails or the sleep is interrupted
 */
protected void waitForContainerCleanup(MockRM rm, MockNM nm,
    NodeHeartbeatResponse resp) throws Exception {
  int retries = 0;
  int totalCleaned = 0;
  List<ContainerId> toClean;
  while (true) {
    rm.drainEvents();
    toClean = resp.getContainersToCleanup();
    totalCleaned += toClean.size();
    if (totalCleaned > 0) {
      break;
    }
    Thread.sleep(100);
    resp = nm.nodeHeartbeat(true);
    // Give up once the retry budget has been used.
    if (retries++ >= 200) {
      break;
    }
  }

  if (!toClean.isEmpty()) {
    LOG.info("Got cleanup for " + toClean.get(0));
  } else {
    LOG.error("Failed to get any containers to cleanup");
  }
  Assert.assertEquals(1, totalCleaned);
}
/**
 * Asks the RM to gracefully decommission {@code nm}, waits until the node
 * reaches {@link NodeState#DECOMMISSIONING} and then drains the RM events.
 *
 * @param nm      node to decommission
 * @param timeout decommissioning timeout handed to
 *                {@code sendNodeGracefulDecommission} (unit not visible
 *                here; presumably seconds, to be confirmed)
 * @throws Exception if waiting for the state change fails
 */
private void syncNodeGracefulDecommission(MockNM nm, int timeout)
    throws Exception {
  rm.sendNodeGracefulDecommission(nm, timeout);
  rm.waitForState(nm.getNodeId(), NodeState.DECOMMISSIONING);
  // Flush any events still queued after the state transition.
  rm.drainEvents();
}
/**
 * Brings {@code nm} to {@link NodeState#RUNNING}, reports it as lost to the
 * RM and drains the RM events afterwards.
 *
 * @param nm node that is started and then reported lost
 * @throws Exception if waiting for the RUNNING state fails
 */
private void syncNodeLost(MockNM nm) throws Exception {
  // The node has to be running before it can be reported lost.
  rm.sendNodeStarted(nm);
  rm.waitForState(nm.getNodeId(), NodeState.RUNNING);

  rm.sendNodeLost(nm);
  rm.drainEvents();
}
/**
 * Registers a node with the resource manager and waits until
 * {@code checkCapacity} accepts the plans of the reservation system.
 *
 * <p>The plans are checked up to 11 times (one initial check plus ten
 * retries) with a 100 ms pause between checks. The outcome is tracked with
 * an explicit flag, so a check that succeeds on the very last pass is not
 * misreported as "attempts exhausted".
 *
 * @param host   host (and port) of the node to register
 * @param memory memory capability passed to {@code registerNode}
 * @param vCores virtual core capability passed to {@code registerNode}
 * @throws Exception declared for callers; failures inside the method are
 *                   reported as assertion errors
 */
private void registerNode(String host, int memory, int vCores)
    throws Exception {
  final int maxChecks = 11;
  try {
    resourceManager.registerNode(host, memory, vCores);
    boolean capacityAdded = false;
    for (int check = 1; check <= maxChecks; check++) {
      resourceManager.drainEvents();
      LOG.info("Waiting for node capacity to be added to plan");
      Collection<Plan> plans = resourceManager.getRMContext()
          .getReservationSystem().getAllPlans().values();
      if (checkCapacity(plans)) {
        capacityAdded = true;
        break;
      }
      // No point in sleeping after the final unsuccessful check.
      if (check < maxChecks) {
        Thread.sleep(100);
      }
    }
    if (!capacityAdded) {
      Assert.fail("Exhausted attempts in checking if node capacity was "
          + "added to the plan");
    }
  } catch (Exception e) {
    // Fail the test but keep the original exception as the cause so the
    // full stack trace shows up in the test report.
    throw new AssertionError(e.getMessage(), e);
  }
}
/**
 * Waits for {@code container} to reach its final state.
 *
 * <p>If the scheduler no longer knows the container, only the RM events are
 * drained. Otherwise the method waits for {@code COMPLETED} when the
 * container is the AM container and for {@code KILLED} for any other
 * container.
 *
 * @param rm            resource manager owning the scheduler
 * @param nm            node manager passed to {@code waitForState}
 * @param amContainerId id of the application master container
 * @param container     container to wait for
 * @throws Exception if waiting for the state fails
 */
private void waitforContainerCompletion(MockRM rm, MockNM nm,
    ContainerId amContainerId, RMContainer container) throws Exception {
  ContainerId containerId = container.getContainerId();

  if (rm.scheduler.getRMContainer(containerId) == null) {
    // Container already removed from the scheduler; just flush events.
    rm.drainEvents();
    return;
  }

  RMContainerState expectedState = containerId.equals(amContainerId)
      ? RMContainerState.COMPLETED
      : RMContainerState.KILLED;
  rm.waitForState(nm, containerId, expectedState);
}
@Test(timeout = 10000) public void testAutoCreateLeafQueueFailsWithNoQueueMapping() throws Exception { final String INVALID_USER = "invalid_user"; // submit an app under a different queue name which does not exist // and queue mapping does not exist for this user RMApp app = mockRM.submitApp(GB, "app", INVALID_USER, null, INVALID_USER, false); mockRM.drainEvents(); mockRM.waitForState(app.getApplicationId(), RMAppState.FAILED); assertEquals(RMAppState.FAILED, app.getState()); }
private NodeManager registerNode(String hostName, int containerManagerPort, int httpPort, String rackName, Resource capability) throws IOException, YarnException { NodeManager nm = new NodeManager(hostName, containerManagerPort, httpPort, rackName, capability, resourceManager); // after YARN-5375, scheduler event is processed in rm main dispatcher, // wait it processed, or may lead dead lock if (resourceManager instanceof MockRM) { ((MockRM) resourceManager).drainEvents(); } NodeAddedSchedulerEvent nodeAddEvent1 = new NodeAddedSchedulerEvent(resourceManager.getRMContext().getRMNodes() .get(nm.getNodeId())); resourceManager.getResourceScheduler().handle(nodeAddEvent1); return nm; }
/**
 * Registers the node {@code 127.0.0.1:1} with {@code rm} and waits until the
 * reservation plan for {@code ReservationSystemTestUtil.reservationQ}
 * reports a total memory capacity greater than zero.
 *
 * <p>The plan is synchronized and checked up to 11 times (one initial check
 * plus ten retries) with a 100 ms pause between checks. Events are drained
 * on the {@code rm} passed in, i.e. the same resource manager the node was
 * registered with and whose plan is inspected. The outcome is tracked with
 * an explicit flag, so success on the final pass is not misreported as
 * exhausted attempts.
 *
 * @param rm     resource manager to register the node with
 * @param memory memory capability of the node
 * @param vCores virtual core capability of the node
 */
private void addNodeCapacityToPlan(MockRM rm, int memory, int vCores) {
  final int maxChecks = 11;
  try {
    rm.registerNode("127.0.0.1:1", memory, vCores);
    boolean capacityAdded = false;
    for (int check = 1; check <= maxChecks; check++) {
      rm.drainEvents();
      rm.getRMContext().getReservationSystem()
          .synchronizePlan(ReservationSystemTestUtil.reservationQ, false);
      if (rm.getRMContext().getReservationSystem()
          .getPlan(ReservationSystemTestUtil.reservationQ)
          .getTotalCapacity().getMemorySize() > 0) {
        capacityAdded = true;
        break;
      }
      LOG.info("Waiting for node capacity to be added to plan");
      // No point in sleeping after the final unsuccessful check.
      if (check < maxChecks) {
        Thread.sleep(100);
      }
    }
    if (!capacityAdded) {
      Assert.fail("Exhausted attempts in checking if node capacity was "
          + "added to the plan");
    }
  } catch (Exception e) {
    // Fail the test but keep the original exception as the cause.
    throw new AssertionError(e.getMessage(), e);
  }
}
hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); rm.drainEvents(); Assert.assertEquals("The decommissioned nodes metric should be 1 ", 1, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); rm1.start(); rm1.getNodesListManager().refreshNodes(conf); rm1.drainEvents(); Assert.assertEquals("The decommissioned nodes metric should be 2 ", 2, ClusterMetrics.getMetrics().getNumDecommisionedNMs());
@Test(timeout = 30000) public void testInitDecommMetricNoRegistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); nm1.nodeHeartbeat(true); nm2.nodeHeartbeat(true); //host3 will not register or heartbeat writeToHostsFile(excludeHostFile, "host3", "host2"); conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, excludeHostFile.getAbsolutePath()); writeToHostsFile(hostFile, "host1", "host2"); conf.set(YarnConfiguration.RM_NODES_INCLUDE_FILE_PATH, hostFile.getAbsolutePath()); rm.getNodesListManager().refreshNodes(conf); rm.drainEvents(); Assert.assertEquals("The decommissioned nodes metric should be 1 ", 1, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); rm.stop(); MockRM rm1 = new MockRM(conf); rm1.start(); rm1.getNodesListManager().refreshNodes(conf); rm1.drainEvents(); Assert.assertEquals("The decommissioned nodes metric should be 2 ", 2, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); rm1.stop(); }
writeToHostsFile(excludeHostFile, "host1"); rm.getNodesListManager().refreshNodesGracefully(conf, null); rm.drainEvents(); nm1.nodeHeartbeat(true); rm.drainEvents(); Assert.assertTrue("Node " + nm1.getNodeId().getHost() + " should be Decommissioned", rm.getRMContext() writeToHostsFile(excludeHostFile, ""); rm.getNodesListManager().refreshNodesGracefully(conf, null); rm.drainEvents(); Assert.assertTrue("Node " + nm1.getNodeId().getHost() + " should be Decommissioned", rm.getRMContext()
@Test(timeout = 10000) public void testRMNodeStatusAfterReconnect() throws Exception { // The node(127.0.0.1:1234) reconnected with RM. When it registered with // RM, RM set its lastNodeHeartbeatResponse's id to 0 asynchronously. But // the node's heartbeat come before RM succeeded setting the id to 0. MockRM rm = new MockRM(); rm.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); nm1.registerNode(); int i = 0; while(i < 3) { nm1.nodeHeartbeat(true); rm.drainEvents(); i++; } MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); nm2.registerNode(); RMNode rmNode = rm.getRMContext().getRMNodes().get(nm2.getNodeId()); nm2.nodeHeartbeat(true); rm.drainEvents(); Assert.assertEquals("Node is Not in Running state.", NodeState.RUNNING, rmNode.getState()); rm.stop(); } }
/**
 * Updates the priority of {@code app} to 2, removes its current attempt
 * from the scheduler as KILLED and then checks how many applications the
 * queue reports as pending and as active.
 *
 * @param rm                  resource manager whose scheduler is used
 * @param defaultQueue        queue to inspect; cast to {@code LeafQueue}
 * @param appsPendingExpected expected number of pending applications
 * @param activeAppsExpected  expected number of active applications
 * @param app                 application whose attempt is removed
 * @throws YarnException if updating the application priority fails
 */
private void killAppAndVerifyOrderingPolicy(MockRM rm, CSQueue defaultQueue,
    int appsPendingExpected, int activeAppsExpected, RMApp app)
    throws YarnException {
  CapacityScheduler scheduler = (CapacityScheduler) rm.getResourceScheduler();
  UserGroupInformation appUser =
      UserGroupInformation.createRemoteUser(app.getUser());
  scheduler.updateApplicationPriority(Priority.newInstance(2),
      app.getApplicationId(), null, appUser);

  // Remove the current attempt as KILLED and let the RM process the event.
  SchedulerEvent attemptRemoved = new AppAttemptRemovedSchedulerEvent(
      app.getCurrentAppAttempt().getAppAttemptId(),
      RMAppAttemptState.KILLED, false);
  scheduler.handle(attemptRemoved);
  rm.drainEvents();

  LeafQueue leafQueue = (LeafQueue) defaultQueue;
  Collection<FiCaSchedulerApp> pending = leafQueue.getPendingApplications();
  Collection<FiCaSchedulerApp> active = leafQueue.getApplications();

  Assert.assertEquals("Pending apps should be " + appsPendingExpected,
      appsPendingExpected, pending.size());
  Assert.assertEquals("Active apps should be " + activeAppsExpected,
      activeAppsExpected, active.size());
}
@Test public void testAMRMDecommissioningNodes() throws Exception { MockNM nm1 = rm.registerNode("127.0.0.1:1234", 10000); MockNM nm2 = rm.registerNode("127.0.0.2:1234", 10000); rm.drainEvents(); RMApp app1 = rm.submitApp(2000); // Trigger the scheduling so the AM gets 'launched' on nm1 nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); // register AM returns no unusable node am1.registerAppAttempt(); Integer decommissioningTimeout = 600; syncNodeGracefulDecommission(nm2, decommissioningTimeout); AllocateRequest allocateRequest1 = AllocateRequest.newInstance(0, 0F, null, null, null); AllocateResponse response1 = allocate(attempt1.getAppAttemptId(), allocateRequest1); List<NodeReport> updatedNodes = response1.getUpdatedNodes(); Assert.assertEquals(1, updatedNodes.size()); NodeReport nr = updatedNodes.iterator().next(); Assert.assertEquals( decommissioningTimeout, nr.getDecommissioningTimeout()); Assert.assertEquals( NodeUpdateType.NODE_DECOMMISSIONING, nr.getNodeUpdateType()); }
// Start the RM, submit an application through submitApp(100), then drain the
// RM events before the code that follows runs.
// NOTE(review): 100 is presumably the AM memory in MB - confirm against MockRM.submitApp.
rm.start(); RMApp app = rm.submitApp(100); rm.drainEvents();
@Test(timeout=1200000) public void testAllocateAfterUnregister() throws Exception { MockRM rm = new MockRM(conf); rm.start(); // Register node1 MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); // Submit an application RMApp app1 = rm.submitApp(2048); nm1.nodeHeartbeat(true); RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); am1.registerAppAttempt(); // unregister app attempt FinishApplicationMasterRequest req = FinishApplicationMasterRequest.newInstance( FinalApplicationStatus.KILLED, "", ""); am1.unregisterAppAttempt(req, false); // request container after unregister am1.addRequests(new String[] { "127.0.0.1" }, GB, 1, 1); AllocateResponse alloc1Response = am1.schedule(); nm1.nodeHeartbeat(true); rm.drainEvents(); alloc1Response = am1.schedule(); Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size()); }
// Start the RM, submit an application through submitApp(100), then drain the
// RM events before the code that follows runs.
// NOTE(review): 100 is presumably the AM memory in MB - confirm against MockRM.submitApp.
rm.start(); RMApp app = rm.submitApp(100); rm.drainEvents();