" for " + serverNames); startTime = EnvironmentEdgeManager.currentTime(); batch = new TaskBatch(); for (FileStatus lf : logfiles) {
long t = EnvironmentEdgeManager.currentTime(); long totalSize = 0; TaskBatch batch = new TaskBatch(); Boolean isMetaRecovery = (filter == null) ? null : false; for (FileStatus lf : logfiles) {
long t = EnvironmentEdgeManager.currentTimeMillis(); long totalSize = 0; TaskBatch batch = new TaskBatch(); for (FileStatus lf : logfiles) {
TaskBatch batch = new TaskBatch(); slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
@Test public void testMultipleResubmits() throws Exception { LOG.info("TestMultipleResbmits - no indefinite resubmissions"); conf.setInt("hbase.splitlog.max.resubmit", 2); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); final ServerName worker2 = ServerName.valueOf("worker2,1,1"); final ServerName worker3 = ServerName.valueOf("worker3,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); waitForCounter(tot_mgr_resubmit, 0, 1, to + to/2); int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); slt = new SplitLogTask.Owned(worker2); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 1, 2, to/2); waitForCounter(tot_mgr_resubmit, 1, 2, to + to/2); int version2 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version2 > version1); slt = new SplitLogTask.Owned(worker3); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 2, 3, to/2); waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + to/2); Thread.sleep(to + to/2); assertEquals(2L, tot_mgr_resubmit.sum() - tot_mgr_resubmit_force.sum()); }
@Test public void testUnassignedTimeout() throws Exception { LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then" + " resubmit"); // create an orphan task in OWNED state String tasknode1 = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); zkw.getRecoverableZooKeeper().create(tasknode1, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); slm = new SplitLogManager(master, conf); waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to/2); // submit another task which will stay in unassigned mode TaskBatch batch = new TaskBatch(); submitTaskAndWait(batch, "foo/1"); // keep updating the orphan owned node every to/2 seconds for (int i = 0; i < (3 * to)/100; i++) { Thread.sleep(100); final ServerName worker2 = ServerName.valueOf("worker1,1,1"); slt = new SplitLogTask.Owned(worker2); ZKUtil.setData(zkw, tasknode1, slt.toByteArray()); } // since we have stopped heartbeating the owned node therefore it should // get resubmitted LOG.info("waiting for manager to resubmit the orphan task"); waitForCounter(tot_mgr_resubmit, 0, 1, to + to/2); // now all the nodes are unassigned. manager should post another rescan waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + to/2); }
@Test public void testTaskResigned() throws Exception { LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state"); assertEquals(0, tot_mgr_resubmit.sum()); slm = new SplitLogManager(master, conf); assertEquals(0, tot_mgr_resubmit.sum()); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); assertEquals(0, tot_mgr_resubmit.sum()); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); assertEquals(0, tot_mgr_resubmit.sum()); SplitLogTask slt = new SplitLogTask.Resigned(worker1); assertEquals(0, tot_mgr_resubmit.sum()); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); ZKUtil.checkExists(zkw, tasknode); // Could be small race here. if (tot_mgr_resubmit.sum() == 0) { waitForCounter(tot_mgr_resubmit, 0, 1, to/2); } assertEquals(1, tot_mgr_resubmit.sum()); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); }
@Test public void testDeadWorker() throws Exception { LOG.info("testDeadWorker"); conf.setLong("hbase.splitlog.max.resubmit", 0); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); if (tot_mgr_heartbeat.sum() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); slm.handleDeadWorker(worker1); if (tot_mgr_resubmit.sum() == 0) waitForCounter(tot_mgr_resubmit, 0, 1, to+to/2); if (tot_mgr_resubmit_dead_server_task.sum() == 0) { waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, to + to/2); } int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); return; }
@Test public void testRescanCleanup() throws Exception { LOG.info("TestRescanCleanup - ensure RESCAN nodes are cleaned up"); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); waitForCounter(new Expr() { @Override public long eval() { return (tot_mgr_resubmit.sum() + tot_mgr_resubmit_failed.sum()); } }, 0, 1, 5*60000); // wait long enough Assert.assertEquals("Could not run test. Lost ZK connection?", 0, tot_mgr_resubmit_failed.sum()); int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); waitForCounter(tot_mgr_rescan_deleted, 0, 1, to/2); }
@Test public void testTaskErr() throws Exception { LOG.info("TestTaskErr - cleanup task node once in ERR state"); conf.setInt("hbase.splitlog.max.resubmit", 0); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Err(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); synchronized (batch) { while (batch.installed != batch.error) { batch.wait(); } } waitForCounter(tot_mgr_task_deleted, 0, 1, to/2); assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1); conf.setInt("hbase.splitlog.max.resubmit", ZKSplitLogManagerCoordination.DEFAULT_MAX_RESUBMIT); }
@Test public void testWorkerCrash() throws Exception { slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); if (tot_mgr_heartbeat.sum() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); // Not yet resubmitted. Assert.assertEquals(0, tot_mgr_resubmit.sum()); // This server becomes dead Mockito.when(sm.isServerOnline(worker1)).thenReturn(false); Thread.sleep(1300); // The timeout checker is done every 1000 ms (hardcoded). // It has been resubmitted Assert.assertEquals(1, tot_mgr_resubmit.sum()); }
@Test public void testTaskDone() throws Exception { LOG.info("TestTaskDone - cleanup task node once in DONE state"); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Done(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); synchronized (batch) { while (batch.installed != batch.done) { batch.wait(); } } waitForCounter(tot_mgr_task_deleted, 0, 1, to/2); assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1); }
/** * Test whether the splitlog correctly creates a task in zookeeper * @throws Exception */ @Test public void testTaskCreation() throws Exception { LOG.info("TestTaskCreation - test the creation of a task in zk"); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); byte[] data = ZKUtil.getData(zkw, tasknode); SplitLogTask slt = SplitLogTask.parseFrom(data); LOG.info("Task node created " + slt.toString()); assertTrue(slt.isUnassigned(master.getServerName())); }
TaskBatch batch = new TaskBatch(); slm.enqueueSplitTask(logfiles[0].getPath().toString(), batch);
@Test public void testMultipleResubmits() throws Exception { LOG.info("TestMultipleResbmits - no indefinite resubmissions"); conf.setInt("hbase.splitlog.max.resubmit", 2); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); final ServerName worker2 = ServerName.valueOf("worker2,1,1"); final ServerName worker3 = ServerName.valueOf("worker3,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); waitForCounter(tot_mgr_resubmit, 0, 1, to + to/2); int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); slt = new SplitLogTask.Owned(worker2); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 1, 2, to/2); waitForCounter(tot_mgr_resubmit, 1, 2, to + to/2); int version2 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version2 > version1); slt = new SplitLogTask.Owned(worker3); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 2, 3, to/2); waitForCounter(tot_mgr_resubmit_threshold_reached, 0, 1, to + to/2); Thread.sleep(to + to/2); assertEquals(2L, tot_mgr_resubmit.sum() - tot_mgr_resubmit_force.sum()); }
@Test public void testUnassignedTimeout() throws Exception { LOG.info("TestUnassignedTimeout - iff all tasks are unassigned then" + " resubmit"); // create an orphan task in OWNED state String tasknode1 = ZKSplitLog.getEncodedNodeName(zkw, "orphan/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); zkw.getRecoverableZooKeeper().create(tasknode1, slt.toByteArray(), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); slm = new SplitLogManager(master, conf); waitForCounter(tot_mgr_orphan_task_acquired, 0, 1, to/2); // submit another task which will stay in unassigned mode TaskBatch batch = new TaskBatch(); submitTaskAndWait(batch, "foo/1"); // keep updating the orphan owned node every to/2 seconds for (int i = 0; i < (3 * to)/100; i++) { Thread.sleep(100); final ServerName worker2 = ServerName.valueOf("worker1,1,1"); slt = new SplitLogTask.Owned(worker2); ZKUtil.setData(zkw, tasknode1, slt.toByteArray()); } // since we have stopped heartbeating the owned node therefore it should // get resubmitted LOG.info("waiting for manager to resubmit the orphan task"); waitForCounter(tot_mgr_resubmit, 0, 1, to + to/2); // now all the nodes are unassigned. manager should post another rescan waitForCounter(tot_mgr_resubmit_unassigned, 0, 1, 2 * to + to/2); }
@Test public void testRescanCleanup() throws Exception { LOG.info("TestRescanCleanup - ensure RESCAN nodes are cleaned up"); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); waitForCounter(new Expr() { @Override public long eval() { return (tot_mgr_resubmit.sum() + tot_mgr_resubmit_failed.sum()); } }, 0, 1, 5*60000); // wait long enough Assert.assertEquals("Could not run test. Lost ZK connection?", 0, tot_mgr_resubmit_failed.sum()); int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); waitForCounter(tot_mgr_rescan_deleted, 0, 1, to/2); }
@Test public void testTaskResigned() throws Exception { LOG.info("TestTaskResigned - resubmit task node once in RESIGNED state"); assertEquals(0, tot_mgr_resubmit.sum()); slm = new SplitLogManager(master, conf); assertEquals(0, tot_mgr_resubmit.sum()); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); assertEquals(0, tot_mgr_resubmit.sum()); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); assertEquals(0, tot_mgr_resubmit.sum()); SplitLogTask slt = new SplitLogTask.Resigned(worker1); assertEquals(0, tot_mgr_resubmit.sum()); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); ZKUtil.checkExists(zkw, tasknode); // Could be small race here. if (tot_mgr_resubmit.sum() == 0) { waitForCounter(tot_mgr_resubmit, 0, 1, to/2); } assertEquals(1, tot_mgr_resubmit.sum()); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); }
@Test public void testDeadWorker() throws Exception { LOG.info("testDeadWorker"); conf.setLong("hbase.splitlog.max.resubmit", 0); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); int version = ZKUtil.checkExists(zkw, tasknode); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); if (tot_mgr_heartbeat.sum() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); slm.handleDeadWorker(worker1); if (tot_mgr_resubmit.sum() == 0) waitForCounter(tot_mgr_resubmit, 0, 1, to+to/2); if (tot_mgr_resubmit_dead_server_task.sum() == 0) { waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, to + to/2); } int version1 = ZKUtil.checkExists(zkw, tasknode); assertTrue(version1 > version); byte[] taskstate = ZKUtil.getData(zkw, tasknode); slt = SplitLogTask.parseFrom(taskstate); assertTrue(slt.isUnassigned(master.getServerName())); return; }
@Test public void testTaskErr() throws Exception { LOG.info("TestTaskErr - cleanup task node once in ERR state"); conf.setInt("hbase.splitlog.max.resubmit", 0); slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Err(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); synchronized (batch) { while (batch.installed != batch.error) { batch.wait(); } } waitForCounter(tot_mgr_task_deleted, 0, 1, to/2); assertTrue(ZKUtil.checkExists(zkw, tasknode) == -1); conf.setInt("hbase.splitlog.max.resubmit", ZKSplitLogManagerCoordination.DEFAULT_MAX_RESUBMIT); }