/**
 * Creates the master's WAL manager rooted at {@code rootDir}.
 * Eagerly constructs a {@link SplitLogManager} for distributed log splitting and
 * derives the old-WAL directory path from the root directory.
 *
 * @param conf     cluster configuration
 * @param fs       filesystem holding the WALs
 * @param rootDir  HBase root directory
 * @param services master services used by the split log manager
 * @throws IOException if the split log manager cannot be created
 */
public MasterWalManager(Configuration conf, FileSystem fs, Path rootDir, MasterServices services)
    throws IOException {
  this.conf = conf;
  this.fs = fs;
  this.rootDir = rootDir;
  this.services = services;
  this.oldLogDir = new Path(rootDir, HConstants.HREGION_OLDLOGDIR_NAME);
  this.splitLogManager = new SplitLogManager(services, conf);
}
// NOTE(review): incomplete fragment — the enclosing method and loop are not visible here.
// As written, waitForSplittingCompletion(...) directly follows a throw with no intervening
// closing brace, which would make it unreachable; presumably braces from the surrounding
// loop close the if first. Verify against the full source before editing.
TaskBatch batch = null; long startTime = 0; FileStatus[] logfiles = getFileList(logDirs, filter); if (logfiles.length != 0) { status.setStatus("Checking directory contents..."); if (!enqueueSplitTask(pathToLog, batch)) { throw new IOException("duplicate log split scheduled for " + lf.getPath()); waitForSplittingCompletion(batch, status);
// Fragment of a resubmit loop (enclosing code not visible): on a successful resubmit the
// counter is bumped; otherwise the current task owner is treated as dead and a retry is
// logged. NOTE(review): confirm 'cur_worker'/'path' definitions in the full method.
resubmitted++; } else { handleDeadWorker(cur_worker); LOG.warn("Failed to resubmit task " + path + " owned by dead " + cur_worker + ", will retry.");
/** * Add a task entry to coordination if it is not already there. * @param taskname the path of the log to be split * @param batch the batch this task belongs to * @return true if a new entry is created, false if it is already there. */ boolean enqueueSplitTask(String taskname, TaskBatch batch) { lastTaskCreateTime = EnvironmentEdgeManager.currentTime(); String task = getSplitLogManagerCoordination().prepareTask(taskname); Task oldtask = createTaskIfAbsent(task, batch); if (oldtask == null) { // publish the task in the coordination engine getSplitLogManagerCoordination().submitTask(task); return true; } return false; }
// Fragment of a sanity check (start of statement not visible): compares the expected number of
// in-flight tasks (installed - done - error) against the locally tracked count and against what
// the coordination engine reports; mismatches are only logged, not acted upon.
// NOTE(review): the literal "Expected at least" is missing a trailing space in the log output.
+ batch.installed + " done=" + batch.done + " error=" + batch.error); int remaining = batch.installed - (batch.done + batch.error); int actual = activeTasks(batch); if (remaining != actual) { LOG.warn("Expected " + remaining + " active tasks, but actually there are " + actual); int remainingTasks = getSplitLogManagerCoordination().remainingTasksInCoordination(); if (remainingTasks >= 0 && actual > remainingTasks) { LOG.warn("Expected at least" + actual + " tasks remaining, but actually there are "
/**
 * Base split method that splits WAL files matching a filter. Callers should pass the
 * appropriate filter for meta and non-meta WALs.
 * @param serverNames logs belonging to these servers will be split; this will rename the log
 *                    directory out from under a soft-failed server
 * @param filter selects which WAL files to split
 * @throws IOException if the distributed split fails
 */
public void splitLog(final Set<ServerName> serverNames, PathFilter filter) throws IOException {
  List<Path> logDirs = getLogDirs(serverNames);
  splitLogManager.handleDeadWorkers(serverNames);
  long startTime = EnvironmentEdgeManager.currentTime();
  long splitLogSize = splitLogManager.splitLogDistributed(serverNames, logDirs, filter);
  long splitTime = EnvironmentEdgeManager.currentTime() - startTime;
  if (this.metricsMasterFilesystem != null) {
    // Record meta and user-region splits under separate metrics.
    if (filter == META_FILTER) {
      this.metricsMasterFilesystem.addMetaWALSplit(splitTime, splitLogSize);
    } else {
      this.metricsMasterFilesystem.addSplit(splitTime, splitLogSize);
    }
  }
}
/**
 * An empty WAL directory for a server should be removed entirely by a
 * distributed log split, with no split work performed.
 */
@Test
public void testEmptyLogDir() throws Exception {
  LOG.info("testEmptyLogDir");
  slm = new SplitLogManager(master, conf);
  FileSystem testFs = TEST_UTIL.getTestFileSystem();
  Path walRoot = new Path(testFs.getWorkingDirectory(), HConstants.HREGION_LOGDIR_NAME);
  Path serverLogDir = new Path(walRoot, ServerName.valueOf("emptyLogDir", 1, 1).toString());
  testFs.mkdirs(serverLogDir);
  slm.splitLogDistributed(serverLogDir);
  // The empty directory must be gone after the split completes.
  assertFalse(testFs.exists(serverLogDir));
}
/**
 * Verifies that a task owned by a worker that is later declared dead is
 * resubmitted as unassigned (with a newer znode version), even when generic
 * resubmit retries are disabled.
 */
@Test
public void testDeadWorker() throws Exception {
  LOG.info("testDeadWorker");
  // Disable generic resubmit retries so only the dead-worker path can resubmit.
  conf.setLong("hbase.splitlog.max.resubmit", 0);
  slm = new SplitLogManager(master, conf);
  TaskBatch batch = new TaskBatch();
  String tasknode = submitTaskAndWait(batch, "foo/1");
  int version = ZKUtil.checkExists(zkw, tasknode);
  final ServerName worker1 = ServerName.valueOf("worker1,1,1");
  // Make worker1 the owner of the task, then declare it dead.
  SplitLogTask slt = new SplitLogTask.Owned(worker1);
  ZKUtil.setData(zkw, tasknode, slt.toByteArray());
  if (tot_mgr_heartbeat.sum() == 0) {
    waitForCounter(tot_mgr_heartbeat, 0, 1, to / 2);
  }
  slm.handleDeadWorker(worker1);
  if (tot_mgr_resubmit.sum() == 0) {
    waitForCounter(tot_mgr_resubmit, 0, 1, to + to / 2);
  }
  if (tot_mgr_resubmit_dead_server_task.sum() == 0) {
    waitForCounter(tot_mgr_resubmit_dead_server_task, 0, 1, to + to / 2);
  }
  // The resubmitted task must carry a newer znode version and be unassigned.
  int version1 = ZKUtil.checkExists(zkw, tasknode);
  assertTrue(version1 > version);
  byte[] taskstate = ZKUtil.getData(zkw, tasknode);
  slt = SplitLogTask.parseFrom(taskstate);
  assertTrue(slt.isUnassigned(master.getServerName()));
  // (removed a redundant trailing 'return;' — it was the final statement of a void method)
}
// NOTE(review): incomplete fragment — waitForSplittingCompletion(...) and
// removeRecoveringRegions(...) directly follow a throw with no visible closing brace, which
// would make them unreachable; braces from the surrounding (not visible) loop presumably
// close the if first. Also 'batch'/'pathToLog'/'lf'/'isMetaRecovery' are defined elsewhere.
MonitoredTask status = TaskMonitor.get().createStatus("Doing distributed log split in " + logDirs + " for serverName=" + serverNames); FileStatus[] logfiles = getFileList(logDirs, filter); status.setStatus("Checking directory contents..."); SplitLogCounters.tot_mgr_log_split_batch_start.incrementAndGet(); if (!enqueueSplitTask(pathToLog, batch)) { throw new IOException("duplicate log split scheduled for " + lf.getPath()); waitForSplittingCompletion(batch, status); removeRecoveringRegions(serverNames, isMetaRecovery);
/**
 * Splits the WAL files under a single region server's log directory by
 * delegating to the list-based overload.
 * @param logDir one region server WAL dir path in .logs
 * @return cumulative size of the logfiles split
 * @throws IOException if there was an error while splitting any log file
 */
public long splitLogDistributed(final Path logDir) throws IOException {
  List<Path> logDirs = new ArrayList<>(1);
  logDirs.add(logDir);
  return splitLogDistributed(logDirs);
}
/** Shuts down the split log manager, if one was created. */
public void stop() {
  if (splitLogManager == null) {
    return;
  }
  splitLogManager.stop();
}
/**
 * Enqueues a split task for {@code name} and blocks until its znode has been
 * created, asserting the batch bookkeeping along the way.
 * @return the encoded task node path
 */
private String submitTaskAndWait(TaskBatch batch, String name)
    throws KeeperException, InterruptedException {
  final String node = ZKSplitLog.getEncodedNodeName(zkw, name);
  final NodeCreationListener creationListener = new NodeCreationListener(zkw, node);
  zkw.registerListener(creationListener);
  // Set the watch before enqueueing so the creation event cannot be missed.
  ZKUtil.watchAndCheckExists(zkw, node);

  slm.enqueueSplitTask(name, batch);
  assertEquals(1, batch.installed);
  assertTrue(findOrCreateOrphanTask(node).batch == batch);
  assertEquals(1L, tot_mgr_node_create_queued.sum());

  LOG.debug("waiting for task node creation");
  creationListener.waitForCreation();
  LOG.debug("task created");
  return node;
}
/**
 * Lists WAL files under the given directories that match {@code filter},
 * delegating to the static overload with this manager's configuration.
 */
private FileStatus[] getFileList(List<Path> logDirs, PathFilter filter) throws IOException {
  return getFileList(this.conf, logDirs, filter);
}
// Fragment (start/end of enclosing method not visible): when distributed log splitting is
// enabled ("hbase.master.distributed.log.splitting", default true) a ZK-based SplitLogManager
// is created and initialized; otherwise splitLogManager is left null. NOTE(review): the
// getBoolean result appears unassigned here — presumably it feeds this.distributedLogSplitting
// in the full source; confirm.
conf.getBoolean("hbase.master.distributed.log.splitting", true); if (this.distributedLogSplitting) { this.splitLogManager = new SplitLogManager(master.getZooKeeper(), master.getConfiguration(), master, master.getServerName().toString()); this.splitLogManager.finishInitialization(masterRecovery); } else { this.splitLogManager = null;
/**
 * Builds the master's view of the HBase filesystem layout. Statement order matters:
 * rootdir must be resolved before tempdir and the FileSystem handle, and the
 * FileSystem must be configured before the initial layout is created.
 */
public MasterFileSystem(Server master, MasterServices services) throws IOException {
  this.conf = master.getConfiguration();
  this.master = master;
  this.services = services;
  // Set filesystem to be that of this.rootdir else we get complaints about
  // mismatched filesystems if hbase.rootdir is hdfs and fs.defaultFS is
  // default localfs. Presumption is that rootdir is fully-qualified before
  // we get to here with appropriate fs scheme.
  this.rootdir = FSUtils.getRootDir(conf);
  this.tempdir = new Path(this.rootdir, HConstants.HBASE_TEMP_DIRECTORY);
  // Cover both bases, the old way of setting default fs and the new.
  // We're supposed to run on 0.20 and 0.21 anyways.
  this.fs = this.rootdir.getFileSystem(conf);
  FSUtils.setFsDefault(conf, new Path(this.fs.getUri()));
  // make sure the fs has the same conf
  fs.setConf(conf);
  // setup the filesystem variable
  // set up the archived logs path
  this.oldLogDir = createInitialFileSystemLayout();
  HFileSystem.addLocationsOrderInterceptor(conf);
  this.splitLogManager = new SplitLogManager(master, master.getConfiguration(), master, services, master.getServerName());
  // NOTE(review): presumably reflects whether distributed log replay mode is active — confirm
  this.distributedLogReplay = this.splitLogManager.isLogReplaying();
}
/** * Add a task entry to splitlog znode if it is not already there. * * @param taskname the path of the log to be split * @param batch the batch this task belongs to * @return true if a new entry is created, false if it is already there. */ boolean enqueueSplitTask(String taskname, TaskBatch batch) { tot_mgr_log_split_start.incrementAndGet(); String path = ZKSplitLog.getEncodedNodeName(watcher, taskname); Task oldtask = createTaskIfAbsent(path, batch); if (oldtask == null) { // publish the task in zk createNode(path, zkretries); return true; } return false; }
// Fragment of a sanity check (start of statement not visible): ZK-based variant comparing the
// expected in-flight count with the tracked count and with ZK's remaining-task count; only
// warnings are emitted on mismatch. NOTE(review): "Expected at least" is missing a trailing
// space in the log output.
+ " error=" + batch.error); int remaining = batch.installed - (batch.done + batch.error); int actual = activeTasks(batch); if (remaining != actual) { LOG.warn("Expected " + remaining + " active tasks, but actually there are " + actual); int remainingInZK = remainingTasksInZK(); if (remainingInZK >= 0 && actual > remainingInZK) { LOG.warn("Expected at least" + actual
// Fragment of a sanity check (start of statement and rest of the if-body not visible):
// compares the expected in-flight count (installed - done - error) with the tracked count
// and logs a warning on mismatch.
+ batch.installed + " done=" + batch.done + " error=" + batch.error); int remaining = batch.installed - (batch.done + batch.error); int actual = activeTasks(batch); if (remaining != actual) { LOG.warn("Expected " + remaining + " active tasks, but actually there are " + actual);
/** * Add a task entry to coordination if it is not already there. * @param taskname the path of the log to be split * @param batch the batch this task belongs to * @return true if a new entry is created, false if it is already there. */ boolean enqueueSplitTask(String taskname, TaskBatch batch) { lastTaskCreateTime = EnvironmentEdgeManager.currentTime(); String task = ((BaseCoordinatedStateManager) server.getCoordinatedStateManager()) .getSplitLogManagerCoordination().prepareTask(taskname); Task oldtask = createTaskIfAbsent(task, batch); if (oldtask == null) { // publish the task in the coordination engine ((BaseCoordinatedStateManager) server.getCoordinatedStateManager()) .getSplitLogManagerCoordination().submitTask(task); return true; } return false; }
// NOTE(review): incomplete test fragment — 'logDirPath' is not defined in the visible code,
// the created 'dir' is unused here, and the body ends without assertions or a closing brace.
// Verify against the full test before editing.
@Test public void testLogFilesAreArchived() throws Exception { LOG.info("testLogFilesAreArchived"); slm = new SplitLogManager(master, conf); FileSystem fs = TEST_UTIL.getTestFileSystem(); Path dir = TEST_UTIL.getDataTestDirOnTestFS("testLogFilesAreArchived"); slm.splitLogDistributed(logDirPath);