public synchronized boolean removeServerFromDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> if (!this.isServerOnline(sn)) { LOG.warn("Server " + sn + " is not currently online. " + "Removing from draining list anyway, as requested."); } // Remove the server from the draining servers lists. return this.drainingServers.remove(sn); }
/** * Add the server to the drain list. * @param sn * @return True if the server is added or the server is already on the drain list. */ public synchronized boolean addServerToDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> if (!this.isServerOnline(sn)) { LOG.warn("Server " + sn + " is not currently online. " + "Ignoring request to add it to draining list."); return false; } // Add the server to the draining servers lists, if it's not already in // it. if (this.drainingServers.contains(sn)) { LOG.warn("Server " + sn + " is already in the draining server list." + "Ignoring request to add it again."); return true; } LOG.info("Server " + sn + " added to draining server list."); return this.drainingServers.add(sn); }
&& master.getServerManager().isServerOnline(serverName)); if (retry) {
/**
 * Picks the remote call used to deliver {@code remoteProcedures} to {@code serverName},
 * based on the version number the server manager reports for that server.
 * <ul>
 * <li>version at or above {@code RS_VERSION_WITH_EXEC_PROCS}: submit an
 * {@code ExecuteProceduresRemoteCall};</li>
 * <li>version 0 and the server is not online: submit a {@code DeadRSRemoteCall};</li>
 * <li>anything else: submit a {@code CompatRemoteProcedureResolver}.</li>
 * </ul>
 *
 * @param serverName the server the procedures are destined for
 * @param remoteProcedures the procedures to hand to the selected remote call
 */
@Override
protected void remoteDispatch(final ServerName serverName,
    final Set<RemoteProcedure> remoteProcedures) {
  final int rsVersion = master.getServerManager().getVersionNumber(serverName);
  if (rsVersion >= RS_VERSION_WITH_EXEC_PROCS) {
    LOG.trace("Using procedure batch rpc execution for serverName={} version={}",
      serverName, rsVersion);
    submitTask(new ExecuteProceduresRemoteCall(serverName, remoteProcedures));
  } else if (rsVersion == 0 && !master.getServerManager().isServerOnline(serverName)) {
    submitTask(new DeadRSRemoteCall(serverName, remoteProcedures));
  } else {
    // Parameterized form, matching the trace call above; the message is only built when
    // INFO is enabled instead of being eagerly formatted with String.format.
    LOG.info("Fallback to compat rpc execution for serverName={} version={}",
      serverName, rsVersion);
    submitTask(new CompatRemoteProcedureResolver(serverName, remoteProcedures));
  }
}
final boolean alive = details.getMaster().getServerManager() != null ? details.getMaster().getServerManager() .isServerOnline(task.cur_worker_name) : true; if (alive && time < timeout) { LOG.trace("Skipping the resubmit of " + task.toString() + " because the server "
/** * @return True if region is online and scannable else false if an error or shutdown (Otherwise * we just block in here holding up all forward-progess). */ private boolean isRegionOnline(RegionInfo ri) throws InterruptedException { RetryCounter rc = null; while (!isStopped()) { RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri); if (rs.isOpened()) { if (this.getServerManager().isServerOnline(rs.getServerName())) { return true; } } // Region is not OPEN. Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures(). stream().filter(p -> p instanceof ServerCrashProcedure).findAny(); // TODO: Add a page to refguide on how to do repair. Have this log message point to it. // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and // then how to assign including how to break region lock if one held. LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " + "progress, in holding-pattern until region onlined.", ri.getRegionNameAsString(), rs, optProc.isPresent()); // Check once-a-minute. if (rc == null) { rc = new RetryCounterFactory(1000).create(); } Threads.sleep(rc.getBackoffTimeAndIncrementAttempts()); } return false; }
return; if (!this.serverManager.isServerOnline(server)) { return;
@Before public void setup() throws Exception { TEST_UTIL = new HBaseTestingUtility(); TEST_UTIL.startMiniZKCluster(); conf = TEST_UTIL.getConfiguration(); // Use a different ZK wrapper instance for each tests. zkw = new ZKWatcher(conf, "split-log-manager-tests" + TEST_UTIL.getRandomUUID().toString(), null); master = new DummyMasterServices(zkw, conf); ZKUtil.deleteChildrenRecursively(zkw, zkw.getZNodePaths().baseZNode); ZKUtil.createAndFailSilent(zkw, zkw.getZNodePaths().baseZNode); assertTrue(ZKUtil.checkExists(zkw, zkw.getZNodePaths().baseZNode) != -1); LOG.debug(zkw.getZNodePaths().baseZNode + " created"); ZKUtil.createAndFailSilent(zkw, zkw.getZNodePaths().splitLogZNode); assertTrue(ZKUtil.checkExists(zkw, zkw.getZNodePaths().splitLogZNode) != -1); LOG.debug(zkw.getZNodePaths().splitLogZNode + " created"); resetCounters(); // By default, we let the test manage the error as before, so the server // does not appear as dead from the master point of view, only from the split log pov. Mockito.when(sm.isServerOnline(Mockito.any())).thenReturn(true); to = 12000; conf.setInt(HConstants.HBASE_SPLITLOG_MANAGER_TIMEOUT, to); conf.setInt("hbase.splitlog.manager.unassigned.timeout", 2 * to); conf.setInt("hbase.splitlog.manager.timeoutmonitor.period", 100); to = to + 16 * 100; }
@Test public void testWorkerCrash() throws Exception { slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); if (tot_mgr_heartbeat.sum() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); // Not yet resubmitted. Assert.assertEquals(0, tot_mgr_resubmit.sum()); // This server becomes dead Mockito.when(sm.isServerOnline(worker1)).thenReturn(false); Thread.sleep(1300); // The timeout checker is done every 1000 ms (hardcoded). // It has been resubmitted Assert.assertEquals(1, tot_mgr_resubmit.sum()); }
HRegionServer destServer = regionServer.getRegionServer(); destServerName = destServer.getServerName(); if (!destServerName.equals(serverName) && serverManager.isServerOnline(destServerName)) { break;
assertTrue(master.getMaster().getServerManager().isServerOnline(killedRS.get()));
public static ServerName getServerHoldingRegion(final HBaseTestingUtility util, final RegionInfo hri) throws Exception { ServerName serverName = util.getMiniHBaseCluster().getServerHoldingRegion( hri.getTable(), hri.getRegionName()); ServerName amServerName = getMaster(util).getAssignmentManager().getRegionStates() .getRegionServerOfRegion(hri); // Make sure AM and MiniCluster agrees on the Server holding the region // and that the server is online. assertEquals(amServerName, serverName); assertEquals(true, getMaster(util).getServerManager().isServerOnline(serverName)); return serverName; }
master.getServerManager().isServerOnline(deadServer)); } finally { master.stopMaster();
/**
 * Reports whether the given RegionServer is online, as answered by the server manager.
 *
 * @param serverName the RegionServer to look up
 * @return True if online.
 */
public boolean isServerOnline(ServerName serverName) {
  final boolean online = serverManager.isServerOnline(serverName);
  return online;
}

/**
public boolean removeServerFromDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> if (!this.isServerOnline(sn)) { LOG.warn("Server " + sn + " is not currently online. " + "Removing from draining list anyway, as requested."); } // Remove the server from the draining servers lists. return this.drainingServers.remove(sn); }
public boolean removeServerFromDrainList(final ServerName sn) { // Warn if the server (sn) is not online. ServerName is of the form: // <hostname> , <port> , <startcode> if (!this.isServerOnline(sn)) { LOG.warn("Server " + sn + " is not currently online. " + "Removing from draining list anyway, as requested."); } // Remove the server from the draining servers lists. return this.drainingServers.remove(sn); }
/**
 * Splits the log of the given server and then expires it, but only when the server name is
 * non-null and the server manager reports it as online. Otherwise nothing is done.
 *
 * @param sn ServerName to check; may be null.
 * @throws IOException declared by this method; presumably raised by the log split
 */
private void splitLogAndExpireIfOnline(final ServerName sn) throws IOException {
  // Short-circuit keeps the null check ahead of the online lookup.
  final boolean eligible = sn != null && serverManager.isServerOnline(sn);
  if (eligible) {
    LOG.info("Forcing splitLog and expire of " + sn);
    // Order matters: split the log first, expire afterwards.
    fileSystemManager.splitLog(sn);
    serverManager.expireServer(sn);
  }
}
/**
 * Reacts to the deletion of a znode. Paths that do not start with the watcher's
 * {@code rsZNode} are ignored. For the others, the node name is parsed into a
 * {@code ServerName}; if the server manager reports that server online it is removed from
 * this tracker and expired, otherwise a warning is logged and nothing else happens.
 *
 * @param path full path of the deleted znode
 */
@Override
public void nodeDeleted(String path) {
  if (!path.startsWith(watcher.rsZNode)) {
    return;
  }
  String serverName = ZKUtil.getNodeName(path);
  LOG.info("RegionServer ephemeral node deleted, processing expiration [" + serverName + "]");
  ServerName sn = ServerName.parseServerName(serverName);
  if (!serverManager.isServerOnline(sn)) {
    // serverName is already a String, so no toString() is needed. The first literal now
    // ends with a space so the two sentences no longer run together in the log.
    LOG.warn(serverName + " is not online or isn't known to the master. "
        + "The latter could be caused by a DNS misconfiguration.");
    return;
  }
  remove(sn);
  this.serverManager.expireServer(sn);
}
@Test public void testWorkerCrash() throws Exception { slm = new SplitLogManager(master, conf); TaskBatch batch = new TaskBatch(); String tasknode = submitTaskAndWait(batch, "foo/1"); final ServerName worker1 = ServerName.valueOf("worker1,1,1"); SplitLogTask slt = new SplitLogTask.Owned(worker1); ZKUtil.setData(zkw, tasknode, slt.toByteArray()); if (tot_mgr_heartbeat.sum() == 0) waitForCounter(tot_mgr_heartbeat, 0, 1, to/2); // Not yet resubmitted. Assert.assertEquals(0, tot_mgr_resubmit.sum()); // This server becomes dead Mockito.when(sm.isServerOnline(worker1)).thenReturn(false); Thread.sleep(1300); // The timeout checker is done every 1000 ms (hardcoded). // It has been resubmitted Assert.assertEquals(1, tot_mgr_resubmit.sum()); }
public static ServerName getServerHoldingRegion(final HBaseTestingUtility util, final RegionInfo hri) throws Exception { ServerName serverName = util.getMiniHBaseCluster().getServerHoldingRegion( hri.getTable(), hri.getRegionName()); ServerName amServerName = getMaster(util).getAssignmentManager().getRegionStates() .getRegionServerOfRegion(hri); // Make sure AM and MiniCluster agrees on the Server holding the region // and that the server is online. assertEquals(amServerName, serverName); assertEquals(true, getMaster(util).getServerManager().isServerOnline(serverName)); return serverName; }