/**
 * Lists the region servers the master's ServerManager currently reports as online.
 *
 * @return the online server names joined with ";", or an empty string while the
 *         ServerManager is not available
 */
@Override
public String getRegionServers() {
  final ServerManager manager = this.master.getServerManager();
  return manager == null ? "" : StringUtils.join(manager.getOnlineServers().keySet(), ";");
}
/**
 * Counts the region servers the master's ServerManager currently reports as online.
 *
 * @return the number of online servers, or 0 while the ServerManager is not available
 */
@Override
public int getNumRegionServers() {
  final ServerManager manager = this.master.getServerManager();
  return manager == null ? 0 : manager.getOnlineServers().size();
}
/**
 * Prints one "name: metrics" line per online region server to {@code out}.
 * If the master has no ServerManager yet, a single notice line is printed instead.
 *
 * @param master the master whose online servers are dumped
 * @param out    the writer receiving the dump
 */
private void dumpServers(HMaster master, PrintWriter out) {
  ServerManager serverManager = master.getServerManager();
  if (serverManager == null) {
    out.println("ServerManager is not initialized");
    return;
  }
  for (Map.Entry<ServerName, ServerMetrics> online : serverManager.getOnlineServers().entrySet()) {
    ServerName name = online.getKey();
    ServerMetrics metrics = online.getValue();
    out.println(name + ": " + metrics);
  }
}
}
/**
 * Returns the servers that died since the given timestamp.
 * Declared protected so that tests can override it in a subclass.
 *
 * @param since the timestamp passed through to {@code copyDeadServersSince}
 * @return the dead servers paired with a Long value, or an empty list while the
 *         master has no ServerManager
 */
protected List<Pair<ServerName, Long>> getDeadServers(long since) {
  if (master.getServerManager() != null) {
    return master.getServerManager().getDeadServers().copyDeadServersSince(since);
  }
  return Collections.emptyList();
}
/**
 * Lists the region servers the master's ServerManager tracks as dead.
 *
 * @return the dead server names joined with ";", or an empty string while the
 *         ServerManager is not available
 */
@Override
public String getDeadRegionServers() {
  final ServerManager manager = this.master.getServerManager();
  return manager == null
    ? ""
    : StringUtils.join(manager.getDeadServers().copyServerNames(), ";");
}
/**
 * Counts the region servers the master's ServerManager tracks as dead.
 *
 * @return the number of dead servers, or 0 while the ServerManager is not available
 */
@Override
public int getNumDeadRegionServers() {
  final ServerManager manager = this.master.getServerManager();
  return manager == null ? 0 : manager.getDeadServers().size();
}
/**
 * Asks the master's ServerManager whether {@code serverName} is dead.
 *
 * @return the result of {@code isServerDead(serverName)}
 */
@Override
public boolean evaluate() {
  final boolean serverIsDead = getMaster(util).getServerManager().isServerDead(serverName);
  return serverIsDead;
}
/**
 * Collects the replication load sources reported by the given region servers and groups
 * them by replication peer id.
 * <p>
 * The result holds one (possibly empty) list for every peer returned by the replication peer
 * manager. Load sources that reference a peer id which is not in that list are ignored
 * instead of failing the whole call with a NullPointerException.
 *
 * @param serverNames the region servers whose replication load is collected
 * @return a map from peer id to (server, load source) pairs, each list ordered by descending
 *         replication lag; {@code null} if the peer manager returns a null peer list
 */
public HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>>
    getReplicationLoad(ServerName[] serverNames) {
  List<ReplicationPeerDescription> peerList = this.getReplicationPeerManager().listPeers(null);
  if (peerList == null) {
    return null;
  }
  HashMap<String, List<Pair<ServerName, ReplicationLoadSource>>> replicationLoadSourceMap =
    new HashMap<>(peerList.size());
  for (ReplicationPeerDescription peer : peerList) {
    replicationLoadSourceMap.put(peer.getPeerId(), new ArrayList<>());
  }
  for (ServerName serverName : serverNames) {
    // NOTE(review): getLoad(serverName) is dereferenced unchecked; confirm it cannot return
    // null for a server that is no longer online.
    List<ReplicationLoadSource> replicationLoadSources =
      getServerManager().getLoad(serverName).getReplicationLoadSourceList();
    for (ReplicationLoadSource replicationLoadSource : replicationLoadSources) {
      List<Pair<ServerName, ReplicationLoadSource>> loadsForPeer =
        replicationLoadSourceMap.get(replicationLoadSource.getPeerID());
      if (loadsForPeer == null) {
        // The server reports a source for a peer that is not in the peer list; skip it.
        continue;
      }
      loadsForPeer.add(new Pair<>(serverName, replicationLoadSource));
    }
  }
  for (List<Pair<ServerName, ReplicationLoadSource>> loads : replicationLoadSourceMap.values()) {
    // Largest lag first. Comparing directly avoids the overflow of negating Long.MIN_VALUE.
    loads.sort((left, right) -> Long.compare(right.getSecond().getReplicationLag(),
      left.getSecond().getReplicationLag()));
  }
  return replicationLoadSourceMap;
}
private void waitForRSShutdownToStartAndFinish(JVMClusterUtil.MasterThread activeMaster, ServerName serverName) throws InterruptedException { ServerManager sm = activeMaster.getMaster().getServerManager(); // First wait for it to be in dead list while (!sm.getDeadServers().isDeadServer(serverName)) { LOG.debug("Waiting for [" + serverName + "] to be listed as dead in master"); Thread.sleep(SLEEP_TIME); } LOG.debug("Server [" + serverName + "] marked as dead, waiting for it to " + "finish dead processing"); while (sm.areDeadServersInProgress()) { LOG.debug("Server [" + serverName + "] still being processed, waiting"); Thread.sleep(SLEEP_TIME); } LOG.debug("Server [" + serverName + "] done with server shutdown processing"); } }
/** * Start Master. Get as far as the state where Master is waiting on * RegionServers to check in, then return. */ private MasterThread startMaster(MasterThread master) { master.start(); // It takes a while until ServerManager creation to happen inside Master startup. while (master.getMaster().getServerManager() == null) { continue; } // Set a listener for the waiting-on-RegionServers state. We want to wait // until this condition before we leave this method and start regionservers. final AtomicBoolean waiting = new AtomicBoolean(false); if (master.getMaster().getServerManager() == null) throw new NullPointerException("SM"); master.getMaster().getServerManager().registerListener(new ServerListener() { @Override public void waiting() { waiting.set(true); } }); // Wait until the Master gets to place where it is waiting on RegionServers to check in. while (!waiting.get()) { continue; } // Set the global master-is-active; gets picked up by regionservers later. masterActive.set(true); return master; }
/** * @return True if region is online and scannable else false if an error or shutdown (Otherwise * we just block in here holding up all forward-progess). */ private boolean isRegionOnline(RegionInfo ri) throws InterruptedException { RetryCounter rc = null; while (!isStopped()) { RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri); if (rs.isOpened()) { if (this.getServerManager().isServerOnline(rs.getServerName())) { return true; } } // Region is not OPEN. Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures(). stream().filter(p -> p instanceof ServerCrashProcedure).findAny(); // TODO: Add a page to refguide on how to do repair. Have this log message point to it. // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and // then how to assign including how to break region lock if one held. LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " + "progress, in holding-pattern until region onlined.", ri.getRegionNameAsString(), rs, optProc.isPresent()); // Check once-a-minute. if (rc == null) { rc = new RetryCounterFactory(1000).create(); } Threads.sleep(rc.getBackoffTimeAndIncrementAttempts()); } return false; }
/** * return the subset of all regionservers * (actually returns set of ServerLoads) * which host some region in a given table. * used by assertAllRegionServers() below to * test reporting of loaded coprocessors. * @param tableName : given table. * @return subset of all servers. */ Map<ServerName, ServerMetrics> serversForTable(String tableName) { Map<ServerName, ServerMetrics> serverLoadHashMap = new HashMap<>(); for(Map.Entry<ServerName, ServerMetrics> server: TEST_UTIL.getMiniHBaseCluster().getMaster().getServerManager(). getOnlineServers().entrySet()) { for(Map.Entry<byte[], RegionMetrics> region: server.getValue().getRegionMetrics().entrySet()) { if (region.getValue().getNameAsString().equals(tableName)) { // this server hosts a region of tableName: add this server.. serverLoadHashMap.put(server.getKey(),server.getValue()); // .. and skip the rest of the regions that it hosts. break; } } } return serverLoadHashMap; }
/**
 * Polls the master until no dead servers are in progress, logging and sleeping one second
 * between polls. The balancer won't run while a crashed server is being processed.
 */
private void waitOnCrashProcessing() {
  while (true) {
    boolean crashProcessingPending =
      UTIL.getHBaseCluster().getMaster().getServerManager().areDeadServersInProgress();
    if (!crashProcessingPending) {
      return;
    }
    LOG.info("Waiting on processing of crashed server before proceeding...");
    Threads.sleep(1000);
  }
}
/**
 * Looks up the last flushed sequence ids that the ServerManager holds for the region named
 * in the request.
 *
 * @param controller the RPC controller (not used here)
 * @param request    carries the region name to look up
 * @return the response built from the ServerManager's sequence ids
 * @throws ServiceException wrapping the IOException raised when the master service has not
 *           started
 */
@Override
@QosPriority(priority = HConstants.ADMIN_QOS)
public GetLastFlushedSequenceIdResponse getLastFlushedSequenceId(RpcController controller,
    GetLastFlushedSequenceIdRequest request) throws ServiceException {
  try {
    master.checkServiceStarted();
  } catch (IOException ioe) {
    throw new ServiceException(ioe);
  }
  final byte[] regionName = request.getRegionName().toByteArray();
  return ResponseConverter.buildGetLastFlushedSequenceIdResponse(
    master.getServerManager().getLastFlushedSequenceId(regionName));
}
@Test public void testNewStartedRegionServerVersion() throws Exception { UTIL.startMiniCluster(1); // Start 3 new region server Thread t = new Thread(() -> { for (int i = 0; i < 3; i++) { try { JVMClusterUtil.RegionServerThread newRS = UTIL.getMiniHBaseCluster().startRegionServer(); newRS.waitForServerOnline(); } catch (IOException e) { LOG.error("Failed to start a new RS", e); } } }); t.start(); HMaster master = UTIL.getMiniHBaseCluster().getMaster(); while (t.isAlive()) { List<ServerName> serverNames = master.getServerManager().getOnlineServersList(); for (ServerName serverName : serverNames) { assertNotEquals(0, master.getServerManager().getVersionNumber(serverName)); } Thread.sleep(100); } }
@Test public void testFlushedSequenceIdPersistLoad() throws Exception { Configuration conf = TEST_UTIL.getConfiguration(); int msgInterval = conf.getInt("hbase.regionserver.msginterval", 100); // insert some data into META TableName tableName = TableName.valueOf("testFlushSeqId"); HTableDescriptor desc = new HTableDescriptor(tableName); desc.addFamily(new HColumnDescriptor(Bytes.toBytes("cf"))); Table table = TEST_UTIL.createTable(desc, null); // flush META region TEST_UTIL.flush(TableName.META_TABLE_NAME); // wait for regionserver report Threads.sleep(msgInterval * 2); // record flush seqid before cluster shutdown Map<byte[], Long> regionMapBefore = TEST_UTIL.getHBaseCluster().getMaster().getServerManager() .getFlushedSequenceIdByRegion(); // restart hbase cluster which will cause flushed sequence id persist and reload TEST_UTIL.getMiniHBaseCluster().shutdown(); TEST_UTIL.restartHBaseCluster(2); TEST_UTIL.waitUntilNoRegionsInTransition(); // check equality after reloading flushed sequence id map Map<byte[], Long> regionMapAfter = TEST_UTIL.getHBaseCluster().getMaster().getServerManager() .getFlushedSequenceIdByRegion(); assertTrue(regionMapBefore.equals(regionMapAfter)); }
private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster, ServerName serverName) throws InterruptedException { ServerManager sm = activeMaster.getMaster().getServerManager(); // First wait for it to be in dead list while (!sm.getDeadServers().isDeadServer(serverName)) { log("Waiting for [" + serverName + "] to be listed as dead in master"); Thread.sleep(1); } log("Server [" + serverName + "] marked as dead, waiting for it to " + "finish dead processing"); while (sm.areDeadServersInProgress()) { log("Server [" + serverName + "] still being processed, waiting"); Thread.sleep(100); } log("Server [" + serverName + "] done with server shutdown processing"); }
/**
 * Writes a random UUID into the cluster-id file with {@code writeUTF} before HBase starts,
 * then starts the mini cluster and checks the number of online servers seen by the master.
 */
@Test
public void testRewritingClusterIdToPB() throws Exception {
  TEST_UTIL.startMiniZKCluster();
  TEST_UTIL.startMiniDFSCluster(1);
  TEST_UTIL.createRootDir();
  Path rootDir = FSUtils.getRootDir(TEST_UTIL.getConfiguration());
  FileSystem fs = rootDir.getFileSystem(TEST_UTIL.getConfiguration());
  Path filePath = new Path(rootDir, HConstants.CLUSTER_ID_FILE_NAME);
  // try-with-resources closes the stream on every path, including a failed write.
  try (FSDataOutputStream s = fs.create(filePath)) {
    s.writeUTF(TEST_UTIL.getRandomUUID().toString());
  }
  TEST_UTIL.startMiniHBaseCluster();
  HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
  // Two online servers are expected when tables are on the master, otherwise one.
  int expected = LoadBalancer.isTablesOnMaster(TEST_UTIL.getConfiguration()) ? 2 : 1;
  assertEquals(expected, master.getServerManager().getOnlineServersList().size());
}
/**
 * Submits a ServerCrashProcedure for {@code hostname123}, waits for it to finish, and
 * asserts that the dead-server tracker no longer reports servers in progress.
 */
@Test
public void testCrashProcedureReplay() {
  HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
  final ProcedureExecutor<MasterProcedureEnv> executor = master.getMasterProcedureExecutor();

  ProcedureTestingUtility.submitAndWait(executor,
    new ServerCrashProcedure(executor.getEnvironment(), hostname123, false, false));

  assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
}
public static ServerName getServerHoldingRegion(final HBaseTestingUtility util, final RegionInfo hri) throws Exception { ServerName serverName = util.getMiniHBaseCluster().getServerHoldingRegion( hri.getTable(), hri.getRegionName()); ServerName amServerName = getMaster(util).getAssignmentManager().getRegionStates() .getRegionServerOfRegion(hri); // Make sure AM and MiniCluster agrees on the Server holding the region // and that the server is online. assertEquals(amServerName, serverName); assertEquals(true, getMaster(util).getServerManager().isServerOnline(serverName)); return serverName; }