/** * @return A retry counter factory configured for retrying znode creation. */ private static RetryCounterFactory createZnodeRetryCounterFactory(Configuration conf) { return new RetryCounterFactory( conf.getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS), conf.getInt("hbase.hbck.createznode.attempt.sleep.interval", DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL), conf.getInt("hbase.hbck.createznode.attempt.maxsleeptime", DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME)); }
/** * @return A retry counter factory configured for retrying lock file creation. */ public static RetryCounterFactory createLockRetryCounterFactory(Configuration conf) { return new RetryCounterFactory( conf.getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), conf.getInt("hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), conf.getInt("hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); }
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="DE_MIGHT_IGNORE", justification="None. Its always been this way.") public RecoverableZooKeeper(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis, int maxSleepTime, String identifier) throws IOException { // TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should. this.retryCounterFactory = new RetryCounterFactory(maxRetries+1, retryIntervalMillis, maxSleepTime); if (identifier == null || identifier.length() == 0) { // the identifier = processID@hostName identifier = ManagementFactory.getRuntimeMXBean().getName(); } LOG.info("Process identifier=" + identifier + " connecting to ZooKeeper ensemble=" + quorumServers); this.identifier = identifier; this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; try { checkZk(); } catch (Exception x) { /* ignore */ } }
@Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) { // Configured gets passed null before real conf. Why? I don't know. return; } sshUserName = conf.get("hbase.it.clustermanager.ssh.user", ""); String extraSshOptions = conf.get("hbase.it.clustermanager.ssh.opts", ""); sshOptions = System.getenv("HBASE_SSH_OPTS"); if (!extraSshOptions.isEmpty()) { sshOptions = StringUtils.join(new Object[] { sshOptions, extraSshOptions }, " "); } sshOptions = (sshOptions == null) ? "" : sshOptions; sshUserName = (sshUserName == null) ? "" : sshUserName; tunnelCmd = conf.get("hbase.it.clustermanager.ssh.cmd", DEFAULT_TUNNEL_CMD); // Print out ssh special config if any. if ((sshUserName != null && sshUserName.length() > 0) || (sshOptions != null && sshOptions.length() > 0)) { LOG.info("Running with SSH user [" + sshUserName + "] and options [" + sshOptions + "]"); } this.retryCounterFactory = new RetryCounterFactory(new RetryConfig() .setMaxAttempts(conf.getInt(RETRY_ATTEMPTS_KEY, DEFAULT_RETRY_ATTEMPTS)) .setSleepInterval(conf.getLong(RETRY_SLEEP_INTERVAL_KEY, DEFAULT_RETRY_SLEEP_INTERVAL))); }
/** * @return True if region is online and scannable else false if an error or shutdown (Otherwise * we just block in here holding up all forward-progess). */ private boolean isRegionOnline(RegionInfo ri) throws InterruptedException { RetryCounter rc = null; while (!isStopped()) { RegionState rs = this.assignmentManager.getRegionStates().getRegionState(ri); if (rs.isOpened()) { if (this.getServerManager().isServerOnline(rs.getServerName())) { return true; } } // Region is not OPEN. Optional<Procedure<MasterProcedureEnv>> optProc = this.procedureExecutor.getProcedures(). stream().filter(p -> p instanceof ServerCrashProcedure).findAny(); // TODO: Add a page to refguide on how to do repair. Have this log message point to it. // Page will talk about loss of edits, how to schedule at least the meta WAL recovery, and // then how to assign including how to break region lock if one held. LOG.warn("{} is NOT online; state={}; ServerCrashProcedures={}. Master startup cannot " + "progress, in holding-pattern until region onlined.", ri.getRegionNameAsString(), rs, optProc.isPresent()); // Check once-a-minute. if (rc == null) { rc = new RetryCounterFactory(1000).create(); } Threads.sleep(rc.getBackoffTimeAndIncrementAttempts()); } return false; }
@Test public void testBasics() throws InterruptedException { int maxAttempts = 10; RetryCounterFactory factory = new RetryCounterFactory(maxAttempts, 10, 1000); RetryCounter retryCounter = factory.create(); while (retryCounter.shouldRetry()) { LOG.info("Attempt={}, backoffTime={}", retryCounter.getAttemptTimes(), retryCounter.getBackoffTime()); retryCounter.sleepUntilNextRetry(); } assertTrue(retryCounter.getAttemptTimes() == maxAttempts); } }
/** * Get an exponential backoff retry counter. The base unit is 100 milliseconds, and the max * backoff time is 30 seconds. */ public static RetryCounter getRetryCounter() { return new RetryCounterFactory( new RetryCounter.RetryConfig().setBackoffPolicy(new RetryCounter.ExponentialBackoffPolicy()) .setSleepInterval(100).setMaxSleepTime(30000).setMaxAttempts(Integer.MAX_VALUE) .setTimeUnit(TimeUnit.MILLISECONDS).setJitter(0.01f)).create(); } }
RetryCounterFactory rcf = new RetryCounterFactory(Integer.MAX_VALUE, this.sleeper.getPeriod(), 1000 * 60 * 5); RetryCounter rc = rcf.create();
RetryCounter counter = new RetryCounterFactory(maxAttempts, (int)pause).create();
public RecoverableZooKeeper(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis) throws IOException { this.zk = new ZooKeeper(quorumServers, sessionTimeout, watcher); this.retryCounterFactory = new RetryCounterFactory(maxRetries, retryIntervalMillis); // the identifier = processID@hostName this.identifier = ManagementFactory.getRuntimeMXBean().getName(); LOG.info("The identifier of this process is " + identifier); this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; }
public RecoverableZooKeeper(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis) throws IOException { this.zk = new ZooKeeper(quorumServers, sessionTimeout, watcher); this.retryCounterFactory = new RetryCounterFactory(maxRetries, retryIntervalMillis); // the identifier = processID@hostName this.identifier = ManagementFactory.getRuntimeMXBean().getName(); LOG.info("The identifier of this process is " + identifier); this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; salter = new SecureRandom(); }
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="DE_MIGHT_IGNORE", justification="None. Its always been this way.") public RecoverableZooKeeper(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis, int maxSleepTime, String identifier) throws IOException { // TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should. this.retryCounterFactory = new RetryCounterFactory(maxRetries+1, retryIntervalMillis, maxSleepTime); if (identifier == null || identifier.length() == 0) { // the identifier = processID@hostName identifier = ManagementFactory.getRuntimeMXBean().getName(); } LOG.info("Process identifier=" + identifier + " connecting to ZooKeeper ensemble=" + quorumServers); this.identifier = identifier; this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; try { checkZk(); } catch (Exception x) { /* ignore */ } }
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DE_MIGHT_IGNORE", justification = "None. Its always been this way.") public RecoverableZooKeeper2(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis, String identifier) throws IOException { // TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should. this.retryCounterFactory = new RetryCounterFactory(maxRetries + 1, retryIntervalMillis); if (identifier == null || identifier.length() == 0) { // the identifier = processID@hostName identifier = ManagementFactory.getRuntimeMXBean().getName(); } LOG.info("Process identifier=" + identifier + " connecting to ZooKeeper ensemble=" + quorumServers); this.identifier = identifier; this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; try { checkZk(); } catch (Exception x) {/* ignore */} salter = new Random(); }
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="DE_MIGHT_IGNORE", justification="None. Its always been this way.") public RecoverableZooKeeper(String quorumServers, int sessionTimeout, Watcher watcher, int maxRetries, int retryIntervalMillis, String identifier) throws IOException { // TODO: Add support for zk 'chroot'; we don't add it to the quorumServers String as we should. this.retryCounterFactory = new RetryCounterFactory(maxRetries+1, retryIntervalMillis); if (identifier == null || identifier.length() == 0) { // the identifier = processID@hostName identifier = ManagementFactory.getRuntimeMXBean().getName(); } LOG.info("Process identifier=" + identifier + " connecting to ZooKeeper ensemble=" + quorumServers); this.identifier = identifier; this.id = Bytes.toBytes(identifier); this.watcher = watcher; this.sessionTimeout = sessionTimeout; this.quorumServers = quorumServers; try {checkZk();} catch (Exception x) {/* ignore */} salter = new Random(); }
ServerManager(final Server master, final MasterServices services, final boolean connect) throws IOException { this.master = master; this.services = services; Configuration c = master.getConfiguration(); maxSkew = c.getLong("hbase.master.maxclockskew", 30000); warningSkew = c.getLong("hbase.master.warningclockskew", 10000); this.connection = connect ? (ClusterConnection)ConnectionFactory.createConnection(c) : null; int pingMaxAttempts = Math.max(1, master.getConfiguration().getInt( "hbase.master.maximum.ping.server.attempts", 10)); int pingSleepInterval = Math.max(1, master.getConfiguration().getInt( "hbase.master.ping.server.retry.sleep.interval", 100)); this.pingRetryCounterFactory = new RetryCounterFactory(pingMaxAttempts, pingSleepInterval); }
/** * Constructor * * @param conf * Configuration object * @throws MasterNotRunningException * if the master is not running * @throws ZooKeeperConnectionException * if unable to connect to ZooKeeper */ public HBaseFsck(Configuration conf, ExecutorService exec) throws MasterNotRunningException, ZooKeeperConnectionException, IOException, ClassNotFoundException { super(conf); errors = getErrorReporter(getConf()); this.executor = exec; lockFileRetryCounterFactory = new RetryCounterFactory( getConf().getInt("hbase.hbck.lockfile.attempts", DEFAULT_MAX_LOCK_FILE_ATTEMPTS), getConf().getInt( "hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL), getConf().getInt( "hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME)); }
@Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) { // Configured gets passed null before real conf. Why? I don't know. return; } sshUserName = conf.get("hbase.it.clustermanager.ssh.user", ""); String extraSshOptions = conf.get("hbase.it.clustermanager.ssh.opts", ""); sshOptions = System.getenv("HBASE_SSH_OPTS"); if (!extraSshOptions.isEmpty()) { sshOptions = StringUtils.join(new Object[] { sshOptions, extraSshOptions }, " "); } sshOptions = (sshOptions == null) ? "" : sshOptions; sshUserName = (sshUserName == null) ? "" : sshUserName; tunnelCmd = conf.get("hbase.it.clustermanager.ssh.cmd", DEFAULT_TUNNEL_CMD); // Print out ssh special config if any. if ((sshUserName != null && sshUserName.length() > 0) || (sshOptions != null && sshOptions.length() > 0)) { LOG.info("Running with SSH user [" + sshUserName + "] and options [" + sshOptions + "]"); } this.retryCounterFactory = new RetryCounterFactory(new RetryConfig() .setMaxAttempts(conf.getInt(RETRY_ATTEMPTS_KEY, DEFAULT_RETRY_ATTEMPTS)) .setSleepInterval(conf.getLong(RETRY_SLEEP_INTERVAL_KEY, DEFAULT_RETRY_SLEEP_INTERVAL))); }
@Test public void testBasics() throws InterruptedException { int maxAttempts = 10; RetryCounterFactory factory = new RetryCounterFactory(maxAttempts, 10, 1000); RetryCounter retryCounter = factory.create(); while (retryCounter.shouldRetry()) { LOG.info("Attempt={}, backoffTime={}", retryCounter.getAttemptTimes(), retryCounter.getBackoffTime()); retryCounter.sleepUntilNextRetry(); } assertTrue(retryCounter.getAttemptTimes() == maxAttempts); } }
RetryCounter counter = new RetryCounterFactory(maxAttempts, (int)pause).create();