/**
 * Writes the given health script, runs a {@link HealthChecker} against it and asserts
 * that the reported status equals {@code expectedStatus}.
 *
 * <p>The script file is removed in a {@code finally} block so that a failed assertion or
 * an exception from the checker does not leave the script behind on disk.
 *
 * @param script         contents of the health script passed to {@code createScript}
 * @param expectedStatus status the checker is expected to report for that script
 * @throws Exception if configuration lookup, script creation or the check itself fails
 */
public void healthCheckerTest(String script, HealthCheckerExitStatus expectedStatus)
    throws Exception {
  Configuration config = getConfForNodeHealthScript();
  config.addResource(healthScriptFile.getName());
  String location = healthScriptFile.getAbsolutePath();
  long timeout = config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT, SCRIPT_TIMEOUT);

  HealthChecker checker = new HealthChecker();
  checker.init(location, timeout);

  try {
    // NOTE(review): the meaning of the second argument is defined by createScript,
    // which is not visible here (presumably "make executable") - confirm.
    createScript(script, true);
    HealthReport report = checker.checkHealth();
    assertEquals(expectedStatus, report.getStatus());
    LOG.info("Health Status:" + report.getHealthReport());
  } finally {
    // Always clean up, even when the assertion above fails.
    this.healthScriptFile.delete();
  }
}
public HealthReport checkHealth() { HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; try { // Calling this execute leaves around running executor threads. shexec.execute(); } catch (ExitCodeException e) { // ignore the exit code of the script LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode()); status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; } catch (IOException e) { LOG.warn("Caught exception : " + e); status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e); } finally { if (shexec.isTimedOut()) { status = HealthCheckerExitStatus.TIMED_OUT; } if (status == HealthCheckerExitStatus.SUCCESS) { if (hasErrors(shexec.getOutput())) { status = HealthCheckerExitStatus.FAILED; } } } return new HealthReport(status, getHealthReport(status)); }
/**
 * Builds the health-check chore: registers it under the name "HealthChecker", reads the
 * script location, script timeout and failure threshold from {@code conf}, and
 * initialises the underlying {@link HealthChecker}.
 *
 * @param sleepTime period of the chore, as handed to the superclass
 * @param stopper   object handed to the superclass; {@code chore()} uses it to request a stop
 * @param conf      configuration the health-check settings are read from
 */
public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
  super("HealthChecker", stopper, sleepTime);
  LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
  this.config = conf;

  final String scriptLocation = conf.get(HConstants.HEALTH_SCRIPT_LOC);
  final long timeoutForScript =
      conf.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT, HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
  this.healthChecker = new HealthChecker();
  this.healthChecker.init(scriptLocation, timeoutForScript);

  this.threshold =
      conf.getInt(HConstants.HEALTH_FAILURE_THRESHOLD, HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
  // Widen before multiplying so the product is computed in long arithmetic.
  final long widenedThreshold = this.threshold;
  this.failureWindow = widenedThreshold * sleepTime;
}
@Override protected void chore() { HealthReport report = healthChecker.checkHealth(); boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS); if (!isHealthy) { boolean needToStop = decideToStop(); if (needToStop) { this.getStopper().stop("The node reported unhealthy " + threshold + " number of times consecutively."); } // Always log health report. LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : " + report.getHealthReport()); } }
/**
 * Creates the health-check chore: registers it under the name "HealthChecker", reads the
 * script location, script timeout and failure threshold from {@code conf}, and
 * initialises the underlying {@link HealthChecker}.
 *
 * <p>The failure window is {@code threshold * sleepTime}. The product is computed in
 * {@code long} arithmetic; multiplying the two {@code int} values directly would wrap
 * around for large settings before the result is stored.
 *
 * @param sleepTime period of the chore, as handed to the superclass
 * @param stopper   object handed to the superclass; {@code chore()} uses it to request a stop
 * @param conf      configuration the health-check settings are read from
 */
public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
  super("HealthChecker", sleepTime, stopper);
  LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));
  this.config = conf;
  String healthCheckScript = this.config.get(HConstants.HEALTH_SCRIPT_LOC);
  long scriptTimeout = this.config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT,
      HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);
  healthChecker = new HealthChecker();
  healthChecker.init(healthCheckScript, scriptTimeout);
  this.threshold = config.getInt(HConstants.HEALTH_FAILURE_THRESHOLD,
      HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
  // Cast first: int * int overflows silently before any widening takes place.
  this.failureWindow = (long) this.threshold * (long) sleepTime;
}
@Override protected void chore() { HealthReport report = healthChecker.checkHealth(); boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS); if (!isHealthy) { boolean needToStop = decideToStop(); if (needToStop) { this.stopper.stop("The region server reported unhealthy " + threshold + " number of times consecutively."); } // Always log health report. LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : " + report.getHealthReport()); } }
/**
 * Creates the given health script, runs a {@link HealthChecker} on it and verifies that
 * the resulting status matches {@code expectedStatus}.
 *
 * <p>Deletion of the script file happens in {@code finally}, so the file is removed even
 * when the assertion fails or the checker throws.
 *
 * @param script         contents of the health script passed to {@code createScript}
 * @param expectedStatus status the checker is expected to report for that script
 * @throws Exception if configuration lookup, script creation or the check itself fails
 */
public void healthCheckerTest(String script, HealthCheckerExitStatus expectedStatus)
    throws Exception {
  Configuration config = getConfForNodeHealthScript();
  config.addResource(healthScriptFile.getName());
  String location = healthScriptFile.getAbsolutePath();
  long timeout = config.getLong(HConstants.HEALTH_SCRIPT_TIMEOUT, SCRIPT_TIMEOUT);

  HealthChecker checker = new HealthChecker();
  checker.init(location, timeout);

  try {
    // NOTE(review): the second argument's meaning is defined by createScript, which is
    // not visible here (presumably "make executable") - confirm.
    createScript(script, true);
    HealthReport report = checker.checkHealth();
    assertEquals(expectedStatus, report.getStatus());
    LOG.info("Health Status:" + report.getHealthReport());
  } finally {
    // Remove the script regardless of the test outcome.
    this.healthScriptFile.delete();
  }
}
/**
 * Sets up the periodic health check. The chore is registered under the name
 * "HealthChecker"; the script path, script timeout and failure threshold come from
 * {@code conf}; the failure window is the threshold multiplied by {@code sleepTime}.
 *
 * @param sleepTime period of the chore, as handed to the superclass
 * @param stopper   object handed to the superclass; {@code chore()} uses it to request a stop
 * @param conf      configuration the health-check settings are read from
 */
public HealthCheckChore(int sleepTime, Stoppable stopper, Configuration conf) {
  super("HealthChecker", stopper, sleepTime);
  LOG.info("Health Check Chore runs every " + StringUtils.formatTime(sleepTime));

  this.config = conf;
  final String scriptPath = conf.get(HConstants.HEALTH_SCRIPT_LOC);
  final long scriptTimeoutValue = conf.getLong(
      HConstants.HEALTH_SCRIPT_TIMEOUT,
      HConstants.DEFAULT_HEALTH_SCRIPT_TIMEOUT);

  this.healthChecker = new HealthChecker();
  this.healthChecker.init(scriptPath, scriptTimeoutValue);

  this.threshold = conf.getInt(
      HConstants.HEALTH_FAILURE_THRESHOLD,
      HConstants.DEFAULT_HEALTH_FAILURE_THRESHOLD);
  // The long cast on the left operand promotes the whole multiplication to long.
  this.failureWindow = (long) this.threshold * sleepTime;
}
public HealthReport checkHealth() { HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; try { shexec.execute(); } catch (ExitCodeException e) { // ignore the exit code of the script LOG.warn("Caught exception : " + e); status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; } catch (IOException e) { LOG.warn("Caught exception : " + e); if (!shexec.isTimedOut()) { status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e); } else { status = HealthCheckerExitStatus.TIMED_OUT; } } finally { if (status == HealthCheckerExitStatus.SUCCESS) { if (hasErrors(shexec.getOutput())) { status = HealthCheckerExitStatus.FAILED; } } } return new HealthReport(status, getHealthReport(status)); }
@Override protected void chore() { HealthReport report = healthChecker.checkHealth(); boolean isHealthy = (report.getStatus() == HealthCheckerExitStatus.SUCCESS); if (!isHealthy) { boolean needToStop = decideToStop(); if (needToStop) { getStopper().stop( "The node reported unhealthy " + threshold + " number of times consecutively."); } // Always log health report. LOG.info("Health status at " + StringUtils.formatTime(System.currentTimeMillis()) + " : " + report.getHealthReport()); } }
public HealthReport checkHealth() { HealthCheckerExitStatus status = HealthCheckerExitStatus.SUCCESS; try { // Calling this execute leaves around running executor threads. shexec.execute(); } catch (ExitCodeException e) { // ignore the exit code of the script LOG.warn("Caught exception : " + e + ",exit code:" + e.getExitCode()); status = HealthCheckerExitStatus.FAILED_WITH_EXIT_CODE; } catch (IOException e) { LOG.warn("Caught exception : " + e); status = HealthCheckerExitStatus.FAILED_WITH_EXCEPTION; exceptionStackTrace = org.apache.hadoop.util.StringUtils.stringifyException(e); } finally { if (shexec.isTimedOut()) { status = HealthCheckerExitStatus.TIMED_OUT; } if (status == HealthCheckerExitStatus.SUCCESS) { if (hasErrors(shexec.getOutput())) { status = HealthCheckerExitStatus.FAILED; } } } return new HealthReport(status, getHealthReport(status)); }