FailoverController fc = new FailoverController(getConf(), requestSource); fc.failover(fromNode, toNode, forceFence, forceActive); out.println("Failover from "+args[0]+" to "+args[1]+" successful"); } catch (FailoverFailedException ffe) {
Preconditions.checkArgument(fromSvc.getFencer() != null, "failover requires a fencer"); preFailoverChecks(fromSvc, toSvc, forceActive); if (tryGracefulFence(fromSvc)) { tryFence = forceFence; HAServiceProtocolHelper.transitionToActive( toSvc.getProxy(conf, rpcTimeoutToNewActive), createReqInfo()); } catch (ServiceFailedException sfe) { LOG.error("Unable to make {} active ({}). Failing back.", failover(toSvc, fromSvc, true, true); } catch (FailoverFailedException ffe) { msg += ". Failback to " + fromSvc +
public FailoverController(Configuration conf, RequestSource source) { this.conf = conf; this.gracefulFenceConf = new Configuration(conf); this.requestSource = source; this.gracefulFenceTimeout = getGracefulFenceTimeout(conf); this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf); //Configure less retries for graceful fence int gracefulFenceConnectRetries = conf.getInt( CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES, CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT); gracefulFenceConf.setInt( CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, gracefulFenceConnectRetries); gracefulFenceConf.setInt( CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, gracefulFenceConnectRetries); }
private void doFence(HAServiceTarget target) { LOG.info("Should fence: " + target); boolean gracefulWorked = new FailoverController(conf, RequestSource.REQUEST_BY_ZKFC).tryGracefulFence(target); if (gracefulWorked) { // It's possible that it's in standby but just about to go into active, // no? Is there some race here? LOG.info("Successfully transitioned " + target + " to standby " + "state without fencing"); return; } try { target.checkFencingConfigured(); } catch (BadFencingConfigurationException e) { LOG.error("Couldn't fence old active " + target, e); recordActiveAttempt(new ActiveAttemptRecord(false, "Unable to fence old active")); throw new RuntimeException(e); } if (!target.getFencer().fence(target)) { throw new RuntimeException("Unable to fence " + target); } }
private synchronized void becomeStandby() { LOG.info("ZK Election indicated that " + localTarget + " should become standby"); try { int timeout = FailoverController.getGracefulFenceTimeout(conf); localTarget.getProxy(conf, timeout).transitionToStandby(createReqInfo()); LOG.info("Successfully transitioned " + localTarget + " to standby state"); } catch (Exception e) { LOG.error("Couldn't transition " + localTarget + " to standby state", e); // TODO handle this. It's a likely case since we probably got fenced // at the same time. } serviceState = HAServiceState.STANDBY; }
/**
 * Ask the ZKFC of the target node to carry out a graceful failover.
 *
 * A ZKFC proxy for {@code toNode} is obtained using the
 * "RPC timeout to new active" setting, and a single
 * {@code gracefulFailover()} RPC is issued on it.
 *
 * @param toNode the node to fail over to
 * @return 0 when the RPC completes; -1 when it fails with a
 *         {@link ServiceFailedException} (the reason is printed to the
 *         error stream)
 * @throws IOException for any other I/O failure while obtaining the proxy
 *         or performing the call
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  try {
    zkfc.gracefulFailover();
  } catch (ServiceFailedException sfe) {
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    return -1;
  }

  out.println("Failover to " + toNode + " successful");
  return 0;
}
/**
 * Try to move the given service to standby without fencing it.
 *
 * The proxy is created from the graceful-fence configuration and timeout
 * held by this controller, and is always stopped before returning.
 * According to the original note on this method, the call is meant to be
 * quick and exists to avoid fencing a node that has already restarted.
 *
 * @param svc the service to transition to standby
 * @return true if the transition RPC completed; false if it failed with a
 *         {@link ServiceFailedException} or an {@link IOException} (both
 *         are logged as warnings)
 */
boolean tryGracefulFence(HAServiceTarget svc) {
  boolean madeStandby = false;
  HAServiceProtocol proxy = null;
  try {
    proxy = svc.getProxy(gracefulFenceConf, gracefulFenceTimeout);
    proxy.transitionToStandby(createReqInfo());
    madeStandby = true;
  } catch (ServiceFailedException sfe) {
    LOG.warn("Unable to gracefully make {} standby ({})",
        svc, sfe.getMessage());
  } catch (IOException ioe) {
    LOG.warn("Unable to gracefully make {} standby (unable to connect)",
        svc, ioe);
  } finally {
    // Release the proxy whether or not the transition succeeded.
    if (proxy != null) {
      RPC.stopProxy(proxy);
    }
  }
  return madeStandby;
}
private void doFence(HAServiceTarget target) { LOG.info("Should fence: " + target); boolean gracefulWorked = new FailoverController(conf, RequestSource.REQUEST_BY_ZKFC).tryGracefulFence(target); if (gracefulWorked) { // It's possible that it's in standby but just about to go into active, // no? Is there some race here? LOG.info("Successfully transitioned " + target + " to standby " + "state without fencing"); return; } try { target.checkFencingConfigured(); } catch (BadFencingConfigurationException e) { LOG.error("Couldn't fence old active " + target, e); recordActiveAttempt(new ActiveAttemptRecord(false, "Unable to fence old active")); throw new RuntimeException(e); } if (!target.getFencer().fence(target)) { throw new RuntimeException("Unable to fence " + target); } }
private void doCedeActive(int millisToCede) throws AccessControlException, ServiceFailedException, IOException { int timeout = FailoverController.getGracefulFenceTimeout(conf);
try { HAServiceProtocolHelper.transitionToActive(localTarget.getProxy( conf, FailoverController.getRpcTimeoutToNewActive(conf)), createReqInfo()); String msg = "Successfully transitioned " + localTarget +
HAServiceProtocolHelper.monitorHealth(toSvc, createReqInfo()); } catch (HealthCheckFailedException hce) { throw new FailoverFailedException(
Preconditions.checkArgument(fromSvc.getFencer() != null, "failover requires a fencer"); preFailoverChecks(fromSvc, toSvc, forceActive); if (tryGracefulFence(fromSvc)) { tryFence = forceFence; HAServiceProtocolHelper.transitionToActive( toSvc.getProxy(conf, rpcTimeoutToNewActive), createReqInfo()); } catch (ServiceFailedException sfe) { LOG.error("Unable to make " + toSvc + " active (" + failover(toSvc, fromSvc, true, true); } catch (FailoverFailedException ffe) { msg += ". Failback to " + fromSvc +
/**
 * Run a user-requested failover from {@code tgt1} to {@code tgt2}.
 *
 * Delegates to a freshly constructed {@link FailoverController} tagged
 * with {@link RequestSource#REQUEST_BY_USER}.
 *
 * @param tgt1 the service to fail over from
 * @param tgt2 the service to fail over to
 * @param forceFence passed through to {@code FailoverController.failover}
 * @param forceActive passed through to {@code FailoverController.failover}
 * @throws FailoverFailedException if the controller reports failure
 */
private void doFailover(HAServiceTarget tgt1, HAServiceTarget tgt2,
    boolean forceFence, boolean forceActive)
    throws FailoverFailedException {
  new FailoverController(conf, RequestSource.REQUEST_BY_USER)
      .failover(tgt1, tgt2, forceFence, forceActive);
}
private void doFence(HAServiceTarget target) { LOG.info("Should fence: " + target); boolean gracefulWorked = new FailoverController(conf, RequestSource.REQUEST_BY_ZKFC).tryGracefulFence(target); if (gracefulWorked) { // It's possible that it's in standby but just about to go into active, // no? Is there some race here? LOG.info("Successfully transitioned " + target + " to standby " + "state without fencing"); return; } try { target.checkFencingConfigured(); } catch (BadFencingConfigurationException e) { LOG.error("Couldn't fence old active " + target, e); recordActiveAttempt(new ActiveAttemptRecord(false, "Unable to fence old active")); throw new RuntimeException(e); } if (!target.getFencer().fence(target)) { throw new RuntimeException("Unable to fence " + target); } }
public FailoverController(Configuration conf, RequestSource source) { this.conf = conf; this.gracefulFenceConf = new Configuration(conf); this.requestSource = source; this.gracefulFenceTimeout = getGracefulFenceTimeout(conf); this.rpcTimeoutToNewActive = getRpcTimeoutToNewActive(conf); //Configure less retries for graceful fence int gracefulFenceConnectRetries = conf.getInt( CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES, CommonConfigurationKeys.HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT); gracefulFenceConf.setInt( CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, gracefulFenceConnectRetries); gracefulFenceConf.setInt( CommonConfigurationKeys.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, gracefulFenceConnectRetries); }
int timeout = FailoverController.getGracefulFenceTimeout(conf) * 2;
/**
 * Ask the ZKFC of the target node to carry out a graceful failover.
 *
 * A ZKFC proxy for {@code toNode} is obtained using the
 * "RPC timeout to new active" setting, and a single
 * {@code gracefulFailover()} RPC is issued on it.
 *
 * @param toNode the node to fail over to
 * @return 0 when the RPC completes; -1 when it fails with a
 *         {@link ServiceFailedException} (the reason is printed to the
 *         error stream)
 * @throws IOException for any other I/O failure while obtaining the proxy
 *         or performing the call
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  try {
    zkfc.gracefulFailover();
  } catch (ServiceFailedException sfe) {
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    return -1;
  }

  out.println("Failover to " + toNode + " successful");
  return 0;
}
/**
 * Try to move the given service to standby without fencing it.
 *
 * The proxy is created from the graceful-fence configuration and timeout
 * held by this controller, and is always stopped before returning.
 * According to the original note on this method, the call is meant to be
 * quick and exists to avoid fencing a node that has already restarted.
 *
 * @param svc the service to transition to standby
 * @return true if the transition RPC completed; false if it failed with a
 *         {@link ServiceFailedException} or an {@link IOException} (both
 *         are logged as warnings)
 */
boolean tryGracefulFence(HAServiceTarget svc) {
  boolean madeStandby = false;
  HAServiceProtocol proxy = null;
  try {
    proxy = svc.getProxy(gracefulFenceConf, gracefulFenceTimeout);
    proxy.transitionToStandby(createReqInfo());
    madeStandby = true;
  } catch (ServiceFailedException sfe) {
    LOG.warn("Unable to gracefully make " + svc + " standby (" +
        sfe.getMessage() + ")");
  } catch (IOException ioe) {
    LOG.warn("Unable to gracefully make " + svc +
        " standby (unable to connect)", ioe);
  } finally {
    // Release the proxy whether or not the transition succeeded.
    if (proxy != null) {
      RPC.stopProxy(proxy);
    }
  }
  return madeStandby;
}
Preconditions.checkArgument(fromSvc.getFencer() != null, "failover requires a fencer"); preFailoverChecks(fromSvc, toSvc, forceActive); if (tryGracefulFence(fromSvc)) { tryFence = forceFence; HAServiceProtocolHelper.transitionToActive( toSvc.getProxy(conf, rpcTimeoutToNewActive), createReqInfo()); } catch (ServiceFailedException sfe) { LOG.error("Unable to make " + toSvc + " active (" + failover(toSvc, fromSvc, true, true); } catch (FailoverFailedException ffe) { msg += ". Failback to " + fromSvc +
/**
 * Run a user-requested failover from {@code tgt1} to {@code tgt2}.
 *
 * Delegates to a freshly constructed {@link FailoverController} tagged
 * with {@link RequestSource#REQUEST_BY_USER}.
 *
 * @param tgt1 the service to fail over from
 * @param tgt2 the service to fail over to
 * @param forceFence passed through to {@code FailoverController.failover}
 * @param forceActive passed through to {@code FailoverController.failover}
 * @throws FailoverFailedException if the controller reports failure
 */
private void doFailover(HAServiceTarget tgt1, HAServiceTarget tgt2,
    boolean forceFence, boolean forceActive)
    throws FailoverFailedException {
  new FailoverController(conf, RequestSource.REQUEST_BY_USER)
      .failover(tgt1, tgt2, forceFence, forceActive);
}