private int doRun(String[] args) throws Exception { try { initZK(); } catch (KeeperException ke) { LOG.error("Unable to start failover controller. Unable to connect " interactive = false; } else { badArg(args[i]); return formatZK(force, interactive); badArg(args[0]); initRPC(); initHM(); startRPC(); mainLoop(); } catch (Exception e) { LOG.error("The failover controller encounters runtime error: ", e);
/** * Ensure that the local node is in a healthy state, and thus * eligible for graceful failover. * @throws ServiceFailedException if the node is unhealthy */ private synchronized void checkEligibleForFailover() throws ServiceFailedException { // Check health if (this.getLastHealthState() != State.SERVICE_HEALTHY) { throw new ServiceFailedException( localTarget + " is not currently healthy. " + "Cannot be failover target"); } }
/**
 * RPC entry point for ceding the active role: runs the admin access
 * check on the failover controller and then hands the request to
 * {@code zkfc.cedeActive}.
 *
 * @param millisToCede value forwarded unchanged to {@code zkfc.cedeActive}
 * @throws IOException declared by the interface; propagated from the
 *         delegated calls
 * @throws AccessControlException presumably raised by the access check
 *         when the caller is not an administrator (confirm against
 *         {@code checkRpcAdminAccess})
 */
@Override
public void cedeActive(int millisToCede)
    throws IOException, AccessControlException {
  // The access check must run before any state is touched.
  zkfc.checkRpcAdminAccess();
  zkfc.cedeActive(millisToCede);
}
/**
 * RPC entry point for a graceful failover: runs the admin access check
 * on the failover controller and then delegates to
 * {@code zkfc.gracefulFailoverToYou()}.
 *
 * @throws IOException declared by the interface; propagated from the
 *         delegated calls
 * @throws AccessControlException presumably raised by the access check
 *         when the caller is not an administrator (confirm against
 *         {@code checkRpcAdminAccess})
 */
@Override
public void gracefulFailover()
    throws IOException, AccessControlException {
  // The access check must run before the failover is attempted.
  zkfc.checkRpcAdminAccess();
  zkfc.gracefulFailoverToYou();
}
/**
 * Fence the node described by the given serialized data. Any failure is
 * recorded as an unsuccessful active attempt and then rethrown to the
 * caller unchanged.
 *
 * @param data serialized description of the node to fence, decoded with
 *        {@code dataToTarget}
 */
private synchronized void fenceOldActive(byte[] data) {
  final HAServiceTarget previousActive = dataToTarget(data);
  try {
    doFence(previousActive);
  } catch (Throwable failure) {
    final String detail = "Unable to fence old active: "
        + StringUtils.stringifyException(failure);
    recordActiveAttempt(new ActiveAttemptRecord(false, detail));
    // Rethrow as-is; the catch parameter is never reassigned.
    throw failure;
  }
}
checkEligibleForFailover(); HAServiceTarget oldActive = getCurrentActive(); if (oldActive == null) { ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
checkEligibleForFailover(); HAServiceTarget oldActive = getCurrentActive(); if (oldActive == null) { List<HAServiceTarget> otherNodes = getAllOtherNodes(); List<ZKFCProtocol> otherZkfcs = new ArrayList<ZKFCProtocol>(otherNodes.size()); continue; otherZkfcs.add(cedeRemoteActive(remote, timeout)); otherZkfcs.add(cedeRemoteActive(activeNode, timeout)); ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
if (millisToCede <= 0) { delayJoiningUntilNanotime = 0; recheckElectability(); return; boolean needFence = false; try { localTarget.getProxy(conf, timeout).transitionToStandby(createReqInfo()); LOG.info("Successfully ensured local node is in standby mode"); } catch (IOException ioe) { recheckElectability();
HAServiceProtocolHelper.transitionToActive(localTarget.getProxy( conf, FailoverController.getRpcTimeoutToNewActive(conf)), createReqInfo()); String msg = "Successfully transitioned " + localTarget + " to active state"; LOG.info(msg); serviceState = HAServiceState.ACTIVE; recordActiveAttempt(new ActiveAttemptRecord(true, msg)); LOG.error(msg, t); recordActiveAttempt(new ActiveAttemptRecord(false, msg + "\n" + StringUtils.stringifyException(t)));
/**
 * Run the superclass RPC initialization, then record the port of the
 * RPC server's address on the local NameNode target as its ZKFC port.
 *
 * @throws IOException propagated from {@code super.initRPC()}
 */
@Override
protected void initRPC() throws IOException {
  super.initRPC();
  // rpcServer is read only after super.initRPC() has run.
  final int zkfcPort = rpcServer.getAddress().getPort();
  localNNTarget.setZkfcPort(zkfcPort);
}
/**
 * Format the elector's parent znode. If the parent znode already exists
 * it is cleared first, but only when {@code force} is set or, in
 * interactive mode, {@code confirmFormat()} returns true. Afterwards
 * the parent znode is (re)created via {@code ensureParentZNode()}.
 *
 * @param force clear an existing parent znode without asking
 * @param interactive whether {@code confirmFormat()} may be consulted
 *        when {@code force} is not set
 * @return 0 on success, {@code ERR_CODE_FORMAT_DENIED} if an existing
 *         parent znode was neither forced nor confirmed for removal,
 *         or 1 if clearing the existing parent znode failed with an
 *         {@link IOException}
 * @throws IOException propagated from the elector calls outside the
 *         guarded clear step
 * @throws InterruptedException propagated from the elector calls
 * @throws KeeperException propagated from the elector calls
 */
private int formatZK(boolean force, boolean interactive)
    throws IOException, InterruptedException, KeeperException {
  if (elector.parentZNodeExists()) {
    // Short-circuit order matters: confirmFormat() is consulted only
    // when not forced and running interactively.
    final boolean approved = force || (interactive && confirmFormat());
    if (!approved) {
      return ERR_CODE_FORMAT_DENIED;
    }
    try {
      elector.clearParentZNode();
    } catch (IOException clearFailure) {
      LOG.error("Unable to clear zk parent znode", clearFailure);
      return 1;
    }
  }
  elector.ensureParentZNode();
  return 0;
}
private synchronized void becomeStandby() { LOG.info("ZK Election indicated that " + localTarget + " should become standby"); try { int timeout = FailoverController.getGracefulFenceTimeout(conf); localTarget.getProxy(conf, timeout).transitionToStandby(createReqInfo()); LOG.info("Successfully transitioned " + localTarget + " to standby state"); } catch (Exception e) { LOG.error("Couldn't transition " + localTarget + " to standby state", e); // TODO handle this. It's a likely case since we probably got fenced // at the same time. } serviceState = HAServiceState.STANDBY; }
/**
 * Look up the node currently recorded as active by the elector.
 *
 * <p>The elector's monitor is acquired before this object's monitor,
 * and the lookup runs while both are held.
 *
 * @return an {@link HAServiceTarget} for the current active node
 *         in the cluster, or null if no node is active.
 * @throws IOException if a ZK-related issue occurs
 * @throws InterruptedException if thread is interrupted
 */
private HAServiceTarget getCurrentActive()
    throws IOException, InterruptedException {
  synchronized (elector) {
    synchronized (this) {
      final byte[] rawActive;
      try {
        rawActive = elector.getActiveData();
      } catch (ActiveNotFoundException noActive) {
        // No active node is registered.
        return null;
      } catch (KeeperException zkFailure) {
        throw new IOException(
            "Unexpected ZooKeeper issue fetching active node info",
            zkFailure);
      }
      // Decoding stays outside the try so its failures are not
      // translated by the handlers above.
      return dataToTarget(rawActive);
    }
  }
}
/**
 * Fence the node described by the given serialized data. Any failure is
 * recorded as an unsuccessful active attempt and then rethrown to the
 * caller.
 *
 * @param data serialized description of the node to fence, decoded with
 *        {@code dataToTarget}
 */
private synchronized void fenceOldActive(byte[] data) {
  HAServiceTarget target = dataToTarget(data);

  try {
    doFence(target);
  } catch (Throwable t) {
    recordActiveAttempt(new ActiveAttemptRecord(false,
        "Unable to fence old active: " +
        StringUtils.stringifyException(t)));
    // Rethrow directly instead of going through Guava's deprecated
    // Throwables.propagate. Precise rethrow keeps this legal without a
    // throws clause as long as doFence declares no checked exceptions
    // (assumed; propagate would only have wrapped checked exceptions,
    // while runtime exceptions and errors already passed through as-is).
    throw t;
  }
}
checkEligibleForFailover(); HAServiceTarget oldActive = getCurrentActive(); if (oldActive == null) { ActiveAttemptRecord attempt = waitForActiveAttempt(timeout + 60000);
/**
 * RPC entry point for a graceful failover: runs the admin access check
 * on the failover controller and then delegates to
 * {@code zkfc.gracefulFailoverToYou()}.
 *
 * @throws IOException declared by the interface; propagated from the
 *         delegated calls
 * @throws AccessControlException presumably raised by the access check
 *         when the caller is not an administrator (confirm against
 *         {@code checkRpcAdminAccess})
 */
@Override
public void gracefulFailover()
    throws IOException, AccessControlException {
  // The access check must run before the failover is attempted.
  zkfc.checkRpcAdminAccess();
  zkfc.gracefulFailoverToYou();
}
if (millisToCede <= 0) { delayJoiningUntilNanotime = 0; recheckElectability(); return; boolean needFence = false; try { localTarget.getProxy(conf, timeout).transitionToStandby(createReqInfo()); LOG.info("Successfully ensured local node is in standby mode"); } catch (IOException ioe) { recheckElectability();
HAServiceProtocolHelper.transitionToActive(localTarget.getProxy( conf, FailoverController.getRpcTimeoutToNewActive(conf)), createReqInfo()); String msg = "Successfully transitioned " + localTarget + " to active state"; LOG.info(msg); serviceState = HAServiceState.ACTIVE; recordActiveAttempt(new ActiveAttemptRecord(true, msg)); LOG.fatal(msg, t); recordActiveAttempt(new ActiveAttemptRecord(false, msg + "\n" + StringUtils.stringifyException(t)));
/**
 * Run the superclass RPC initialization, then record the port of the
 * RPC server's address on the local NameNode target as its ZKFC port.
 *
 * @throws IOException propagated from {@code super.initRPC()}
 */
@Override
protected void initRPC() throws IOException {
  super.initRPC();
  // rpcServer is read only after super.initRPC() has run.
  final int zkfcPort = rpcServer.getAddress().getPort();
  localNNTarget.setZkfcPort(zkfcPort);
}
/**
 * Format the elector's parent znode. If the parent znode already exists
 * it is cleared first, but only when {@code force} is set or, in
 * interactive mode, {@code confirmFormat()} returns true. Afterwards
 * the parent znode is (re)created via {@code ensureParentZNode()}.
 *
 * @param force clear an existing parent znode without asking
 * @param interactive whether {@code confirmFormat()} may be consulted
 *        when {@code force} is not set
 * @return 0 on success, {@code ERR_CODE_FORMAT_DENIED} if an existing
 *         parent znode was neither forced nor confirmed for removal,
 *         or 1 if clearing the existing parent znode failed with an
 *         {@link IOException}
 * @throws IOException propagated from the elector calls outside the
 *         guarded clear step
 * @throws InterruptedException propagated from the elector calls
 */
private int formatZK(boolean force, boolean interactive)
    throws IOException, InterruptedException {
  final boolean alreadyFormatted = elector.parentZNodeExists();
  if (alreadyFormatted) {
    // Short-circuit order matters: confirmFormat() is consulted only
    // when not forced and running interactively.
    final boolean approved = force || (interactive && confirmFormat());
    if (!approved) {
      return ERR_CODE_FORMAT_DENIED;
    }
    try {
      elector.clearParentZNode();
    } catch (IOException clearFailure) {
      LOG.error("Unable to clear zk parent znode", clearFailure);
      return 1;
    }
  }
  elector.ensureParentZNode();
  return 0;
}