/**
 * RPC entry point for a graceful failover request.
 *
 * Forwards the call to {@code server.gracefulFailover()}. Neither the
 * controller nor the request message is consulted.
 *
 * @param controller RPC controller (not used)
 * @param request the incoming request message (not used)
 * @return the default (empty) response instance
 * @throws ServiceException wrapping any IOException raised by the server
 */
@Override
public GracefulFailoverResponseProto gracefulFailover(
    RpcController controller, GracefulFailoverRequestProto request)
    throws ServiceException {
  try {
    server.gracefulFailover();
  } catch (IOException ioe) {
    // Surface the failure through the RPC layer, keeping the cause attached.
    throw new ServiceException(ioe);
  }
  return GracefulFailoverResponseProto.getDefaultInstance();
}
/**
 * Ask the ZKFC of the target node to coordinate a graceful failover.
 * An RPC is sent to that ZKFC; on success a confirmation is printed to
 * {@code out}, and on a {@link ServiceFailedException} the failure message
 * is printed to {@code errOut}.
 *
 * @param toNode the node to fail to
 * @return 0 on success, -1 if the ZKFC reported a ServiceFailedException
 * @throws IOException if obtaining the proxy or the RPC fails with an
 *         exception that is not handled here
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  int status = 0;
  try {
    zkfc.gracefulFailover();
    out.println("Failover to " + toNode + " successful");
  } catch (ServiceFailedException sfe) {
    // Report the failure to the user and signal it via the exit status.
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    status = -1;
  }
  return status;
}
/**
 * RPC entry point for a graceful failover request.
 *
 * Forwards the call to {@code server.gracefulFailover()}. Neither the
 * controller nor the request message is consulted.
 *
 * @param controller RPC controller (not used)
 * @param request the incoming request message (not used)
 * @return the default (empty) response instance
 * @throws ServiceException wrapping any IOException raised by the server
 */
@Override
public GracefulFailoverResponseProto gracefulFailover(
    RpcController controller, GracefulFailoverRequestProto request)
    throws ServiceException {
  try {
    server.gracefulFailover();
  } catch (IOException ioe) {
    // Surface the failure through the RPC layer, keeping the cause attached.
    throw new ServiceException(ioe);
  }
  return GracefulFailoverResponseProto.getDefaultInstance();
}
/**
 * RPC entry point for a graceful failover request.
 *
 * Forwards the call to {@code server.gracefulFailover()}. Neither the
 * controller nor the request message is consulted.
 *
 * @param controller RPC controller (not used)
 * @param request the incoming request message (not used)
 * @return the default (empty) response instance
 * @throws ServiceException wrapping any IOException raised by the server
 */
@Override
public GracefulFailoverResponseProto gracefulFailover(
    RpcController controller, GracefulFailoverRequestProto request)
    throws ServiceException {
  try {
    server.gracefulFailover();
  } catch (IOException ioe) {
    // Surface the failure through the RPC layer, keeping the cause attached.
    throw new ServiceException(ioe);
  }
  return GracefulFailoverResponseProto.getDefaultInstance();
}
/**
 * RPC entry point for a graceful failover request.
 *
 * Forwards the call to {@code server.gracefulFailover()}. Neither the
 * controller nor the request message is consulted.
 *
 * @param controller RPC controller (not used)
 * @param request the incoming request message (not used)
 * @return the default (empty) response instance
 * @throws ServiceException wrapping any IOException raised by the server
 */
@Override
public GracefulFailoverResponseProto gracefulFailover(
    RpcController controller, GracefulFailoverRequestProto request)
    throws ServiceException {
  try {
    server.gracefulFailover();
  } catch (IOException ioe) {
    // Surface the failure through the RPC layer, keeping the cause attached.
    throw new ServiceException(ioe);
  }
  return GracefulFailoverResponseProto.getDefaultInstance();
}
/**
 * RPC entry point for a graceful failover request.
 *
 * Forwards the call to {@code server.gracefulFailover()}. Neither the
 * controller nor the request message is consulted.
 *
 * @param controller RPC controller (not used)
 * @param request the incoming request message (not used)
 * @return the default (empty) response instance
 * @throws ServiceException wrapping any IOException raised by the server
 */
@Override
public GracefulFailoverResponseProto gracefulFailover(
    RpcController controller, GracefulFailoverRequestProto request)
    throws ServiceException {
  try {
    server.gracefulFailover();
  } catch (IOException ioe) {
    // Surface the failure through the RPC layer, keeping the cause attached.
    throw new ServiceException(ioe);
  }
  return GracefulFailoverResponseProto.getDefaultInstance();
}
/**
 * Ask the ZKFC of the target node to coordinate a graceful failover.
 * An RPC is sent to that ZKFC; on success a confirmation is printed to
 * {@code out}, and on a {@link ServiceFailedException} the failure message
 * is printed to {@code errOut}.
 *
 * @param toNode the node to fail to
 * @return 0 on success, -1 if the ZKFC reported a ServiceFailedException
 * @throws IOException if obtaining the proxy or the RPC fails with an
 *         exception that is not handled here
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  int status = 0;
  try {
    zkfc.gracefulFailover();
    out.println("Failover to " + toNode + " successful");
  } catch (ServiceFailedException sfe) {
    // Report the failure to the user and signal it via the exit status.
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    status = -1;
  }
  return status;
}
/**
 * Ask the ZKFC of the target node to coordinate a graceful failover.
 * An RPC is sent to that ZKFC; on success a confirmation is printed to
 * {@code out}, and on a {@link ServiceFailedException} the failure message
 * is printed to {@code errOut}.
 *
 * @param toNode the node to fail to
 * @return 0 on success, -1 if the ZKFC reported a ServiceFailedException
 * @throws IOException if obtaining the proxy or the RPC fails with an
 *         exception that is not handled here
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  int status = 0;
  try {
    zkfc.gracefulFailover();
    out.println("Failover to " + toNode + " successful");
  } catch (ServiceFailedException sfe) {
    // Report the failure to the user and signal it via the exit status.
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    status = -1;
  }
  return status;
}
/**
 * Ask the ZKFC of the target node to coordinate a graceful failover.
 * An RPC is sent to that ZKFC; on success a confirmation is printed to
 * {@code out}, and on a {@link ServiceFailedException} the failure message
 * is printed to {@code errOut}.
 *
 * @param toNode the node to fail to
 * @return 0 on success, -1 if the ZKFC reported a ServiceFailedException
 * @throws IOException if obtaining the proxy or the RPC fails with an
 *         exception that is not handled here
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  int status = 0;
  try {
    zkfc.gracefulFailover();
    out.println("Failover to " + toNode + " successful");
  } catch (ServiceFailedException sfe) {
    // Report the failure to the user and signal it via the exit status.
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    status = -1;
  }
  return status;
}
/**
 * Ask the ZKFC of the target node to coordinate a graceful failover.
 * An RPC is sent to that ZKFC; on success a confirmation is printed to
 * {@code out}, and on a {@link ServiceFailedException} the failure message
 * is printed to {@code errOut}.
 *
 * @param toNode the node to fail to
 * @return 0 on success, -1 if the ZKFC reported a ServiceFailedException
 * @throws IOException if obtaining the proxy or the RPC fails with an
 *         exception that is not handled here
 */
private int gracefulFailoverThroughZKFCs(HAServiceTarget toNode)
    throws IOException {
  final int rpcTimeout =
      FailoverController.getRpcTimeoutToNewActive(getConf());
  final ZKFCProtocol zkfc = toNode.getZKFCProxy(getConf(), rpcTimeout);

  int status = 0;
  try {
    zkfc.gracefulFailover();
    out.println("Failover to " + toNode + " successful");
  } catch (ServiceFailedException sfe) {
    // Report the failure to the user and signal it via the exit status.
    errOut.println("Failover failed: " + sfe.getLocalizedMessage());
    status = -1;
  }
  return status;
}
@Test public void testFailoverWithAutoHa() throws Exception { Mockito.doReturn(STANDBY_READY_RESULT).when(mockProtocol).getServiceStatus(); // Turn on auto-HA in the config HdfsConfiguration conf = getHAConf(); conf.setBoolean(DFSConfigKeys.DFS_HA_AUTO_FAILOVER_ENABLED_KEY, true); conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY, getFencerTrueCommand()); tool.setConf(conf); assertEquals(0, runTool("-failover", "nn1", "nn2")); Mockito.verify(mockZkfcProtocol).gracefulFailover(); }
/**
 * Requests a graceful failover through the ZKFC proxy of thr2's local
 * target and waits for the HA states to swap, then does the same through
 * thr1's local target and waits for the states to swap back.
 */
@Test(timeout=30000)
public void testManualFailover() throws Exception {
  final int rpcTimeoutMs = 15000;

  // First hop: expect index 0 to become standby and index 1 active.
  thr2.zkfc.getLocalTarget()
      .getZKFCProxy(conf, rpcTimeoutMs)
      .gracefulFailover();
  waitForHAState(0, HAServiceState.STANDBY);
  waitForHAState(1, HAServiceState.ACTIVE);

  // Second hop: expect the roles to return to their starting arrangement.
  thr1.zkfc.getLocalTarget()
      .getZKFCProxy(conf, rpcTimeoutMs)
      .gracefulFailover();
  waitForHAState(0, HAServiceState.ACTIVE);
  waitForHAState(1, HAServiceState.STANDBY);
}
@Test public void testGracefulFailover() throws Exception { cluster.start(); cluster.waitForActiveLockHolder(0); cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(1); cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(0); Thread.sleep(10000); // allow to quiesce assertEquals(0, cluster.getService(0).fenceCount); assertEquals(0, cluster.getService(1).fenceCount); assertEquals(2, cluster.getService(0).activeTransitionCount); assertEquals(1, cluster.getService(1).activeTransitionCount); }
@Test public void testGracefulFailoverFailBecomingStandby() throws Exception { cluster.start(); cluster.waitForActiveLockHolder(0); // Ask for failover when old node fails to transition to standby. // This should trigger fencing, since the cedeActive() command // still works, but leaves the breadcrumb in place. cluster.setFailToBecomeStandby(0, true); cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); // Check that the old node was fenced assertEquals(1, cluster.getService(0).fenceCount); }
@Test(timeout=25000) public void testGracefulFailover() throws Exception { try { cluster.start(); cluster.waitForActiveLockHolder(0); cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(1); cluster.getService(0).getZKFCProxy(conf, 5000).gracefulFailover(); cluster.waitForActiveLockHolder(0); Thread.sleep(10000); // allow to quiesce assertEquals(0, cluster.getService(0).fenceCount); assertEquals(0, cluster.getService(1).fenceCount); assertEquals(2, cluster.getService(0).activeTransitionCount); assertEquals(1, cluster.getService(1).activeTransitionCount); } finally { cluster.stop(); } }
@Test(timeout=15000) public void testGracefulFailoverFailBecomingStandby() throws Exception { try { cluster.start(); cluster.waitForActiveLockHolder(0); // Ask for failover when old node fails to transition to standby. // This should trigger fencing, since the cedeActive() command // still works, but leaves the breadcrumb in place. cluster.setFailToBecomeStandby(0, true); cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); // Check that the old node was fenced assertEquals(1, cluster.getService(0).fenceCount); } finally { cluster.stop(); } }
@Test public void testGracefulFailoverFailBecomingStandbyAndFailFence() throws Exception { cluster.start(); cluster.waitForActiveLockHolder(0); // Ask for failover when old node fails to transition to standby. // This should trigger fencing, since the cedeActive() command // still works, but leaves the breadcrumb in place. cluster.setFailToBecomeStandby(0, true); cluster.setFailToFence(0, true); try { cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); fail("Failover should have failed when old node wont fence"); } catch (ServiceFailedException sfe) { GenericTestUtils.assertExceptionContains( "Unable to fence " + cluster.getService(0), sfe); } }
@Test public void testGracefulFailoverToUnhealthy() throws Exception { cluster.start(); cluster.waitForActiveLockHolder(0); // Mark it unhealthy, wait for it to exit election cluster.setHealthy(1, false); cluster.waitForElectorState(1, ActiveStandbyElector.State.INIT); // Ask for failover, it should fail, because it's unhealthy try { cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); fail("Did not fail to graceful failover to unhealthy service!"); } catch (ServiceFailedException sfe) { GenericTestUtils.assertExceptionContains( cluster.getService(1).toString() + " is not currently healthy.", sfe); } }
@Test(timeout=15000) public void testGracefulFailoverToUnhealthy() throws Exception { try { cluster.start(); cluster.waitForActiveLockHolder(0); // Mark it unhealthy, wait for it to exit election cluster.setHealthy(1, false); cluster.waitForElectorState(1, ActiveStandbyElector.State.INIT); // Ask for failover, it should fail, because it's unhealthy try { cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); fail("Did not fail to graceful failover to unhealthy service!"); } catch (ServiceFailedException sfe) { GenericTestUtils.assertExceptionContains( cluster.getService(1).toString() + " is not currently healthy.", sfe); } } finally { cluster.stop(); } }
@Test public void testGracefulFailoverFailBecomingActive() throws Exception { cluster.start(); cluster.waitForActiveLockHolder(0); cluster.setFailToBecomeActive(1, true); // Ask for failover, it should fail and report back to user. try { cluster.getService(1).getZKFCProxy(conf, 5000).gracefulFailover(); fail("Did not fail to graceful failover when target failed " + "to become active!"); } catch (ServiceFailedException sfe) { GenericTestUtils.assertExceptionContains( "Couldn't make " + cluster.getService(1) + " active", sfe); GenericTestUtils.assertExceptionContains( "injected failure", sfe); } // No fencing assertEquals(0, cluster.getService(0).fenceCount); assertEquals(0, cluster.getService(1).fenceCount); // Service 0 should go back to being active after the failed failover cluster.waitForActiveLockHolder(0); }