/**
 * Builds a JSON summary for a single cluster.
 *
 * <p>The summary contains the cluster id, the name of the leading controller (or a
 * placeholder message when no leader property is found), whether a pause signal and a
 * maintenance signal are present, and the child names found under the ideal states,
 * instance configs and live instances keys.
 *
 * @param clusterId id of the cluster taken from the request path
 * @return the result of {@code notFound()} when {@code isClusterExist} reports the cluster
 *         as missing, otherwise the JSON representation of the collected properties
 */
@GET
@Path("{clusterId}")
public Response getClusterInfo(@PathParam("clusterId") String clusterId) {
  if (!isClusterExist(clusterId)) {
    return notFound();
  }

  HelixDataAccessor accessor = getDataAccssor(clusterId);
  PropertyKey.Builder keys = accessor.keyBuilder();

  Map<String, Object> info = new HashMap<>();
  info.put(Properties.id.name(), clusterId);

  // Report the leader's instance name, or a fixed placeholder when there is none.
  LiveInstance leader = accessor.getProperty(keys.controllerLeader());
  String controllerName = (leader != null) ? leader.getInstanceName() : "No Lead Controller!";
  info.put(ClusterProperties.controller.name(), controllerName);

  // A signal counts as "on" simply by being present.
  boolean isPaused = accessor.getProperty(keys.pause()) != null;
  info.put(ClusterProperties.paused.name(), isPaused);

  boolean inMaintenance = accessor.getProperty(keys.maintenance()) != null;
  info.put(ClusterProperties.maintenance.name(), inMaintenance);

  List<String> resourceNames = accessor.getChildNames(keys.idealStates());
  info.put(ClusterProperties.resources.name(), resourceNames);

  List<String> instanceNames = accessor.getChildNames(keys.instanceConfigs());
  info.put(ClusterProperties.instances.name(), instanceNames);

  List<String> liveInstanceNames = accessor.getChildNames(keys.liveInstances());
  info.put(ClusterProperties.liveInstances.name(), liveInstanceNames);

  return JSONRepresentation(info);
}
// Read the maintenance property; the flag is set exactly when a signal is present (non-null).
MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance()); _isMaintenanceModeEnabled = maintenanceSignal != null;
// Read the maintenance property; the flag is set exactly when a signal is present (non-null).
MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance()); _isMaintenanceModeEnabled = maintenanceSignal != null;
Builder keyBuilder = accessor.keyBuilder(); PauseSignal pauseSignal = accessor.getProperty(keyBuilder.pause()); MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance()); _paused = updateControllerState(changeContext, pauseSignal, _paused); _inMaintenanceMode =
Builder keyBuilder = accessor.keyBuilder(); PauseSignal pauseSignal = accessor.getProperty(keyBuilder.pause()); MaintenanceSignal maintenanceSignal = accessor.getProperty(keyBuilder.maintenance()); _paused = updateControllerState(changeContext, pauseSignal, _paused); _inMaintenanceMode =
@Test public void testWithDisabledInstancesLimit() throws Exception { MaintenanceSignal maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNull(maintenanceSignal); checkForRebalanceError(false); HelixAdmin admin = new ZKHelixAdmin(_gZkClient); // disable instance int i; for (i = 2; i < 2 + _maxOfflineInstancesAllowed; i++) { String instance = _participants.get(i).getInstanceName(); admin.enableInstance(CLUSTER_NAME, instance, false); } Thread.sleep(500); maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNull(maintenanceSignal); String instance = _participants.get(i).getInstanceName(); admin.enableInstance(CLUSTER_NAME, instance, false); Thread.sleep(500); maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNotNull(maintenanceSignal); Assert.assertNotNull(maintenanceSignal.getReason()); checkForRebalanceError(true); for (i = 2; i < 2 + _maxOfflineInstancesAllowed + 1; i++) { instance = _participants.get(i).getInstanceName(); admin.enableInstance(CLUSTER_NAME, instance, true); } admin.enableMaintenanceMode(CLUSTER_NAME, false); }
/**
 * Checks the number of offline instances against the configured limit and, when the limit
 * is exceeded, enables maintenance mode (if no maintenance property exists yet) and updates
 * the rebalance status with the error message.
 *
 * <p>The offline count is computed as all instances minus enabled live instances. A negative
 * limit skips the check entirely.
 *
 * @param cache                cluster data cache supplying the config and instance sets
 * @param manager              Helix manager used to read and set maintenance mode; when
 *                             {@code null} an error is logged instead
 * @param clusterStatusMonitor monitor passed through to {@code updateRebalanceStatus}
 */
private void validateOfflineInstancesLimit(final ClusterDataCache cache,
    final HelixManager manager, final ClusterStatusMonitor clusterStatusMonitor) {
  final int allowedOffline = cache.getClusterConfig().getMaxOfflineInstancesAllowed();
  if (allowedOffline < 0) {
    return;
  }

  final int currentOffline =
      cache.getAllInstances().size() - cache.getEnabledLiveInstances().size();
  if (currentOffline <= allowedOffline) {
    return;
  }

  String errMsg = String.format(
      "Offline Instances count %d greater than allowed count %d. Stop rebalance pipeline and pause the cluster %s",
      currentOffline, allowedOffline, cache.getClusterName());

  if (manager == null) {
    LogUtil.logError(logger, _eventId, "Failed to pause cluster, HelixManager is not set!");
  } else if (manager.getHelixDataAccessor()
      .getProperty(manager.getHelixDataAccessor().keyBuilder().maintenance()) == null) {
    // Only enable maintenance mode when no maintenance property is present yet.
    manager.getClusterManagmentTool()
        .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
  }

  if (!cache.isTaskCache()) {
    updateRebalanceStatus(true, manager, cache, clusterStatusMonitor, errMsg);
  }
}
/**
 * Checks the number of offline instances against the configured limit.
 *
 * <p>The offline count is computed as all instances minus enabled live instances. When the
 * count exceeds a non-negative limit, maintenance mode is enabled (if no maintenance
 * property exists yet) and a warning is logged; with a {@code null} manager an error is
 * logged instead.
 *
 * @param cache   cluster data cache supplying the config and instance sets
 * @param manager Helix manager used to read and set maintenance mode; may be {@code null}
 * @return {@code false} when the offline count exceeds the limit, {@code true} otherwise
 *         (including when the limit is negative)
 */
private boolean validateOfflineInstancesLimit(final ClusterDataCache cache,
    final HelixManager manager) {
  final int allowedOffline = cache.getClusterConfig().getMaxOfflineInstancesAllowed();
  if (allowedOffline < 0) {
    return true;
  }

  final int currentOffline =
      cache.getAllInstances().size() - cache.getEnabledLiveInstances().size();
  if (currentOffline <= allowedOffline) {
    return true;
  }

  String errMsg = String.format(
      "Offline Instances count %d greater than allowed count %d. Stop rebalance and put the cluster %s into maintenance mode.",
      currentOffline, allowedOffline, cache.getClusterName());

  if (manager == null) {
    LogUtil.logError(logger, _eventId, "Failed to put cluster " + cache.getClusterName()
        + " into maintenance mode, HelixManager is not set!");
  } else if (manager.getHelixDataAccessor()
      .getProperty(manager.getHelixDataAccessor().keyBuilder().maintenance()) == null) {
    // Only enable maintenance mode (and warn) when no maintenance property is present yet.
    manager.getClusterManagmentTool()
        .enableMaintenanceMode(manager.getClusterName(), true, errMsg);
    LogUtil.logWarn(logger, _eventId, errMsg);
  }

  return false;
}
/**
 * Creates one delayed-rebalance resource per state model, then stops every participant
 * from index 2 upward and asserts that a maintenance signal with a reason has appeared.
 */
@Test
public void testDisableCluster() throws Exception {
  ConfigAccessor configAccessor = new ConfigAccessor(_gZkClient);
  ClusterConfig config = configAccessor.getClusterConfig(CLUSTER_NAME);
  config.setMaxPartitionsPerInstance(10);
  configAccessor.setClusterConfig(CLUSTER_NAME, config);

  // One test resource per state model, named Test-DB-0, Test-DB-1, ...
  int dbIndex = 0;
  for (String stateModel : TestStateModels) {
    String dbName = "Test-DB-" + dbIndex;
    dbIndex++;
    createResourceWithDelayedRebalance(CLUSTER_NAME, dbName, stateModel, _PARTITIONS, _replica,
        _replica, -1);
    _testDBs.add(dbName);
  }

  Thread.sleep(100);
  Assert.assertTrue(_clusterVerifier.verifyByPolling());

  // No maintenance signal expected before any participant is stopped.
  MaintenanceSignal signal =
      _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
  Assert.assertNull(signal);

  for (int node = 2; node < NUM_NODE; node++) {
    _participants.get(node).syncStop();
  }

  // Short pause before re-reading the signal (presumably lets the controller react).
  Thread.sleep(500);

  signal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
  Assert.assertNotNull(signal);
  Assert.assertNotNull(signal.getReason());
}
@Test (dependsOnMethods = "testWithDisabledInstancesLimit") public void testWithOfflineInstancesLimit() throws Exception { MaintenanceSignal maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNull(maintenanceSignal); checkForRebalanceError(false); int i; for (i = 2; i < 2 + _maxOfflineInstancesAllowed; i++) { _participants.get(i).syncStop(); } Thread.sleep(500); maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNull(maintenanceSignal); _participants.get(i).syncStop(); Thread.sleep(500); maintenanceSignal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance()); Assert.assertNotNull(maintenanceSignal); Assert.assertNotNull(maintenanceSignal.getReason()); // Verify there is rebalance error logged checkForRebalanceError(true); }
/**
 * Turns maintenance mode on or off for a cluster.
 *
 * <p>When disabling, the maintenance property is removed. When enabling, a
 * {@link MaintenanceSignal} with id {@code "maintenance"} is created, carrying the given
 * reason if one is supplied.
 *
 * @param clusterName name of the cluster
 * @param enabled     {@code true} to enter maintenance mode, {@code false} to exit it
 * @param reason      optional reason stored on the signal; may be {@code null}
 * @throws HelixException if the maintenance signal could not be created
 */
@Override
public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) {
  // Placeholders are filled in order: cluster name first, then the enters/exits verb.
  logger.info("Cluster {} {} maintenance mode for reason {}.", clusterName,
      enabled ? "enters" : "exits", reason == null ? "NULL" : reason);

  HelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
  Builder keyBuilder = accessor.keyBuilder();

  if (!enabled) {
    accessor.removeProperty(keyBuilder.maintenance());
    return;
  }

  MaintenanceSignal maintenanceSignal = new MaintenanceSignal("maintenance");
  if (reason != null) {
    maintenanceSignal.setReason(reason);
  }
  if (!accessor.createMaintenance(maintenanceSignal)) {
    throw new HelixException("Failed to create maintenance signal");
  }
}
/**
 * Turns maintenance mode on or off for a cluster.
 *
 * <p>When disabling, the maintenance property is removed. When enabling, a
 * {@link MaintenanceSignal} with id {@code "maintenance"} is created, carrying the given
 * reason if one is supplied.
 *
 * @param clusterName name of the cluster
 * @param enabled     {@code true} to enter maintenance mode, {@code false} to exit it
 * @param reason      optional reason stored on the signal; may be {@code null}
 * @throws HelixException if the maintenance signal could not be created
 */
@Override
public void enableMaintenanceMode(String clusterName, boolean enabled, String reason) {
  // Placeholders are filled in order: cluster name first, then the enters/exits verb.
  logger.info("Cluster {} {} maintenance mode for reason {}.", clusterName,
      enabled ? "enters" : "exits", reason == null ? "NULL" : reason);

  HelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
  Builder keyBuilder = accessor.keyBuilder();

  if (!enabled) {
    accessor.removeProperty(keyBuilder.maintenance());
    return;
  }

  MaintenanceSignal maintenanceSignal = new MaintenanceSignal("maintenance");
  if (reason != null) {
    maintenanceSignal.setReason(reason);
  }
  if (!accessor.createMaintenance(maintenanceSignal)) {
    throw new HelixException("Failed to create maintenance signal");
  }
}