/**
 * Suspends the job: all running tasks are cancelled and the communication with other
 * components is disposed.
 *
 * <p>A job is mostly suspended because leadership has been revoked. It can be restarted by
 * calling the {@link #start(JobMasterId, Time)} method once leadership has been regained.
 *
 * <p>This method is executed asynchronously.
 *
 * @param cause The reason why this job is being suspended.
 * @param timeout The timeout for this operation.
 * @return Future acknowledge indicating that the job has been suspended; otherwise the
 *     future contains an exception.
 */
public CompletableFuture<Acknowledge> suspend(final Exception cause, final Time timeout) {
    // The suspension is submitted before stop() is invoked; keep this order.
    final CompletableFuture<Acknowledge> suspensionResult =
        callAsyncWithoutFencing(() -> suspendExecution(cause), timeout);

    stop();

    return suspensionResult;
}
/**
 * Suspends the job: all running tasks are cancelled and the communication with other
 * components is disposed.
 *
 * <p>A job is mostly suspended because leadership has been revoked. It can be restarted by
 * calling the {@link #start(JobMasterId, Time)} method once leadership has been regained.
 *
 * <p>This method is executed asynchronously.
 *
 * @param cause The reason why this job is being suspended.
 * @param timeout The timeout for this operation.
 * @return Future acknowledge indicating that the job has been suspended; otherwise the
 *     future contains an exception.
 */
public CompletableFuture<Acknowledge> suspend(final Exception cause, final Time timeout) {
    // The suspension is submitted before stop() is invoked; keep this order.
    final CompletableFuture<Acknowledge> suspensionResult =
        callAsyncWithoutFencing(() -> suspendExecution(cause), timeout);

    stop();

    return suspensionResult;
}
/**
 * Suspends the job: all running tasks are cancelled and the communication with other
 * components is disposed.
 *
 * <p>A job is mostly suspended because leadership has been revoked. It can be restarted by
 * calling the {@link #start(JobMasterId, Time)} method once leadership has been regained.
 *
 * <p>This method is executed asynchronously.
 *
 * @param cause The reason why this job is being suspended.
 * @param timeout The timeout for this operation.
 * @return Future acknowledge indicating that the job has been suspended; otherwise the
 *     future contains an exception.
 */
public CompletableFuture<Acknowledge> suspend(final Exception cause, final Time timeout) {
    // The suspension is submitted before stop() is invoked; keep this order.
    final CompletableFuture<Acknowledge> suspensionResult =
        callAsyncWithoutFencing(() -> suspendExecution(cause), timeout);

    stop();

    return suspensionResult;
}
private void setNewFencingToken(JobMasterId newJobMasterId) { if (getFencingToken() != null) { log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId); // first we have to suspend the current execution suspendExecution(new FlinkException("Old job with JobMasterId " + getFencingToken() + " is restarted with a new JobMasterId " + newJobMasterId + '.')); } // set new leader id setFencingToken(newJobMasterId); }
private void setNewFencingToken(JobMasterId newJobMasterId) { if (getFencingToken() != null) { log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId); // first we have to suspend the current execution suspendExecution(new FlinkException("Old job with JobMasterId " + getFencingToken() + " is restarted with a new JobMasterId " + newJobMasterId + '.')); } // set new leader id setFencingToken(newJobMasterId); }
/** * Suspend the job and shutdown all other services including rpc. */ @Override public CompletableFuture<Void> postStop() { log.info("Stopping the JobMaster for job {}({}).", jobGraph.getName(), jobGraph.getJobID()); // disconnect from all registered TaskExecutors final Set<ResourceID> taskManagerResourceIds = new HashSet<>(registeredTaskManagers.keySet()); final FlinkException cause = new FlinkException("Stopping JobMaster for job " + jobGraph.getName() + '(' + jobGraph.getJobID() + ")."); for (ResourceID taskManagerResourceId : taskManagerResourceIds) { disconnectTaskManager(taskManagerResourceId, cause); } taskManagerHeartbeatManager.stop(); resourceManagerHeartbeatManager.stop(); // make sure there is a graceful exit suspendExecution(new FlinkException("JobManager is shutting down.")); // shut down will internally release all registered slots slotPool.shutDown(); final CompletableFuture<Void> disposeInternalSavepointFuture; if (lastInternalSavepoint != null) { disposeInternalSavepointFuture = CompletableFuture.runAsync(() -> disposeSavepoint(lastInternalSavepoint)); } else { disposeInternalSavepointFuture = CompletableFuture.completedFuture(null); } final CompletableFuture<Void> slotPoolTerminationFuture = slotPool.getTerminationFuture(); return FutureUtils.completeAll(Arrays.asList(disposeInternalSavepointFuture, slotPoolTerminationFuture)); }
/** * Suspend the job and shutdown all other services including rpc. */ @Override public CompletableFuture<Void> postStop() { log.info("Stopping the JobMaster for job {}({}).", jobGraph.getName(), jobGraph.getJobID()); // disconnect from all registered TaskExecutors final Set<ResourceID> taskManagerResourceIds = new HashSet<>(registeredTaskManagers.keySet()); final FlinkException cause = new FlinkException("Stopping JobMaster for job " + jobGraph.getName() + '(' + jobGraph.getJobID() + ")."); for (ResourceID taskManagerResourceId : taskManagerResourceIds) { disconnectTaskManager(taskManagerResourceId, cause); } taskManagerHeartbeatManager.stop(); resourceManagerHeartbeatManager.stop(); // make sure there is a graceful exit suspendExecution(new FlinkException("JobManager is shutting down.")); // shut down will internally release all registered slots slotPool.shutDown(); final CompletableFuture<Void> disposeInternalSavepointFuture; if (lastInternalSavepoint != null) { disposeInternalSavepointFuture = CompletableFuture.runAsync(() -> disposeSavepoint(lastInternalSavepoint)); } else { disposeInternalSavepointFuture = CompletableFuture.completedFuture(null); } final CompletableFuture<Void> slotPoolTerminationFuture = slotPool.getTerminationFuture(); return FutureUtils.completeAll(Arrays.asList(disposeInternalSavepointFuture, slotPoolTerminationFuture)); }
// Suspend the execution, passing a FlinkException stating that the JobManager is shutting down.
suspendExecution(new FlinkException("JobManager is shutting down."));