private void setNewFencingToken(JobMasterId newJobMasterId) { if (getFencingToken() != null) { log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId); } // set new leader id setFencingToken(newJobMasterId); }
private void setNewFencingToken(JobMasterId newJobMasterId) { if (getFencingToken() != null) { log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId); // first we have to suspend the current execution suspendExecution(new FlinkException("Old job with JobMasterId " + getFencingToken() + " is restarted with a new JobMasterId " + newJobMasterId + '.')); } // set new leader id setFencingToken(newJobMasterId); }
private void setNewFencingToken(JobMasterId newJobMasterId) { if (getFencingToken() != null) { log.info("Restarting old job with JobMasterId {}. The new JobMasterId is {}.", getFencingToken(), newJobMasterId); // first we have to suspend the current execution suspendExecution(new FlinkException("Old job with JobMasterId " + getFencingToken() + " is restarted with a new JobMasterId " + newJobMasterId + '.')); } // set new leader id setFencingToken(newJobMasterId); }
/** * Suspending job, all the running tasks will be cancelled, and communication with other components * will be disposed. * * <p>Mostly job is suspended because of the leadership has been revoked, one can be restart this job by * calling the {@link #start(JobMasterId, Time)} method once we take the leadership back again. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // the slot pool stops receiving messages and clears its pooled slots slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }
/** * Suspending job, all the running tasks will be cancelled, and communication with other components * will be disposed. * * <p>Mostly job is suspended because of the leadership has been revoked, one can be restart this job by * calling the {@link #start(JobMasterId, Time)} method once we take the leadership back again. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // the slot pool stops receiving messages and clears its pooled slots slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }
/** * Suspending job, and communication with other components will be disposed. * * <p>Mostly job is suspended without cancelling running tasks because of the leadership has been revoked, * the one who takes the leadership can take over the control. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // flush the operation logs. operationLogManager.stop(); // the slot pool stops receiving messages and clears its pooled slots. slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }