/**
 * Processes a {@link DeclineCheckpoint} message.
 *
 * <p>When the execution graph has a {@link CheckpointCoordinator}, the message is passed to
 * {@code receiveDeclineMessage} from a task submitted through {@code getRpcService().execute(...)};
 * an exception raised by the coordinator is logged and not rethrown. When there is no coordinator,
 * the message is only logged: at error level if the job is {@link JobStatus#RUNNING}, at debug
 * level otherwise.
 *
 * @param decline the decline message to process
 */
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        final String noCoordinatorMessage = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
        final boolean jobIsRunning = executionGraph.getState() == JobStatus.RUNNING;

        // A missing coordinator is only unexpected while the job is running.
        if (jobIsRunning) {
            log.error(noCoordinatorMessage, jobGraph.getJobID());
        } else {
            log.debug(noCoordinatorMessage, jobGraph.getJobID());
        }
        return;
    }

    getRpcService().execute(() -> {
        try {
            coordinator.receiveDeclineMessage(decline);
        } catch (Exception failure) {
            log.error("Error in CheckpointCoordinator while processing {}", decline, failure);
        }
    });
}
/**
 * Forwards a declined-checkpoint notification to the checkpoint coordinator, if one exists.
 *
 * <p>With a coordinator present, {@code receiveDeclineMessage} is invoked inside a task handed to
 * {@code getRpcService().execute(...)}, and any exception it throws is logged. Without a
 * coordinator the notification is dropped after logging it (error level while the job state is
 * {@link JobStatus#RUNNING}, debug level in any other state).
 *
 * @param decline the decline message received for this job
 */
@Override
public void declineCheckpoint(DeclineCheckpoint decline) {
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator != null) {
        final Runnable forwardDecline = () -> {
            try {
                coordinator.receiveDeclineMessage(decline);
            } catch (Exception e) {
                log.error("Error in CheckpointCoordinator while processing {}", decline, e);
            }
        };
        getRpcService().execute(forwardDecline);
        return;
    }

    final String template = "Received DeclineCheckpoint message for job {} with no CheckpointCoordinator";
    if (executionGraph.getState() != JobStatus.RUNNING) {
        log.debug(template, jobGraph.getJobID());
    } else {
        log.error(template, jobGraph.getJobID());
    }
}
/**
 * Builds a {@link DeclineCheckpoint} message from the given values and passes it to the
 * checkpoint coordinator.
 *
 * <p>The coordinator call runs inside a task submitted through {@code getRpcService().execute(...)};
 * exceptions thrown by the coordinator are logged and not propagated. If the execution graph has
 * no coordinator, an error is logged instead.
 *
 * @param jobID id of the job the checkpoint belongs to
 * @param executionAttemptID id of the execution attempt that declined the checkpoint
 * @param checkpointID id of the declined checkpoint
 * @param reason the cause given for declining the checkpoint
 */
@Override
public void declineCheckpoint(
        final JobID jobID,
        final ExecutionAttemptID executionAttemptID,
        final long checkpointID,
        final Throwable reason) {

    final DeclineCheckpoint declineMessage =
        new DeclineCheckpoint(jobID, executionAttemptID, checkpointID, reason);
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        log.error("Received DeclineCheckpoint message for job {} with no CheckpointCoordinator",
            jobGraph.getJobID());
        return;
    }

    getRpcService().execute(() -> {
        try {
            coordinator.receiveDeclineMessage(declineMessage);
        } catch (Exception failure) {
            log.error("Error in CheckpointCoordinator while processing {}", declineMessage, failure);
        }
    });
}
/**
 * Builds an {@link AcknowledgeCheckpoint} message from the given values and passes it to the
 * checkpoint coordinator.
 *
 * <p>The coordinator call runs inside a task submitted through {@code getRpcService().execute(...)}.
 * Any {@link Throwable} raised while processing the acknowledgement is logged as a warning
 * together with its stack trace and is not propagated. If the execution graph has no coordinator,
 * an error is logged instead.
 *
 * @param jobID id of the job the checkpoint belongs to
 * @param executionAttemptID id of the execution attempt acknowledging the checkpoint
 * @param checkpointId id of the acknowledged checkpoint
 * @param checkpointMetrics metrics reported with the acknowledgement
 * @param checkpointState state snapshot reported with the acknowledgement
 */
@Override
public void acknowledgeCheckpoint(
        final JobID jobID,
        final ExecutionAttemptID executionAttemptID,
        final long checkpointId,
        final CheckpointMetrics checkpointMetrics,
        final TaskStateSnapshot checkpointState) {

    final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
    final AcknowledgeCheckpoint ackMessage = new AcknowledgeCheckpoint(
        jobID,
        executionAttemptID,
        checkpointId,
        checkpointMetrics,
        checkpointState);

    if (checkpointCoordinator != null) {
        getRpcService().execute(() -> {
            try {
                checkpointCoordinator.receiveAcknowledgeMessage(ackMessage);
            } catch (Throwable t) {
                // Pass the throwable to the logger so the cause and stack trace are not lost.
                log.warn("Error while processing checkpoint acknowledgement message", t);
            }
        });
    } else {
        log.error("Received AcknowledgeCheckpoint message for job {} with no CheckpointCoordinator",
            jobGraph.getJobID());
    }
}
/**
 * Triggers a savepoint and optionally cancels the job once the savepoint has been stored.
 *
 * <p>If the execution graph has no {@link CheckpointCoordinator}, the returned future is already
 * completed exceptionally with an {@link IllegalStateException}. When {@code cancelJob} is set,
 * the checkpoint scheduler is stopped before the savepoint is triggered; after the savepoint
 * path is available the job is cancelled in a stage run on {@code getMainThreadExecutor()}. If any
 * stage fails and {@code cancelJob} is set, the checkpoint scheduler is started again before the
 * failure is rethrown wrapped in a {@link CompletionException}.
 *
 * @param targetDirectory directory passed to the coordinator for the savepoint; may be {@code null}
 * @param cancelJob whether to cancel the job after the savepoint has been stored
 * @param timeout timeout passed to {@code cancel}
 * @return future holding the external pointer of the completed savepoint
 */
@Override
public CompletableFuture<String> triggerSavepoint(
        @Nullable final String targetDirectory,
        final boolean cancelJob,
        final Time timeout) {

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        final String notStreamingMessage =
            String.format("Job %s is not a streaming job.", jobGraph.getJobID());
        return FutureUtils.completedExceptionally(new IllegalStateException(notStreamingMessage));
    }

    if (cancelJob) {
        coordinator.stopCheckpointScheduler();
    }

    return coordinator
        .triggerSavepoint(System.currentTimeMillis(), targetDirectory)
        .thenApply(CompletedCheckpoint::getExternalPointer)
        .thenApplyAsync(
            savepointPath -> {
                if (!cancelJob) {
                    return savepointPath;
                }
                log.info("Savepoint stored in {}. Now cancelling {}.", savepointPath, jobGraph.getJobID());
                cancel(timeout);
                return savepointPath;
            },
            getMainThreadExecutor())
        .exceptionally(
            failure -> {
                // The scheduler was stopped above only when cancelJob is set, so restart it only then.
                if (cancelJob) {
                    startCheckpointScheduler(coordinator);
                }
                throw new CompletionException(failure);
            });
}
/**
 * Builds a {@link DeclineCheckpoint} message from the given values and passes it to the
 * checkpoint coordinator.
 *
 * <p>The coordinator call runs inside a {@link Runnable} submitted through
 * {@code getRpcService().execute(...)}; exceptions thrown by the coordinator are logged and not
 * propagated. If the execution graph has no coordinator, an error is logged instead.
 *
 * @param jobID id of the job the checkpoint belongs to
 * @param executionAttemptID id of the execution attempt that declined the checkpoint
 * @param checkpointID id of the declined checkpoint
 * @param reason the cause given for declining the checkpoint
 */
@RpcMethod
public void declineCheckpoint(
        final JobID jobID,
        final ExecutionAttemptID executionAttemptID,
        final long checkpointID,
        final Throwable reason) {

    final DeclineCheckpoint declineMessage =
        new DeclineCheckpoint(jobID, executionAttemptID, checkpointID, reason);
    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        log.error("Received DeclineCheckpoint message for job {} with no CheckpointCoordinator",
            jobGraph.getJobID());
        return;
    }

    final Runnable forwardDecline = () -> {
        try {
            coordinator.receiveDeclineMessage(declineMessage);
        } catch (Exception failure) {
            log.error("Error in CheckpointCoordinator while processing {}", declineMessage, failure);
        }
    };
    getRpcService().execute(forwardDecline);
}
final TaskStateSnapshot checkpointState) { final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator(); final AcknowledgeCheckpoint ackMessage = new AcknowledgeCheckpoint( jobID,
/**
 * Builds an {@link AcknowledgeCheckpoint} message from the given values and passes it to the
 * checkpoint coordinator.
 *
 * <p>The coordinator call runs inside a {@link Runnable} submitted through
 * {@code getRpcService().execute(...)}. Any {@link Throwable} raised while processing the
 * acknowledgement is logged as a warning together with its stack trace and is not propagated.
 * If the execution graph has no coordinator, an error is logged instead.
 *
 * @param jobID id of the job the checkpoint belongs to
 * @param executionAttemptID id of the execution attempt acknowledging the checkpoint
 * @param checkpointId id of the acknowledged checkpoint
 * @param checkpointMetrics metrics reported with the acknowledgement
 * @param checkpointState subtask state reported with the acknowledgement
 * @throws CheckpointException declared by the signature; no statement in this body throws it directly
 */
@RpcMethod
public void acknowledgeCheckpoint(
        final JobID jobID,
        final ExecutionAttemptID executionAttemptID,
        final long checkpointId,
        final CheckpointMetrics checkpointMetrics,
        final SubtaskState checkpointState) throws CheckpointException {

    final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
    final AcknowledgeCheckpoint ackMessage =
        new AcknowledgeCheckpoint(jobID, executionAttemptID, checkpointId, checkpointMetrics, checkpointState);

    if (checkpointCoordinator != null) {
        getRpcService().execute(new Runnable() {
            @Override
            public void run() {
                try {
                    checkpointCoordinator.receiveAcknowledgeMessage(ackMessage);
                } catch (Throwable t) {
                    // Pass the throwable to the logger so the cause and stack trace are not lost.
                    log.warn("Error while processing checkpoint acknowledgement message", t);
                }
            }
        });
    } else {
        log.error("Received AcknowledgeCheckpoint message for job {} with no CheckpointCoordinator",
            jobGraph.getJobID());
    }
}
final TaskStateSnapshot checkpointState) { final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator(); final AcknowledgeCheckpoint ackMessage = new AcknowledgeCheckpoint( jobID,
private ExecutionGraph createAndRestoreExecutionGraph(JobManagerJobMetricGroup currentJobManagerJobMetricGroup) throws Exception { ExecutionGraph newExecutionGraph = createExecutionGraph(currentJobManagerJobMetricGroup); final CheckpointCoordinator checkpointCoordinator = newExecutionGraph.getCheckpointCoordinator(); if (checkpointCoordinator != null) { // check whether we find a valid checkpoint if (!checkpointCoordinator.restoreLatestCheckpointedState( newExecutionGraph.getAllVertices(), false, false)) { // check whether we can restore from a savepoint tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings()); } } return newExecutionGraph; }
private ExecutionGraph createAndRestoreExecutionGraph(JobManagerJobMetricGroup currentJobManagerJobMetricGroup) throws Exception { ExecutionGraph newExecutionGraph = createExecutionGraph(currentJobManagerJobMetricGroup); final CheckpointCoordinator checkpointCoordinator = newExecutionGraph.getCheckpointCoordinator(); if (checkpointCoordinator != null) { // check whether we find a valid checkpoint if (!checkpointCoordinator.restoreLatestCheckpointedState( newExecutionGraph.getAllVertices(), false, false)) { // check whether we can restore from a savepoint tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings()); } } return newExecutionGraph; }
private ExecutionGraph createAndRestoreExecutionGraph( JobGraph jobGraph, JobManagerJobMetricGroup currentJobManagerJobMetricGroup) throws Exception { ExecutionGraph newExecutionGraph = createExecutionGraph(jobGraph, currentJobManagerJobMetricGroup); final CheckpointCoordinator checkpointCoordinator = newExecutionGraph.getCheckpointCoordinator(); if (checkpointCoordinator != null) { // check whether we find a valid checkpoint if (!checkpointCoordinator.restoreLatestCheckpointedState( newExecutionGraph.getAllVertices(), false, false)) { // check whether we can restore from a savepoint tryRestoreExecutionGraphFromSavepoint(newExecutionGraph, jobGraph.getSavepointRestoreSettings()); } } return newExecutionGraph; }
final Time timeout) { final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator(); if (checkpointCoordinator == null) { return FutureUtils.completedExceptionally(new IllegalStateException(
final Time timeout) { final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator(); if (checkpointCoordinator == null) { return FutureUtils.completedExceptionally(new IllegalStateException(
/**
 * Restores the given {@link ExecutionGraph} from a savepoint if the provided
 * {@link SavepointRestoreSettings} request one.
 *
 * <p>Nothing happens when the settings do not ask for a savepoint restore or when the execution
 * graph has no {@link CheckpointCoordinator}.
 *
 * @param executionGraphToRestore {@link ExecutionGraph} which is supposed to be restored
 * @param savepointRestoreSettings {@link SavepointRestoreSettings} containing information about the savepoint to restore from
 * @throws Exception if the {@link ExecutionGraph} could not be restored
 */
private void tryRestoreExecutionGraphFromSavepoint(ExecutionGraph executionGraphToRestore, SavepointRestoreSettings savepointRestoreSettings) throws Exception {
    if (!savepointRestoreSettings.restoreSavepoint()) {
        return;
    }

    final CheckpointCoordinator coordinator = executionGraphToRestore.getCheckpointCoordinator();
    if (coordinator == null) {
        return;
    }

    coordinator.restoreSavepoint(
        savepointRestoreSettings.getRestorePath(),
        savepointRestoreSettings.allowNonRestoredState(),
        executionGraphToRestore.getAllVertices(),
        userCodeLoader);
}
/**
 * Attempts a savepoint restore of the given {@link ExecutionGraph} according to the provided
 * {@link SavepointRestoreSettings}.
 *
 * <p>The restore is performed only if the settings request it and the execution graph has a
 * {@link CheckpointCoordinator}; otherwise the method returns without doing anything.
 *
 * @param executionGraphToRestore {@link ExecutionGraph} which is supposed to be restored
 * @param savepointRestoreSettings {@link SavepointRestoreSettings} containing information about the savepoint to restore from
 * @throws Exception if the {@link ExecutionGraph} could not be restored
 */
private void tryRestoreExecutionGraphFromSavepoint(ExecutionGraph executionGraphToRestore, SavepointRestoreSettings savepointRestoreSettings) throws Exception {
    final boolean restoreRequested = savepointRestoreSettings.restoreSavepoint();
    if (!restoreRequested) {
        return;
    }

    final CheckpointCoordinator coordinator = executionGraphToRestore.getCheckpointCoordinator();
    if (coordinator != null) {
        coordinator.restoreSavepoint(
            savepointRestoreSettings.getRestorePath(),
            savepointRestoreSettings.allowNonRestoredState(),
            executionGraphToRestore.getAllVertices(),
            userCodeLoader);
    }
}
/**
 * Restores the given {@link ExecutionGraph} from a savepoint if the provided
 * {@link SavepointRestoreSettings} request one.
 *
 * <p>Nothing happens when the settings do not ask for a savepoint restore or when the execution
 * graph has no {@link CheckpointCoordinator}. The restore path and the
 * {@code allowNonRestoredState} and {@code resumeFromLatestCheckpoint} values are all taken from
 * the settings.
 *
 * @param executionGraphToRestore {@link ExecutionGraph} which is supposed to be restored
 * @param savepointRestoreSettings {@link SavepointRestoreSettings} containing information about the savepoint to restore from
 * @throws Exception if the {@link ExecutionGraph} could not be restored
 */
private void tryRestoreExecutionGraphFromSavepoint(ExecutionGraph executionGraphToRestore, SavepointRestoreSettings savepointRestoreSettings) throws Exception {
    if (!savepointRestoreSettings.restoreSavepoint()) {
        return;
    }

    final CheckpointCoordinator coordinator = executionGraphToRestore.getCheckpointCoordinator();
    if (coordinator == null) {
        return;
    }

    coordinator.restoreSavepoint(
        savepointRestoreSettings.getRestorePath(),
        savepointRestoreSettings.allowNonRestoredState(),
        savepointRestoreSettings.resumeFromLatestCheckpoint(),
        executionGraphToRestore.getAllVertices(),
        userCodeLoader);
}
final CheckpointCoordinator checkpointCoordinator = currentExecutionGraph.getCheckpointCoordinator(); if (checkpointCoordinator != null) { checkpointCoordinator.stopCheckpointScheduler();
final CheckpointCoordinator checkpointCoordinator = currentExecutionGraph.getCheckpointCoordinator(); checkpointCoordinator.stopCheckpointScheduler();
final CheckpointCoordinator checkpointCoordinator = currentExecutionGraph.getCheckpointCoordinator(); checkpointCoordinator.stopCheckpointScheduler();