org.apache.flink.runtime.checkpoint.CheckpointCoordinator java code examples

  /**
   * Forwards the acknowledge message to the checkpoint coordinator.
   *
   * <p>Any {@link Throwable} raised while processing is caught and logged so it does not
   * propagate out of this runnable.
   */
  @Override
  public void run() {
    try {
      checkpointCoordinator.receiveAcknowledgeMessage(ackMessage);
    } catch (Throwable t) {
      // Pass the throwable as the trailing argument so the logger records the cause and
      // stack trace instead of only a fixed message.
      log.warn("Error while processing checkpoint acknowledgement message", t);
    }
  }
});

  /**
   * Hands the decline message to the checkpoint coordinator; an {@link Exception} thrown
   * while doing so is logged together with the message instead of being rethrown.
   */
  @Override
  public void run() {
    try {
      checkpointCoordinator.receiveDeclineMessage(decline);
    } catch (Exception failure) {
      // the trailing exception argument makes the logger include the stack trace
      log.error("Error in CheckpointCoordinator while processing {}", decline, failure);
    }
  }
});

/**
 * Aborts the given pending checkpoint as declined, records its ID as recently seen and,
 * when no other non-discarded pending checkpoint with an equal or higher ID remains,
 * triggers the queued requests.
 *
 * <p>The caller must hold {@code lock} (checked via assertion).
 *
 * @param pendingCheckpoint the checkpoint to discard; must not be null
 * @param cause the reason for discarding, or null; only its message is logged
 */
private void discardCheckpoint(PendingCheckpoint pendingCheckpoint, @Nullable Throwable cause) {
  assert(Thread.holdsLock(lock));
  Preconditions.checkNotNull(pendingCheckpoint);

  final long checkpointId = pendingCheckpoint.getCheckpointId();

  final String reason;
  if (cause == null) {
    reason = "";
  } else {
    reason = cause.getMessage();
  }
  LOG.info("Discarding checkpoint {} of job {} because: {}", checkpointId, job, reason);

  pendingCheckpoint.abortDeclined();
  rememberRecentCheckpointId(checkpointId);

  // No extra "dissolving" checkpoint needs to be scheduled: the cancellation barriers
  // already break downstream alignments. All that is left is to resume suspended queued
  // requests, and only if no live pending checkpoint at or after this one still exists.
  for (PendingCheckpoint other : pendingCheckpoints.values()) {
    if (other.isDiscarded()) {
      continue;
    }
    if (other.getCheckpointId() >= pendingCheckpoint.getCheckpointId()) {
      // a more recent (or same-ID) checkpoint is still pending; nothing to resume
      return;
    }
  }

  triggerQueuedRequests();
}

/**
 * Resets every given execution vertex for a new execution, reloads checkpointed state when
 * a checkpoint coordinator is present, and then notifies the graph manager of the failover.
 *
 * @param modVersion version passed to each vertex's reset call
 * @param executionVertices the vertices to reset; all are reset with the same timestamp
 * @throws Exception propagated from the reset, restore or notification calls made here
 */
public void resetExecutionVerticesAndNotify(long modVersion, List<ExecutionVertex> executionVertices) throws Exception {
  // one timestamp is shared by all vertices reset in this call
  final long resetTimestamp = System.currentTimeMillis();

  final List<ExecutionVertexID> resetVertexIds = new ArrayList<>(executionVertices.size());
  for (ExecutionVertex vertex : executionVertices) {
    vertex.resetForNewExecution(resetTimestamp, modVersion);
    resetVertexIds.add(vertex.getExecutionVertexID());
  }

  // If there is checkpointed state, reload it into the executions. The scheduler is
  // restarted to ensure the EXACTLY_ONCE mechanism and so that a new checkpoint can be
  // triggered without waiting for the old checkpoint to expire.
  if (checkpointCoordinator != null) {
    checkpointCoordinator.stopCheckpointScheduler();
    // NOTE(review): the two flags presumably mean errorIfNoCheckpoint=false and
    // allowNonRestoredState=true -- confirm against the coordinator's signature
    checkpointCoordinator.restoreLatestCheckpointedState(executionVertices, false, true);
    checkpointCoordinator.startCheckpointScheduler();
  }

  graphManager.notifyExecutionVertexFailover(resetVertexIds);
}

checkpointCoordinator = new CheckpointCoordinator(
  jobInformation.getJobId(),
  interval,
  if (!checkpointCoordinator.addMasterHook(hook)) {
    LOG.warn("Trying to register multiple checkpoint hooks with the name: {}", hook.getIdentifier());
checkpointCoordinator.setCheckpointStatsTracker(checkpointStatsTracker);
  registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());

  pendingCheckpoints.remove(checkpointId);
  triggerQueuedRequests();
rememberRecentCheckpointId(checkpointId);
dropSubsumedCheckpoints(checkpointId);

      completePendingCheckpoint(checkpoint);
      message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
      message.getCheckpointId(), message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());

/**
 * Restores the latest checkpointed state by delegating to the checkpoint coordinator for
 * all vertices, while holding {@code progressLock}. Does nothing when no checkpoint
 * coordinator is set.
 *
 * <p>The recovery of checkpoints might block. Make sure that calls to this method don't
 * block the job manager actor and run asynchronously.
 *
 * @param errorIfNoCheckpoint Fail if there is no checkpoint available
 * @param allowNonRestoredState Allow to skip checkpoint state that cannot be mapped
 * to the ExecutionGraph vertices (if the checkpoint contains state for a
 * job vertex that is not part of this ExecutionGraph).
 * @throws Exception propagated from the checkpoint coordinator's restore call
 */
public void restoreLatestCheckpointedState(boolean errorIfNoCheckpoint, boolean allowNonRestoredState) throws Exception {
  synchronized (progressLock) {
    if (checkpointCoordinator == null) {
      // no coordinator configured, so there is nothing to restore
      return;
    }
    checkpointCoordinator.restoreLatestCheckpointedState(
      getAllVertices(), errorIfNoCheckpoint, allowNonRestoredState);
  }
}

/**
 * Aborts and removes every pending checkpoint whose ID is lower than the given checkpoint
 * ID and which reports that it can be subsumed. Each dropped ID is remembered as recent.
 *
 * @param checkpointId ID of the checkpoint that subsumes the older ones
 */
private void dropSubsumedCheckpoints(long checkpointId) {
  for (Iterator<Map.Entry<Long, PendingCheckpoint>> it = pendingCheckpoints.entrySet().iterator(); it.hasNext(); ) {
    final PendingCheckpoint candidate = it.next().getValue();

    // keep checkpoints that are not older than the given one or cannot be subsumed
    if (candidate.getCheckpointId() >= checkpointId || !candidate.canBeSubsumed()) {
      continue;
    }

    rememberRecentCheckpointId(candidate.getCheckpointId());
    candidate.abortSubsumed();
    // remove through the iterator so the map can be modified during iteration
    it.remove();
  }
}

checkpointCoordinator = new CheckpointCoordinator(
  jobInformation.getJobId(),
  interval,
  if (!checkpointCoordinator.addMasterHook(hook)) {
    LOG.warn("Trying to register multiple checkpoint hooks with the name: {}", hook.getIdentifier());
checkpointCoordinator.setCheckpointStatsTracker(checkpointStatsTracker);
  registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());

  pendingCheckpoints.remove(checkpointId);
  triggerQueuedRequests();
rememberRecentCheckpointId(checkpointId);
dropSubsumedCheckpoints(checkpointId);

      completePendingCheckpoint(checkpoint);
      message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
      message.getCheckpointId(), message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());

/**
 * Restores the latest checkpointed state by delegating to the checkpoint coordinator for
 * all vertices, while holding {@code progressLock}. Does nothing when no checkpoint
 * coordinator is set.
 *
 * <p>The recovery of checkpoints might block. Make sure that calls to this method don't
 * block the job manager actor and run asynchronously.
 *
 * @param errorIfNoCheckpoint Fail if there is no checkpoint available
 * @param allowNonRestoredState Allow to skip checkpoint state that cannot be mapped
 * to the ExecutionGraph vertices (if the checkpoint contains state for a
 * job vertex that is not part of this ExecutionGraph).
 * @throws Exception propagated from the checkpoint coordinator's restore call
 */
public void restoreLatestCheckpointedState(boolean errorIfNoCheckpoint, boolean allowNonRestoredState) throws Exception {
  synchronized (progressLock) {
    if (checkpointCoordinator == null) {
      // no coordinator configured, so there is nothing to restore
      return;
    }
    checkpointCoordinator.restoreLatestCheckpointedState(
      getAllVertices(), errorIfNoCheckpoint, allowNonRestoredState);
  }
}

/**
 * Aborts and removes every pending checkpoint whose ID is lower than the given checkpoint
 * ID and which reports that it can be subsumed. Each dropped ID is remembered as recent.
 *
 * @param checkpointId ID of the checkpoint that subsumes the older ones
 */
private void dropSubsumedCheckpoints(long checkpointId) {
  for (Iterator<Map.Entry<Long, PendingCheckpoint>> it = pendingCheckpoints.entrySet().iterator(); it.hasNext(); ) {
    final PendingCheckpoint candidate = it.next().getValue();

    // keep checkpoints that are not older than the given one or cannot be subsumed
    if (candidate.getCheckpointId() >= checkpointId || !candidate.canBeSubsumed()) {
      continue;
    }

    rememberRecentCheckpointId(candidate.getCheckpointId());
    candidate.abortSubsumed();
    // remove through the iterator so the map can be modified during iteration
    it.remove();
  }
}

checkpointCoordinator = new CheckpointCoordinator(
  jobInformation.getJobId(),
  interval,
  if (!checkpointCoordinator.addMasterHook(hook)) {
    LOG.warn("Trying to register multiple checkpoint hooks with the name: {}", hook.getIdentifier());
checkpointCoordinator.setCheckpointStatsTracker(checkpointStatsTracker);
  registerJobStatusListener(checkpointCoordinator.createActivatorDeactivator());

    dropSubsumedCheckpoints(checkpointId);
  triggerQueuedRequests();
rememberRecentCheckpointId(checkpointId);

rememberRecentCheckpointId(checkpointId);
  triggerQueuedRequests();

      completePendingCheckpoint(checkpoint);
      message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
      message.getCheckpointId(), message.getTaskExecutionId(), message.getJob());
    discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());
discardSubtaskState(message.getJob(), message.getTaskExecutionId(), message.getCheckpointId(), message.getSubtaskState());

/**
 * Restores the latest checkpointed state by delegating to the checkpoint coordinator for
 * all vertices, while holding {@code progressLock}. Does nothing when no checkpoint
 * coordinator is set.
 *
 * <p>The recovery of checkpoints might block. Make sure that calls to this method don't
 * block the job manager actor and run asynchronously.
 *
 * @param errorIfNoCheckpoint Fail if there is no checkpoint available
 * @param allowNonRestoredState Allow to skip checkpoint state that cannot be mapped
 * to the ExecutionGraph vertices (if the checkpoint contains state for a
 * job vertex that is not part of this ExecutionGraph).
 * @throws Exception propagated from the checkpoint coordinator's restore call
 */
public void restoreLatestCheckpointedState(boolean errorIfNoCheckpoint, boolean allowNonRestoredState) throws Exception {
  synchronized (progressLock) {
    if (checkpointCoordinator == null) {
      // no coordinator configured, so there is nothing to restore
      return;
    }
    checkpointCoordinator.restoreLatestCheckpointedState(
      getAllVertices(), errorIfNoCheckpoint, allowNonRestoredState);
  }
}

/**
 * Aborts and removes every pending checkpoint whose ID is lower than the given checkpoint
 * ID and which reports that it can be subsumed. Each dropped ID is remembered as recent.
 *
 * @param checkpointId ID of the checkpoint that subsumes the older ones
 */
private void dropSubsumedCheckpoints(long checkpointId) {
  for (Iterator<Map.Entry<Long, PendingCheckpoint>> it = pendingCheckpoints.entrySet().iterator(); it.hasNext(); ) {
    final PendingCheckpoint candidate = it.next().getValue();

    // keep checkpoints that are not older than the given one or cannot be subsumed
    if (candidate.getCheckpointId() >= checkpointId || !candidate.canBeSubsumed()) {
      continue;
    }

    rememberRecentCheckpointId(candidate.getCheckpointId());
    candidate.abortSubsumed();
    // remove through the iterator so the map can be modified during iteration
    it.remove();
  }
}

Javadoc

The checkpoint coordinator coordinates the distributed snapshots of operators and state. It triggers the checkpoint by sending the messages to the relevant tasks and collects the checkpoint acknowledgements. It also collects and maintains the overview of the state handles reported by the tasks that acknowledge the checkpoint.

Most used methods

<init>
addMasterHook
Adds the given master hook to the checkpoint coordinator. This method does nothing, if the checkpoin
completePendingCheckpoint
Try to complete the given pending checkpoint. Important: This method should only be called in the che…
createActivatorDeactivator
discardSubtaskState
Discards the given state object asynchronously belonging to the given job, execution attempt id and
dropSubsumedCheckpoints
receiveAcknowledgeMessage
Receives an AcknowledgeCheckpoint message and returns whether the message was associated with a pend
receiveDeclineMessage
Receives a DeclineCheckpoint message for a pending checkpoint.
rememberRecentCheckpointId
restoreLatestCheckpointedState
Restores the latest checkpointed state.
setCheckpointStatsTracker
Sets the checkpoint stats tracker.
shutdown
Shuts down the checkpoint coordinator. After this method has been called, the coordinator does not ac…

Popular in Java

Making http requests using okhttp
getApplicationContext (Context)
findViewById (Activity)
scheduleAtFixedRate (ScheduledExecutorService)
FileNotFoundException (java.io)
Thrown when a file specified by a program cannot be found.
Executors (java.util.concurrent)
Factory and utility methods for Executor, ExecutorService, ScheduledExecutorService, ThreadFactory,
Manifest (java.util.jar)
The Manifest class is used to obtain attribute information for a JarFile and its entries.
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
Top 12 Jupyter Notebook extensions

How to use CheckpointCoordinator in org.apache.flink.runtime.checkpoint

Best Java code snippets using org.apache.flink.runtime.checkpoint.CheckpointCoordinator (Showing top 20 results out of 315)

How to use
CheckpointCoordinator
in
org.apache.flink.runtime.checkpoint