/**
 * Handles a checkpoint failure by declining the checkpoint on the environment.
 *
 * @param checkpointMetaData meta data of the failed checkpoint; only its id is used
 * @param exception the failure cause that is passed on to the environment
 * @throws Exception declared by the overridden contract; this body only delegates
 */
@Override
public void tryHandleCheckpointException(
        CheckpointMetaData checkpointMetaData,
        Exception exception) throws Exception {

    final long failedCheckpointId = checkpointMetaData.getCheckpointId();
    environment.declineCheckpoint(failedCheckpointId, exception);
}
} // closes the enclosing type, whose header lies outside this chunk
/**
 * Emits a debug message stating that the asynchronous checkpointing operation
 * has already completed and that the state handles are therefore not cleaned up.
 * The message names the owner and the checkpoint id.
 */
private void logFailedCleanupAttempt() {
    final String ownerName = owner.getName();
    final long checkpointId = checkpointMetaData.getCheckpointId();

    LOG.debug(
        "{} - asynchronous checkpointing operation for checkpoint {} has "
            + "already been completed. Thus, the state handles are not cleaned up.",
        ownerName,
        checkpointId);
}
} // closes the enclosing type, whose header lies outside this chunk
/**
 * Accepts an object only if it is non-null, its runtime class is exactly
 * {@code CheckpointMetaData} (subclasses are rejected), and its checkpoint id
 * equals the {@code checkpointId} in scope.
 *
 * @param o the candidate object, may be {@code null}
 * @return {@code true} if all three conditions hold, {@code false} otherwise
 */
@Override
public boolean matches(Object o) {
    if (o == null) {
        return false;
    }
    if (o.getClass() != CheckpointMetaData.class) {
        return false;
    }

    final CheckpointMetaData candidate = (CheckpointMetaData) o;
    return candidate.getCheckpointId() == checkpointId;
}
/**
 * Tests whether the argument is a {@code CheckpointMetaData} instance (exact
 * class, not a subclass) carrying the {@code checkpointId} in scope.
 *
 * @param o the object to test, may be {@code null}
 * @return {@code true} on an exact-class instance with an equal checkpoint id
 */
@Override
public boolean matches(Object o) {
    // Exact class comparison on purpose: the cast below is only reached for this type.
    final boolean isExactMetaData = o != null && o.getClass() == CheckpointMetaData.class;
    return isExactMetaData && ((CheckpointMetaData) o).getCheckpointId() == checkpointId;
}
/**
 * Performs the checkpoint described by the given meta data, options and metrics.
 *
 * <p>A {@code CancelTaskException} is logged at info level and rethrown
 * unchanged. Any other exception is wrapped in a new {@code Exception} whose
 * message names the checkpoint id and the operator.
 *
 * @param checkpointMetaData meta data of the checkpoint to perform
 * @param checkpointOptions options passed through to the checkpoint
 * @param checkpointMetrics metrics passed through to the checkpoint
 * @throws Exception the original cancellation, or a wrapper around any other failure
 */
@Override
public void triggerCheckpointOnBarrier(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        CheckpointMetrics checkpointMetrics) throws Exception {

    try {
        performCheckpoint(checkpointMetaData, checkpointOptions, checkpointMetrics);
    }
    catch (CancelTaskException cancellation) {
        // Cancellation is passed on as-is; it is only noted in the log.
        LOG.info(
            "Operator {} was cancelled while performing checkpoint {}.",
            getName(),
            checkpointMetaData.getCheckpointId());
        throw cancellation;
    }
    catch (Exception failure) {
        final String message =
            "Could not perform checkpoint " + checkpointMetaData.getCheckpointId()
                + " for operator " + getName() + '.';
        throw new Exception(message, failure);
    }
}
/**
 * Starts a state snapshot on the given operator and stores the returned
 * futures in {@code operatorSnapshotsInProgress} under the operator's id.
 * A {@code null} operator is ignored.
 *
 * @param op the operator to snapshot, may be {@code null}
 * @throws Exception if the operator's {@code snapshotState} call fails
 */
@SuppressWarnings("deprecation")
private void checkpointStreamOperator(StreamOperator<?> op) throws Exception {
    if (op == null) {
        return;
    }

    final OperatorSnapshotFutures pendingSnapshot = op.snapshotState(
        checkpointMetaData.getCheckpointId(),
        checkpointMetaData.getTimestamp(),
        checkpointOptions,
        storageLocation);

    operatorSnapshotsInProgress.put(op.getOperatorID(), pendingSnapshot);
}
"Could not materialize checkpoint " + checkpointMetaData.getCheckpointId() + " for operator " + owner.getName() + '.', e);
private void cleanup() throws Exception { LOG.debug( "Cleanup AsyncCheckpointRunnable for checkpoint {} of {}.", checkpointMetaData.getCheckpointId(), owner.getName()); Exception exception = null; // clean up ongoing operator snapshot results and non partitioned state handles for (OperatorSnapshotFutures operatorSnapshotResult : operatorSnapshotsInProgress.values()) { if (operatorSnapshotResult != null) { try { operatorSnapshotResult.cancel(); } catch (Exception cancelException) { exception = ExceptionUtils.firstOrSuppressed(cancelException, exception); } } } if (null != exception) { throw exception; } }
@Override public boolean triggerCheckpoint(CheckpointMetaData checkpointMetaData, CheckpointOptions checkpointOptions) throws Exception { try { // No alignment if we inject a checkpoint CheckpointMetrics checkpointMetrics = new CheckpointMetrics() .setBytesBufferedInAlignment(0L) .setAlignmentDurationNanos(0L); return performCheckpoint(checkpointMetaData, checkpointOptions, checkpointMetrics); } catch (Exception e) { // propagate exceptions only if the task is still in "running" state if (isRunning) { throw new Exception("Could not perform checkpoint " + checkpointMetaData.getCheckpointId() + " for operator " + getName() + '.', e); } else { LOG.debug("Could not perform checkpoint {} for operator {} while the " + "invokable was not in state running.", checkpointMetaData.getCheckpointId(), getName(), e); return false; } } }
checkpointMetaData.getCheckpointId(), checkpointOptions.getCheckpointType(), getName()); operatorChain.prepareSnapshotPreBarrier(checkpointMetaData.getCheckpointId()); checkpointMetaData.getCheckpointId(), checkpointMetaData.getTimestamp(), checkpointOptions); final CancelCheckpointMarker message = new CancelCheckpointMarker(checkpointMetaData.getCheckpointId()); Exception exception = null;
private void reportCompletedSnapshotStates( TaskStateSnapshot acknowledgedTaskStateSnapshot, TaskStateSnapshot localTaskStateSnapshot, long asyncDurationMillis) { TaskStateManager taskStateManager = owner.getEnvironment().getTaskStateManager(); boolean hasAckState = acknowledgedTaskStateSnapshot.hasState(); boolean hasLocalState = localTaskStateSnapshot.hasState(); Preconditions.checkState(hasAckState || !hasLocalState, "Found cached state but no corresponding primary state is reported to the job " + "manager. This indicates a problem."); // we signal stateless tasks by reporting null, so that there are no attempts to assign empty state // to stateless tasks on restore. This enables simple job modifications that only concern // stateless without the need to assign them uids to match their (always empty) states. taskStateManager.reportTaskStateSnapshots( checkpointMetaData, checkpointMetrics, hasAckState ? acknowledgedTaskStateSnapshot : null, hasLocalState ? localTaskStateSnapshot : null); LOG.debug("{} - finished asynchronous part of checkpoint {}. Asynchronous duration: {} ms", owner.getName(), checkpointMetaData.getCheckpointId(), asyncDurationMillis); LOG.trace("{} - reported the following states in snapshot for checkpoint {}: {}.", owner.getName(), checkpointMetaData.getCheckpointId(), acknowledgedTaskStateSnapshot); }
/**
 * Resolves the storage location for the checkpoint and runs a
 * {@code CheckpointingOperation} against it.
 *
 * @param checkpointMetaData meta data of the checkpoint; its id selects the storage location
 * @param checkpointOptions options; their target location is passed to the storage resolver
 * @param checkpointMetrics metrics handed to the checkpointing operation
 * @throws Exception if resolving the location or executing the operation fails
 */
private void checkpointState(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        CheckpointMetrics checkpointMetrics) throws Exception {

    final long checkpointId = checkpointMetaData.getCheckpointId();

    final CheckpointStreamFactory streamFactory =
        checkpointStorage.resolveCheckpointStorageLocation(
            checkpointId,
            checkpointOptions.getTargetLocation());

    new CheckpointingOperation(
        this,
        checkpointMetaData,
        checkpointOptions,
        streamFactory,
        checkpointMetrics).executeCheckpointing();
}
/**
 * Verifies that the acknowledgement carries the job id, execution attempt id,
 * checkpoint id and metrics held in the enclosing scope, then sets
 * {@code jmReported} to {@code true}.
 *
 * @param lJobID job id received with the acknowledgement
 * @param lExecutionAttemptID execution attempt id received with the acknowledgement
 * @param lCheckpointId checkpoint id received with the acknowledgement
 * @param lCheckpointMetrics metrics received with the acknowledgement
 * @param lSubtaskState received state snapshot; not inspected here
 */
@Override
public void acknowledgeCheckpoint(
        JobID lJobID,
        ExecutionAttemptID lExecutionAttemptID,
        long lCheckpointId,
        CheckpointMetrics lCheckpointMetrics,
        TaskStateSnapshot lSubtaskState) {

    Assert.assertEquals(jobID, lJobID);
    Assert.assertEquals(executionAttemptID, lExecutionAttemptID);

    final long expectedCheckpointId = checkpointMetaData.getCheckpointId();
    Assert.assertEquals(expectedCheckpointId, lCheckpointId);

    Assert.assertEquals(checkpointMetrics, lCheckpointMetrics);

    // Reached only if every assertion above passed.
    jmReported.set(true);
}
}; // closes the enclosing anonymous class, whose header lies outside this chunk
/**
 * Checks the triggered checkpoint against the next entry of {@code checkpointIDs}.
 *
 * <p>A non-negative entry must equal the checkpoint id, and the timestamp must
 * be positive. A negative entry fails the test, because an abort was expected
 * rather than a trigger.
 *
 * @param checkpointMetaData meta data of the triggered checkpoint
 * @param checkpointOptions not inspected here
 * @param checkpointMetrics not inspected here
 */
@Override
public void triggerCheckpointOnBarrier(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        CheckpointMetrics checkpointMetrics) throws Exception {

    assertTrue("More checkpoints than expected", i < checkpointIDs.length);

    final long expected = checkpointIDs[i++];

    if (expected < 0) {
        fail("got 'triggerCheckpointOnBarrier()' when expecting an 'abortCheckpointOnBarrier()'");
        return;
    }

    assertEquals("wrong checkpoint id", expected, checkpointMetaData.getCheckpointId());
    assertTrue(checkpointMetaData.getTimestamp() > 0);
}
/**
 * Acknowledges the checkpoint immediately with a snapshot that holds a single
 * empty {@code OperatorSubtaskState}, keyed by the operator id derived from
 * the job vertex id. It then counts down {@code triggerCheckpointLatch}.
 *
 * @param checkpointMetaData meta data of the checkpoint; only its id is used
 * @param checkpointOptions not inspected here
 * @return always {@code true}
 */
@Override
public boolean triggerCheckpoint(
        final CheckpointMetaData checkpointMetaData,
        final CheckpointOptions checkpointOptions) {

    final TaskStateSnapshot snapshot = new TaskStateSnapshot();

    final OperatorID operatorId = OperatorID.fromJobVertexID(getEnvironment().getJobVertexId());
    final OperatorSubtaskState emptySubtaskState = new OperatorSubtaskState();
    snapshot.putSubtaskStateByOperatorID(operatorId, emptySubtaskState);

    getEnvironment().acknowledgeCheckpoint(
        checkpointMetaData.getCheckpointId(),
        new CheckpointMetrics(),
        snapshot);

    triggerCheckpointLatch.countDown();

    return true;
}
/**
 * Validates a triggered checkpoint and records its alignment metric.
 *
 * <p>The checkpoint id must equal {@code nextExpectedCheckpointId} unless that
 * field is {@code -1}. The timestamp must be positive and both alignment
 * metrics non-negative. Afterwards the expected id is incremented and the
 * reported buffered byte count is stored.
 *
 * @param checkpointMetaData meta data of the triggered checkpoint
 * @param checkpointOptions not inspected here
 * @param checkpointMetrics alignment metrics reported with the checkpoint
 */
@Override
public void triggerCheckpointOnBarrier(
        CheckpointMetaData checkpointMetaData,
        CheckpointOptions checkpointOptions,
        CheckpointMetrics checkpointMetrics) throws Exception {

    // -1 means "no expectation"; otherwise the ids must match.
    final boolean idAsExpected =
        nextExpectedCheckpointId == -1L
            || nextExpectedCheckpointId == checkpointMetaData.getCheckpointId();
    assertTrue("wrong checkpoint id", idAsExpected);

    final boolean timestampPositive = checkpointMetaData.getTimestamp() > 0;
    assertTrue(timestampPositive);

    final boolean bufferedBytesValid = checkpointMetrics.getBytesBufferedInAlignment() >= 0;
    assertTrue(bufferedBytesValid);

    final boolean alignmentDurationValid = checkpointMetrics.getAlignmentDurationNanos() >= 0;
    assertTrue(alignmentDurationValid);

    nextExpectedCheckpointId++;
    lastReportedBytesBufferedInAlignment = checkpointMetrics.getBytesBufferedInAlignment();
}
/**
 * Verifies that the handler created with the flag {@code false} declines the
 * checkpoint on the environment instead of rethrowing the exception.
 * NOTE(review): the exact meaning of the boolean factory flag is not visible
 * here; confirm against {@code CheckpointExceptionHandlerFactory}.
 */
@Test
public void testDecliningHandler() {
    DeclineDummyEnvironment decliningEnvironment = new DeclineDummyEnvironment();

    CheckpointExceptionHandler handlerUnderTest =
        new CheckpointExceptionHandlerFactory()
            .createCheckpointExceptionHandler(false, decliningEnvironment);

    CheckpointMetaData failedCheckpoint = new CheckpointMetaData(42L, 4711L);
    Exception cause = new Exception("test");

    try {
        handlerUnderTest.tryHandleCheckpointException(failedCheckpoint, cause);
    } catch (Exception e) {
        Assert.fail("Exception not handled, but rethrown.");
    }

    // The environment must have seen exactly the id and cause handed to the handler.
    Assert.assertEquals(
        failedCheckpoint.getCheckpointId(),
        decliningEnvironment.getLastDeclinedCheckpointId());
    Assert.assertEquals(
        cause,
        decliningEnvironment.getLastDeclinedCheckpointCause());
}
/**
 * Declines the checkpoint on the environment, passing along the checkpoint id
 * and the exception that caused the failure.
 *
 * @param checkpointMetaData meta data identifying the failed checkpoint
 * @param exception the cause reported with the decline
 * @throws Exception declared by the overridden contract; this body only delegates
 */
@Override
public void tryHandleCheckpointException(
        CheckpointMetaData checkpointMetaData,
        Exception exception) throws Exception {

    final long declinedId = checkpointMetaData.getCheckpointId();
    environment.declineCheckpoint(declinedId, exception);
}
} // closes the enclosing type, whose header lies outside this chunk
/**
 * Reports the failed checkpoint to the environment as declined.
 *
 * @param checkpointMetaData meta data of the failed checkpoint; supplies the id
 * @param exception the failure cause handed to the environment
 * @throws Exception declared by the overridden contract; this body only delegates
 */
@Override
public void tryHandleCheckpointException(
        CheckpointMetaData checkpointMetaData,
        Exception exception) throws Exception {

    final long idToDecline = checkpointMetaData.getCheckpointId();
    environment.declineCheckpoint(idToDecline, exception);
}
} // closes the enclosing type, whose header lies outside this chunk
/**
 * Snapshots the state of the given operator for the current checkpoint and
 * registers the resulting futures in {@code operatorSnapshotsInProgress},
 * keyed by the operator's id. Does nothing for a {@code null} operator.
 *
 * @param op the operator whose state is snapshotted, may be {@code null}
 * @throws Exception if the operator's {@code snapshotState} call fails
 */
@SuppressWarnings("deprecation")
private void checkpointStreamOperator(StreamOperator<?> op) throws Exception {
    if (op != null) {
        final long checkpointId = checkpointMetaData.getCheckpointId();
        final long checkpointTimestamp = checkpointMetaData.getTimestamp();

        final OperatorSnapshotFutures futures =
            op.snapshotState(checkpointId, checkpointTimestamp, checkpointOptions, storageLocation);

        operatorSnapshotsInProgress.put(op.getOperatorID(), futures);
    }
}