private Throwable runTestTaskFailingOnCheckpointError(AbstractStateBackend backend) throws Exception { Task task = createTask(new FilterOperator(), backend, mock(CheckpointResponder.class), true); // start the task and wait until it is in "restore" task.startTaskThread(); task.getExecutingThread().join(); assertEquals(ExecutionState.FAILED, task.getExecutionState()); return task.getFailureCause(); }
@Test public void testBlockingNonInterruptibleCheckpoint() throws Exception { StateBackend lockingStateBackend = new BackendForTestStream(LockingOutputStream::new); Task task = createTask(new TestOperator(), lockingStateBackend, mock(CheckpointResponder.class), true); // start the task and wait until it is in "restore" task.startTaskThread(); IN_CHECKPOINT_LATCH.await(); // cancel the task and wait. unless cancellation properly closes // the streams, this will never terminate task.cancelExecution(); task.getExecutingThread().join(); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
if (task.getFailureCause() != null) { throw new Exception("Task failed", task.getFailureCause());
private void testRestoreWithInterrupt(int mode) throws Exception { IN_RESTORE_LATCH.reset(); Configuration taskConfig = new Configuration(); StreamConfig cfg = new StreamConfig(taskConfig); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); switch (mode) { case OPERATOR_MANAGED: case OPERATOR_RAW: case KEYED_MANAGED: case KEYED_RAW: cfg.setStateKeySerializer(IntSerializer.INSTANCE); cfg.setStreamOperator(new StreamSource<>(new TestSource(mode))); break; default: throw new IllegalArgumentException(); } StreamStateHandle lockingHandle = new InterruptLockingStateHandle(); Task task = createTask(cfg, taskConfig, lockingHandle, mode); // start the task and wait until it is in "restore" task.startTaskThread(); IN_RESTORE_LATCH.await(); // trigger cancellation and signal to continue task.cancelExecution(); task.getExecutingThread().join(30000); if (task.getExecutionState() == ExecutionState.CANCELING) { fail("Task is stuck and not canceling"); } assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
resultPartitionManager.releasePartitionsProducedBy(executionId, task.getFailureCause());
resultPartitionManager.releasePartitionsProducedBy(executionId, task.getFailureCause());
resultPartitionManager.releasePartitionsProducedBy(executionId, task.getFailureCause());
resultPartitionManager.releasePartitionsProducedBy(executionId, task.getFailureCause());
private void unregisterTaskAndNotifyFinalState( final JobMasterGateway jobMasterGateway, final ExecutionAttemptID executionAttemptID) { Task task = taskSlotTable.removeTask(executionAttemptID); if (task != null) { if (!task.getExecutionState().isTerminal()) { try { task.failExternally(new IllegalStateException("Task is being remove from TaskManager.")); } catch (Exception e) { log.error("Could not properly fail task.", e); } } log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.", task.getExecutionState(), task.getTaskInfo().getTaskName(), task.getExecutionId()); AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot(); updateTaskExecutionState( jobMasterGateway, new TaskExecutionState( task.getJobID(), task.getExecutionId(), task.getExecutionState(), task.getFailureCause(), accumulatorSnapshot, task.getMetricGroup().getIOMetricGroup().createSnapshot())); } else { log.error("Cannot find task with ID {} to unregister.", executionAttemptID); } }
private void unregisterTaskAndNotifyFinalState( final JobMasterGateway jobMasterGateway, final ExecutionAttemptID executionAttemptID) { Task task = taskSlotTable.removeTask(executionAttemptID); if (task != null) { if (!task.getExecutionState().isTerminal()) { try { task.failExternally(new IllegalStateException("Task is being remove from TaskManager.")); } catch (Exception e) { log.error("Could not properly fail task.", e); } } log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.", task.getExecutionState(), task.getTaskInfo().getTaskName(), task.getExecutionId()); AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot(); updateTaskExecutionState( jobMasterGateway, new TaskExecutionState( task.getJobID(), task.getExecutionId(), task.getExecutionState(), task.getFailureCause(), accumulatorSnapshot, task.getMetricGroup().getIOMetricGroup().createSnapshot())); } else { log.error("Cannot find task with ID {} to unregister.", executionAttemptID); } }
private void unregisterTaskAndNotifyFinalState( final UUID jobMasterLeaderId, final JobMasterGateway jobMasterGateway, final ExecutionAttemptID executionAttemptID) { Task task = taskSlotTable.removeTask(executionAttemptID); if (task != null) { if (!task.getExecutionState().isTerminal()) { try { task.failExternally(new IllegalStateException("Task is being remove from TaskManager.")); } catch (Exception e) { log.error("Could not properly fail task.", e); } } log.info("Un-registering task and sending final execution state {} to JobManager for task {} {}.", task.getExecutionState(), task.getTaskInfo().getTaskName(), task.getExecutionId()); AccumulatorSnapshot accumulatorSnapshot = task.getAccumulatorRegistry().getSnapshot(); updateTaskExecutionState( jobMasterLeaderId, jobMasterGateway, new TaskExecutionState( task.getJobID(), task.getExecutionId(), task.getExecutionState(), task.getFailureCause(), accumulatorSnapshot, task.getMetricGroup().getIOMetricGroup().createSnapshot())); } else { log.error("Cannot find task with ID {} to unregister.", executionAttemptID); } }
task.getExecutionId(), task.getExecutionState(), task.getFailureCause(), accumulatorSnapshot, task.getMetricGroup().getIOMetricGroup().createSnapshot()));