@Override public Void apply(Acknowledge acknowledge, Throwable throwable) { if (acknowledge != null) { updateSlot(slotId, allocationId); } else { if (throwable instanceof SlotOccupiedException) { SlotOccupiedException exception = (SlotOccupiedException) throwable; updateSlot(slotId, exception.getAllocationId()); } else { removeSlotRequestFromSlot(slotId, allocationId); } if (!(throwable instanceof CancellationException)) { handleFailedSlotRequest(slotId, allocationId, throwable); } else { LOG.debug("Slot allocation request {} has been cancelled.", allocationId, throwable); } } return null; } }, mainThreadExecutor);
@Override public void notifyHeartbeatTimeout(final ResourceID resourceId) { runAsync(() -> { // first check whether the timeout is still valid if (establishedResourceManagerConnection != null && establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId)) { log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId); reconnectToResourceManager(new TaskManagerException( String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId))); } else { log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId); } }); }
/**
 * Fails the request future of the given pending slot request with a
 * {@link SlotAllocationException} that wraps the supplied cause.
 *
 * <p>If the pending slot request has no request future, nothing is failed and a debug
 * message is logged instead.
 *
 * @param pendingSlotRequest pending slot request to reject
 * @param cause reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> requestFuture = pendingSlotRequest.getRequestFuture();

    if (requestFuture == null) {
        LOG.debug("Cannot reject pending slot request {}, since no request has been sent.", pendingSlotRequest.getAllocationId());
        return;
    }

    requestFuture.completeExceptionally(new SlotAllocationException(cause));
}
/**
 * Cancels the task registered in the task slot table for the given execution attempt.
 *
 * @param executionAttemptID execution attempt whose task should be cancelled
 * @return an {@link Acknowledge} once {@code cancelExecution()} has returned
 * @throws TaskException if no task is registered for the execution attempt, or if
 *         cancelling the task throws (the original throwable is attached as cause)
 */
@RpcMethod
public Acknowledge cancelTask(ExecutionAttemptID executionAttemptID) throws TaskException {
    final Task task = taskSlotTable.getTask(executionAttemptID);

    if (task == null) {
        // The message names the cancel operation so it cannot be confused with stopTask failures.
        final String message = "Cannot find task to cancel for execution " + executionAttemptID + '.';
        log.debug(message);
        throw new TaskException(message);
    }

    try {
        task.cancelExecution();
        return Acknowledge.get();
    } catch (Throwable t) {
        // Any failure is surfaced to the caller as a TaskException carrying the cause.
        throw new TaskException("Cannot cancel task for execution " + executionAttemptID + '.', t);
    }
}
/**
 * Triggers a checkpoint barrier on the task registered for the given execution attempt.
 *
 * @param executionAttemptID execution attempt whose task should receive the barrier
 * @param checkpointId id of the checkpoint to trigger
 * @param checkpointTimestamp timestamp of the checkpoint
 * @param checkpointOptions options passed on to the task
 * @return an {@link Acknowledge} after the barrier has been triggered on the task
 * @throws CheckpointException if no task is registered for the execution attempt
 */
@RpcMethod
public Acknowledge triggerCheckpoint(
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        long checkpointTimestamp,
        CheckpointOptions checkpointOptions) throws CheckpointException {
    log.debug("Trigger checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    if (targetTask == null) {
        final String errorMessage = "TaskManager received a checkpoint request for unknown task " + executionAttemptID + '.';
        log.debug(errorMessage);
        throw new CheckpointException(errorMessage);
    }

    targetTask.triggerCheckpointBarrier(checkpointId, checkpointTimestamp, checkpointOptions);
    return Acknowledge.get();
}
/**
 * Handles the expiry of a registration timeout.
 *
 * <p>Only a timeout whose id equals {@code currentRegistrationTimeoutId} is acted upon; in
 * that case a {@link RegistrationTimeoutException} mentioning the configured maximum
 * registration duration is passed to {@code onFatalError}. Any other id is ignored.
 *
 * @param registrationTimeoutId id of the timeout that fired
 */
private void registrationTimeout(@Nonnull UUID registrationTimeoutId) {
    if (!registrationTimeoutId.equals(currentRegistrationTimeoutId)) {
        // Not the currently tracked timeout: nothing to do.
        return;
    }

    final Time maxRegistrationDuration = taskManagerConfiguration.getMaxRegistrationDuration();

    final String errorMessage = String.format(
        "Could not register at the ResourceManager within the specified maximum " +
            "registration duration %s. This indicates a problem with this instance. Terminating now.",
        maxRegistrationDuration);

    onFatalError(new RegistrationTimeoutException(errorMessage));
}
/**
 * Stops the task registered in the task slot table for the given execution attempt.
 *
 * @param executionAttemptID execution attempt whose task should be stopped
 * @return an {@link Acknowledge} once {@code stopExecution()} has returned
 * @throws TaskException if no task is registered for the execution attempt, or if
 *         stopping the task throws (the original throwable is attached as cause)
 */
@RpcMethod
public Acknowledge stopTask(ExecutionAttemptID executionAttemptID) throws TaskException {
    final Task taskToStop = taskSlotTable.getTask(executionAttemptID);

    if (taskToStop == null) {
        final String notFoundMessage = "Cannot find task to stop for execution " + executionAttemptID + '.';
        log.debug(notFoundMessage);
        throw new TaskException(notFoundMessage);
    }

    try {
        taskToStop.stopExecution();
        return Acknowledge.get();
    } catch (Throwable t) {
        throw new TaskException("Cannot stop task for execution " + executionAttemptID + '.', t);
    }
}
/**
 * Fails the request future of the given pending slot request with a
 * {@link SlotAllocationException} that wraps the supplied cause.
 *
 * <p>If the pending slot request has no request future, nothing is failed and a debug
 * message is logged instead.
 *
 * @param pendingSlotRequest pending slot request to reject
 * @param cause reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> requestFuture = pendingSlotRequest.getRequestFuture();

    if (requestFuture == null) {
        LOG.debug("Cannot reject pending slot request {}, since no request has been sent.", pendingSlotRequest.getAllocationId());
        return;
    }

    requestFuture.completeExceptionally(new SlotAllocationException(cause));
}
/**
 * Notifies the task registered for the given execution attempt that a checkpoint completed.
 *
 * @param executionAttemptID execution attempt whose task should be notified
 * @param checkpointId id of the completed checkpoint
 * @param checkpointTimestamp timestamp of the checkpoint (used for logging only)
 * @return an {@link Acknowledge} after the task has been notified
 * @throws CheckpointException if no task is registered for the execution attempt
 */
@RpcMethod
public Acknowledge confirmCheckpoint(
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        long checkpointTimestamp) throws CheckpointException {
    log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    if (targetTask == null) {
        final String errorMessage = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';
        log.debug(errorMessage);
        throw new CheckpointException(errorMessage);
    }

    targetTask.notifyCheckpointComplete(checkpointId);
    return Acknowledge.get();
}
@Override public void notifyHeartbeatTimeout(final ResourceID resourceId) { runAsync(() -> { // first check whether the timeout is still valid if (establishedResourceManagerConnection != null && establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId)) { log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId); reconnectToResourceManager(new TaskManagerException( String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId))); } else { log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId); } }); }
/**
 * Handles the expiry of a registration timeout.
 *
 * <p>Only a timeout whose id equals {@code currentRegistrationTimeoutId} is acted upon; in
 * that case a {@link RegistrationTimeoutException} mentioning the configured maximum
 * registration duration is passed to {@code onFatalError}. Any other id is ignored.
 *
 * @param registrationTimeoutId id of the timeout that fired
 */
private void registrationTimeout(@Nonnull UUID registrationTimeoutId) {
    if (!registrationTimeoutId.equals(currentRegistrationTimeoutId)) {
        // Not the currently tracked timeout: nothing to do.
        return;
    }

    final Time maxRegistrationDuration = taskManagerConfiguration.getMaxRegistrationDuration();

    final String errorMessage = String.format(
        "Could not register at the ResourceManager within the specified maximum " +
            "registration duration %s. This indicates a problem with this instance. Terminating now.",
        maxRegistrationDuration);

    onFatalError(new RegistrationTimeoutException(errorMessage));
}
/**
 * Stops the task registered in the task slot table for the given execution attempt.
 *
 * <p>Failures are reported through the returned future rather than thrown: the future is
 * completed exceptionally with a {@link TaskException} if no task is registered for the
 * execution attempt or if stopping the task throws.
 *
 * @param executionAttemptID execution attempt whose task should be stopped
 * @param timeout timeout of the call (not read by this method body)
 * @return a future completed with an {@link Acknowledge} on success, or completed
 *         exceptionally with a {@link TaskException} otherwise
 */
@Override
public CompletableFuture<Acknowledge> stopTask(ExecutionAttemptID executionAttemptID, Time timeout) {
    final Task taskToStop = taskSlotTable.getTask(executionAttemptID);

    if (taskToStop == null) {
        final String notFoundMessage = "Cannot find task to stop for execution " + executionAttemptID + '.';
        log.debug(notFoundMessage);
        return FutureUtils.completedExceptionally(new TaskException(notFoundMessage));
    }

    try {
        taskToStop.stopExecution();
        return CompletableFuture.completedFuture(Acknowledge.get());
    } catch (Throwable t) {
        final TaskException failure =
            new TaskException("Cannot stop task for execution " + executionAttemptID + '.', t);
        return FutureUtils.completedExceptionally(failure);
    }
}
/**
 * Fails the request future of the given pending slot request with a
 * {@link SlotAllocationException} that wraps the supplied cause.
 *
 * <p>If the pending slot request has no request future, nothing is failed and a debug
 * message is logged instead.
 *
 * @param pendingSlotRequest pending slot request to reject
 * @param cause reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> requestFuture = pendingSlotRequest.getRequestFuture();

    if (requestFuture == null) {
        LOG.debug("Cannot reject pending slot request {}, since no request has been sent.", pendingSlotRequest.getAllocationId());
        return;
    }

    requestFuture.completeExceptionally(new SlotAllocationException(cause));
}
/**
 * Notifies the task registered for the given execution attempt that a checkpoint completed.
 *
 * @param executionAttemptID execution attempt whose task should be notified
 * @param checkpointId id of the completed checkpoint
 * @param checkpointTimestamp timestamp of the checkpoint (used for logging only)
 * @return a future completed with an {@link Acknowledge} after the task has been notified,
 *         or completed exceptionally with a {@link CheckpointException} if no task is
 *         registered for the execution attempt
 */
@Override
public CompletableFuture<Acknowledge> confirmCheckpoint(
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        long checkpointTimestamp) {
    log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    if (targetTask == null) {
        final String errorMessage = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';
        log.debug(errorMessage);
        return FutureUtils.completedExceptionally(new CheckpointException(errorMessage));
    }

    targetTask.notifyCheckpointComplete(checkpointId);
    return CompletableFuture.completedFuture(Acknowledge.get());
}
@Override public void notifyHeartbeatTimeout(final ResourceID resourceId) { runAsync(() -> { // first check whether the timeout is still valid if (establishedResourceManagerConnection != null && establishedResourceManagerConnection.getResourceManagerResourceId().equals(resourceId)) { log.info("The heartbeat of ResourceManager with id {} timed out.", resourceId); reconnectToResourceManager(new TaskManagerException( String.format("The heartbeat of ResourceManager with id %s timed out.", resourceId))); } else { log.debug("Received heartbeat timeout for outdated ResourceManager id {}. Ignoring the timeout.", resourceId); } }); }
/**
 * Handles the expiry of a registration timeout.
 *
 * <p>Only a timeout whose id equals {@code currentRegistrationTimeoutId} is acted upon; in
 * that case a {@link RegistrationTimeoutException} mentioning the configured maximum
 * registration duration is passed to {@code onFatalError}. Any other id is ignored.
 *
 * @param registrationTimeoutId id of the timeout that fired
 */
private void registrationTimeout(@Nonnull UUID registrationTimeoutId) {
    if (!registrationTimeoutId.equals(currentRegistrationTimeoutId)) {
        // Not the currently tracked timeout: nothing to do.
        return;
    }

    final Time maxRegistrationDuration = taskManagerConfiguration.getMaxRegistrationDuration();

    final String errorMessage = String.format(
        "Could not register at the ResourceManager within the specified maximum " +
            "registration duration %s. This indicates a problem with this instance. Terminating now.",
        maxRegistrationDuration);

    onFatalError(new RegistrationTimeoutException(errorMessage));
}
/**
 * Cancels the task registered in the task slot table for the given execution attempt.
 *
 * <p>Failures are reported through the returned future rather than thrown: the future is
 * completed exceptionally with a {@link TaskException} if no task is registered for the
 * execution attempt or if cancelling the task throws.
 *
 * @param executionAttemptID execution attempt whose task should be cancelled
 * @param timeout timeout of the call (not read by this method body)
 * @return a future completed with an {@link Acknowledge} on success, or completed
 *         exceptionally with a {@link TaskException} otherwise
 */
@Override
public CompletableFuture<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptID, Time timeout) {
    final Task task = taskSlotTable.getTask(executionAttemptID);

    if (task == null) {
        // The message names the cancel operation so it cannot be confused with stopTask failures.
        final String message = "Cannot find task to cancel for execution " + executionAttemptID + '.';
        log.debug(message);
        return FutureUtils.completedExceptionally(new TaskException(message));
    }

    try {
        task.cancelExecution();
        return CompletableFuture.completedFuture(Acknowledge.get());
    } catch (Throwable t) {
        // Any failure is handed to the caller via the future, with the cause attached.
        return FutureUtils.completedExceptionally(
            new TaskException("Cannot cancel task for execution " + executionAttemptID + '.', t));
    }
}
/**
 * Fails the request future of the given pending slot request with a
 * {@link SlotAllocationException} that wraps the supplied cause.
 *
 * <p>If the pending slot request has no request future, nothing is failed and a debug
 * message is logged instead.
 *
 * @param pendingSlotRequest pending slot request to reject
 * @param cause reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> requestFuture = pendingSlotRequest.getRequestFuture();

    if (requestFuture == null) {
        LOG.debug("Cannot reject pending slot request {}, since no request has been sent.", pendingSlotRequest.getAllocationId());
        return;
    }

    requestFuture.completeExceptionally(new SlotAllocationException(cause));
}
/**
 * Notifies the task registered for the given execution attempt that a checkpoint completed.
 *
 * @param executionAttemptID execution attempt whose task should be notified
 * @param checkpointId id of the completed checkpoint
 * @param checkpointTimestamp timestamp of the checkpoint (used for logging only)
 * @return a future completed with an {@link Acknowledge} after the task has been notified,
 *         or completed exceptionally with a {@link CheckpointException} if no task is
 *         registered for the execution attempt
 */
@Override
public CompletableFuture<Acknowledge> confirmCheckpoint(
        ExecutionAttemptID executionAttemptID,
        long checkpointId,
        long checkpointTimestamp) {
    log.debug("Confirm checkpoint {}@{} for {}.", checkpointId, checkpointTimestamp, executionAttemptID);

    final Task targetTask = taskSlotTable.getTask(executionAttemptID);

    if (targetTask == null) {
        final String errorMessage = "TaskManager received a checkpoint confirmation for unknown task " + executionAttemptID + '.';
        log.debug(errorMessage);
        return FutureUtils.completedExceptionally(new CheckpointException(errorMessage));
    }

    targetTask.notifyCheckpointComplete(checkpointId);
    return CompletableFuture.completedFuture(Acknowledge.get());
}
/**
 * Cancels the task registered in the task slot table for the given execution attempt.
 *
 * <p>Failures are reported through the returned future rather than thrown: the future is
 * completed exceptionally with a {@link TaskException} if no task is registered for the
 * execution attempt or if cancelling the task throws.
 *
 * @param executionAttemptID execution attempt whose task should be cancelled
 * @param timeout timeout of the call (not read by this method body)
 * @return a future completed with an {@link Acknowledge} on success, or completed
 *         exceptionally with a {@link TaskException} otherwise
 */
@Override
public CompletableFuture<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptID, Time timeout) {
    final Task task = taskSlotTable.getTask(executionAttemptID);

    if (task == null) {
        // The message names the cancel operation so it cannot be confused with stopTask failures.
        final String message = "Cannot find task to cancel for execution " + executionAttemptID + '.';
        log.debug(message);
        return FutureUtils.completedExceptionally(new TaskException(message));
    }

    try {
        task.cancelExecution();
        return CompletableFuture.completedFuture(Acknowledge.get());
    } catch (Throwable t) {
        // Any failure is handed to the caller via the future, with the cause attached.
        return FutureUtils.completedExceptionally(
            new TaskException("Cannot cancel task for execution " + executionAttemptID + '.', t));
    }
}