/**
 * Handles a heartbeat timeout for the given resource by asking the job master gateway to
 * disconnect the corresponding TaskManager, passing a {@link TimeoutException} as the cause.
 *
 * @param resourceID id of the TaskManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(ResourceID resourceID) {
    final String reason = "Heartbeat of TaskManager with id " + resourceID + " timed out.";
    final TimeoutException timeoutCause = new TimeoutException(reason);

    jobMasterGateway.disconnectTaskManager(resourceID, timeoutCause);
}
/**
 * Passes a received heartbeat on to the job master gateway via
 * {@code heartbeatFromTaskManager}.
 *
 * @param resourceID id of the heartbeat sender
 * @param payload accumulator report that accompanied the heartbeat
 */
@Override
public void receiveHeartbeat(
        ResourceID resourceID,
        AccumulatorReport payload) {
    jobMasterGateway.heartbeatFromTaskManager(resourceID, payload);
}
/**
 * Issues the registration call against the given job master gateway, using this instance's
 * RPC address and location.
 *
 * @param gateway gateway on which {@code registerTaskManager} is invoked
 * @param jobMasterId not used by this implementation
 * @param timeoutMillis timeout of the registration call, in milliseconds
 * @return future holding the gateway's registration response
 * @throws Exception declared by the overridden method
 */
@Override
protected CompletableFuture<RegistrationResponse> invokeRegistration(
        JobMasterGateway gateway,
        JobMasterId jobMasterId,
        long timeoutMillis) throws Exception {
    final Time registrationTimeout = Time.milliseconds(timeoutMillis);

    return gateway.registerTaskManager(
        taskManagerRpcAddress,
        taskManagerLocation,
        registrationTimeout);
}
}
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(getFencingToken(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message)); jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), leaderId, new Exception(message)); continue; Future<Iterable<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots( getResourceID(), reservedSlots,
/**
 * Requests the state of a partition from the job master gateway.
 *
 * @param jobId not used by this implementation
 * @param resultId id of the intermediate data set, forwarded to the gateway
 * @param partitionId id of the result partition, forwarded to the gateway
 * @return the future returned by the gateway's {@code requestPartitionState} call
 */
@Override
public CompletableFuture<ExecutionState> requestPartitionProducerState(
        JobID jobId,
        IntermediateDataSetID resultId,
        ResultPartitionID partitionId) {
    final CompletableFuture<ExecutionState> producerState =
        jobMasterGateway.requestPartitionState(resultId, partitionId);

    return producerState;
}
}
/**
 * Asks the job master gateway to schedule or update the consumers of the given partition.
 *
 * <p>If the request completes exceptionally, the error is logged and the task is failed
 * through {@code taskActions}; that handling runs on {@code executor}.
 *
 * @param jobId not used by this implementation
 * @param partitionId partition whose consumers shall be scheduled or updated
 * @param taskActions used to fail the task when the notification fails
 */
@Override
public void notifyPartitionConsumable(JobID jobId, ResultPartitionID partitionId, final TaskActions taskActions) {
    // failure handler, defined up front so the call chain below stays short
    final ApplyFunction<Throwable, Void> failTaskOnError = new ApplyFunction<Throwable, Void>() {
        @Override
        public Void apply(Throwable failure) {
            LOG.error("Could not schedule or update consumers at the JobManager.", failure);

            taskActions.failExternally(new RuntimeException("Could not notify JobManager to schedule or update consumers.", failure));

            return null;
        }
    };

    final Future<Acknowledge> pendingAck = jobMasterGateway.scheduleOrUpdateConsumers(
        jobMasterLeaderId,
        partitionId,
        timeout);

    pendingAck.exceptionallyAsync(failTaskOnError, executor);
}
}
/**
 * Requests the next input split for the given operator from the job master gateway and
 * deserializes it with the supplied class loader.
 *
 * <p>Every split that is returned is also recorded under its operator id in
 * {@code assignedInutSplits}.
 *
 * @param operatorID operator the split is requested for; must not be null
 * @param userCodeClassLoader class loader used to deserialize the split; must not be null
 * @return the next input split, or {@code null} if the gateway answered with an empty split
 * @throws InputSplitProviderException if requesting, waiting for, or deserializing the split
 *     fails; when the waiting thread was interrupted, its interrupt flag is restored before
 *     the exception is thrown
 */
@Override
public InputSplit getNextInputSplit(OperatorID operatorID, ClassLoader userCodeClassLoader) throws InputSplitProviderException {
    Preconditions.checkNotNull(operatorID);
    Preconditions.checkNotNull(userCodeClassLoader);

    CompletableFuture<SerializedInputSplit> futureInputSplit = jobMasterGateway.requestNextInputSplit(
        jobVertexID,
        operatorID,
        executionAttemptID);

    try {
        SerializedInputSplit serializedInputSplit = futureInputSplit.get(timeout.getSize(), timeout.getUnit());

        if (serializedInputSplit.isEmpty()) {
            return null;
        }

        InputSplit inputSplit = InstantiationUtil.deserializeObject(serializedInputSplit.getInputSplitData(), userCodeClassLoader);

        // single lookup; the list is only allocated the first time an operator receives a split
        assignedInutSplits.computeIfAbsent(operatorID, ignored -> new ArrayList<>(1)).add(inputSplit);

        return inputSplit;
    } catch (InterruptedException e) {
        // keep the interruption visible to the caller instead of silently clearing it
        Thread.currentThread().interrupt();
        throw new InputSplitProviderException("Requesting the next input split failed.", e);
    } catch (Exception e) {
        throw new InputSplitProviderException("Requesting the next input split failed.", e);
    }
}
allTaskExecutionStatus.size(), jobId, jobMasterGateway.getAddress()); CompletableFuture<TaskExecutorReportResponse> acceptedSlotsFuture = jobMasterGateway.reportTasksExecutionStatus( getResourceID(), allTaskExecutionStatus,
} catch (IOException e) { log.warn("Could not properly disassociate from JobManager {}.", jobManagerConnection.getJobManagerGateway().getAddress(), e);
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final UUID jobManagerLeaderId = jobManagerRegistration.getLeaderID(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobManagerLeaderId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(jobManagerLeaderId, getLeaderSessionId(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), new Exception(message)); jobMasterGateway.failSlot(getResourceID(), offer.getAllocationId(), new Exception(message)); continue; CompletableFuture<Collection<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots( getResourceID(), reservedSlots,
/**
 * Requests the state of a partition from the job master gateway.
 *
 * @param jobId not used by this implementation
 * @param resultId id of the intermediate data set, forwarded to the gateway
 * @param partitionId id of the result partition, forwarded to the gateway
 * @return the future returned by the gateway's {@code requestPartitionState} call
 */
@Override
public CompletableFuture<ExecutionState> requestPartitionProducerState(
        JobID jobId,
        IntermediateDataSetID resultId,
        ResultPartitionID partitionId) {
    final CompletableFuture<ExecutionState> producerState =
        jobMasterGateway.requestPartitionState(resultId, partitionId);

    return producerState;
}
}
/**
 * Asks the job master gateway to schedule or update the consumers of the given partition.
 *
 * <p>Once the request completes, a completion callback runs on {@code executor}; if the
 * request failed, the error is logged and the task is failed through {@code taskActions}.
 *
 * @param jobId not used by this implementation
 * @param partitionId partition whose consumers shall be scheduled or updated
 * @param taskActions used to fail the task when the notification fails
 */
@Override
public void notifyPartitionConsumable(JobID jobId, ResultPartitionID partitionId, final TaskActions taskActions) {
    final CompletableFuture<Acknowledge> pendingAck =
        jobMasterGateway.scheduleOrUpdateConsumers(partitionId, timeout);

    pendingAck.whenCompleteAsync(
        (ignoredAck, failure) -> {
            if (failure == null) {
                // acknowledged successfully, nothing to do
                return;
            }

            LOG.error("Could not schedule or update consumers at the JobManager.", failure);

            taskActions.failExternally(new RuntimeException("Could not notify JobManager to schedule or update consumers.", failure));
        },
        executor);
}
/**
 * Requests the next input split from the job master gateway and deserializes it with the
 * supplied class loader.
 *
 * @param userCodeClassLoader class loader used to deserialize the split; must not be null
 * @return the next input split, or {@code null} if the gateway answered with an empty split
 * @throws InputSplitProviderException if requesting, waiting for, or deserializing the split
 *     fails; when the waiting thread was interrupted, its interrupt flag is restored before
 *     the exception is thrown
 */
@Override
public InputSplit getNextInputSplit(ClassLoader userCodeClassLoader) throws InputSplitProviderException {
    Preconditions.checkNotNull(userCodeClassLoader);

    CompletableFuture<SerializedInputSplit> futureInputSplit = jobMasterGateway.requestNextInputSplit(
        jobVertexID,
        executionAttemptID);

    try {
        SerializedInputSplit serializedInputSplit = futureInputSplit.get(timeout.getSize(), timeout.getUnit());

        if (serializedInputSplit.isEmpty()) {
            return null;
        }

        return InstantiationUtil.deserializeObject(serializedInputSplit.getInputSplitData(), userCodeClassLoader);
    } catch (InterruptedException e) {
        // keep the interruption visible to the caller instead of silently clearing it
        Thread.currentThread().interrupt();
        throw new InputSplitProviderException("Requesting the next input split failed.", e);
    } catch (Exception e) {
        throw new InputSplitProviderException("Requesting the next input split failed.", e);
    }
}
}
} catch (IOException e) { log.warn("Could not properly disassociate from JobManager {}.", jobManagerConnection.getJobManagerGateway().getAddress(), e);
/** * This method should be called by the framework once it detects that a currently registered * job manager has failed. * * @param jobId identifying the job whose leader shall be disconnected. * @param cause The exception which cause the JobManager failed. */ protected void closeJobManagerConnection(JobID jobId, Exception cause) { JobManagerRegistration jobManagerRegistration = jobManagerRegistrations.remove(jobId); if (jobManagerRegistration != null) { final ResourceID jobManagerResourceId = jobManagerRegistration.getJobManagerResourceID(); final JobMasterGateway jobMasterGateway = jobManagerRegistration.getJobManagerGateway(); final JobMasterId jobMasterId = jobManagerRegistration.getJobMasterId(); log.info("Disconnect job manager {}@{} for job {} from the resource manager.", jobMasterId, jobMasterGateway.getAddress(), jobId); jobManagerHeartbeatManager.unmonitorTarget(jobManagerResourceId); jmResourceIdRegistrations.remove(jobManagerResourceId); // tell the job manager about the disconnect jobMasterGateway.disconnectResourceManager(getFencingToken(), cause); } else { log.debug("There was no registered job manager for job {}.", jobId); } }
CompletableFuture<Collection<SlotOffer>> acceptedSlotsFuture = jobMasterGateway.offerSlots( getResourceID(), reservedSlots, jobMasterGateway.failSlot( getResourceID(), acceptedSlot.getAllocationId(), jobMasterGateway.failSlot( getResourceID(), acceptedSlot.getAllocationId(),
/**
 * Handles a heartbeat timeout for the given resource by asking the job master gateway to
 * disconnect the corresponding TaskManager, passing a {@link TimeoutException} as the cause.
 *
 * @param resourceID id of the TaskManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(ResourceID resourceID) {
    final String reason = "Heartbeat of TaskManager with id " + resourceID + " timed out.";
    final TimeoutException timeoutCause = new TimeoutException(reason);

    jobMasterGateway.disconnectTaskManager(resourceID, timeoutCause);
}
/**
 * Requests the state of a partition from the job master gateway.
 *
 * @param jobId not used by this implementation
 * @param resultId id of the intermediate data set, forwarded to the gateway
 * @param partitionId id of the result partition, forwarded to the gateway
 * @return the future returned by the gateway's {@code requestPartitionState} call
 */
@Override
public CompletableFuture<ExecutionState> requestPartitionProducerState(
        JobID jobId,
        IntermediateDataSetID resultId,
        ResultPartitionID partitionId) {
    final CompletableFuture<ExecutionState> producerState =
        jobMasterGateway.requestPartitionState(resultId, partitionId);

    return producerState;
}