/**
 * Creates a receiver that reads from the sender environment's data port.
 *
 * <p>A fresh {@link TaskManagerLocation} describing the sender is built from a generated
 * resource ID, {@code LOCAL_ADDRESS} and the data port of the sender's connection manager.
 * An input gate is created against that location, wrapped in a
 * {@link SerializingLongReceiver}, and the receiver is started before it is returned.
 *
 * @return the receiver, already started
 * @throws Exception if creating the input gate or the receiver fails
 */
public SerializingLongReceiver createReceiver() throws Exception {
    final TaskManagerLocation locationOfSender =
        new TaskManagerLocation(
            ResourceID.generate(),
            LOCAL_ADDRESS,
            senderEnv.getConnectionManager().getDataPort());

    // The expected record-source count is channels multiplied by the number of partition IDs.
    final SerializingLongReceiver startedReceiver =
        new SerializingLongReceiver(
            createInputGate(
                jobId,
                dataSetID,
                executionAttemptID,
                locationOfSender,
                receiverEnv,
                channels),
            channels * partitionIds.length);

    startedReceiver.start();
    return startedReceiver;
}
/**
 * Joins the hostnames of the given locations into one string, separated by {@code ", "}.
 *
 * @param locations the locations whose hostnames are listed, in iteration order
 * @return the comma-separated hostnames; the empty string when there are no locations
 */
private static String getHostnamesFromInstances(Iterable<TaskManagerLocation> locations) {
    final StringBuilder hostnames = new StringBuilder();

    // Empty before the first element, ", " before every following one.
    String separator = "";
    for (TaskManagerLocation location : locations) {
        hostnames.append(separator);
        hostnames.append(location.getHostname());
        separator = ", ";
    }

    return hostnames.toString();
}
/**
 * Removes the slot with the given allocation ID from the available slots and from the
 * by-TaskManager and by-host views.
 *
 * <p>When a view's slot set becomes empty after the removal, that set's map entry is
 * dropped as well.
 *
 * @param slotId allocation ID of the slot to remove
 * @return the removed slot, or {@code null} if no slot is available under that ID
 */
AllocatedSlot tryRemove(AllocationID slotId) {
    final SlotAndTimestamp entry = availableSlots.remove(slotId);
    if (entry == null) {
        return null;
    }

    final AllocatedSlot removedSlot = entry.slot();
    final ResourceID taskManagerId = removedSlot.getTaskManagerLocation().getResourceID();
    final String hostName = removedSlot.getTaskManagerLocation().getFQDNHostname();

    final Set<AllocatedSlot> perTaskManager = availableSlotsByTaskManager.get(taskManagerId);
    final Set<AllocatedSlot> perHost = availableSlotsByHost.get(hostName);

    perTaskManager.remove(removedSlot);
    perHost.remove(removedSlot);

    // Do not keep empty sets around in the secondary views.
    if (perTaskManager.isEmpty()) {
        availableSlotsByTaskManager.remove(taskManagerId);
    }
    if (perHost.isEmpty()) {
        availableSlotsByHost.remove(hostName);
    }

    return removedSlot;
}
String locationString = location == null ? "(unassigned)" : location.getHostname() + ":" + location.dataPort();
Instance prior = registeredHostsByResource.get(taskManagerLocation.getResourceID()); if (prior != null) { throw new IllegalStateException("Registration attempt from TaskManager at " + taskManagerLocation.addressString() + ". This connection is already registered under ID " + prior.getId()); boolean wasDead = this.deadHosts.remove(taskManagerLocation.getResourceID()); if (wasDead) { LOG.info("Registering TaskManager at " + taskManagerLocation.addressString() + " which was marked as dead earlier because of a heart-beat timeout."); registeredHostsByResource.put(taskManagerLocation.getResourceID(), host); "Current number of registered hosts is %d. " + "Current number of alive task slots is %d.", taskManagerLocation.getHostname(), taskManagerGateway.getAddress(), instanceID,
/**
 * Builds a {@link SubtaskExecutionAttemptInfo} from the current state of an execution.
 *
 * <p>Derived values:
 * <ul>
 *   <li>location string: {@code hostname:dataPort}, or {@code "(unassigned)"} when the
 *       execution has no assigned location (the resource ID then also wraps
 *       {@code "(unassigned)"});</li>
 *   <li>start time: the DEPLOYING timestamp, with {@code 0} mapped to {@code -1};</li>
 *   <li>end time: the timestamp of the current state if it is terminal, else {@code -1};</li>
 *   <li>duration: end time (or "now" when there is no positive end time) minus start time,
 *       or {@code -1} when the start time is not positive.</li>
 * </ul>
 *
 * @param execution the execution to describe
 * @return the assembled info object
 */
public static SubtaskExecutionAttemptInfo create(AccessExecution execution) {
    final ExecutionState state = execution.getState();
    final long currentTime = System.currentTimeMillis();

    final TaskManagerLocation assigned = execution.getAssignedResourceLocation();
    final String locationString;
    final ResourceID resourceID;
    if (assigned != null) {
        locationString = assigned.getHostname() + ":" + assigned.dataPort();
        resourceID = assigned.getResourceID();
    } else {
        locationString = "(unassigned)";
        resourceID = new ResourceID("(unassigned)");
    }

    // A DEPLOYING timestamp of 0 is reported as -1.
    final long deployingTimestamp = execution.getStateTimestamp(ExecutionState.DEPLOYING);
    final long startTime = deployingTimestamp == 0 ? -1 : deployingTimestamp;

    final long endTime;
    if (state.isTerminal()) {
        endTime = execution.getStateTimestamp(state);
    } else {
        endTime = -1;
    }

    final long duration;
    if (startTime > 0) {
        // Without a positive end time, measure against the current time.
        final long effectiveEnd = endTime > 0 ? endTime : currentTime;
        duration = effectiveEnd - startTime;
    } else {
        duration = -1;
    }

    final String failureCause = execution.getFailureCauseAsString();

    return new SubtaskExecutionAttemptInfo(
        execution.getAttemptId(),
        state,
        execution.getAttemptNumber(),
        locationString,
        startTime,
        endTime,
        duration,
        failureCause,
        resourceID
    );
}
}
location.getFQDNHostname() + ':' + location.dataPort() : "(unassigned)";
/**
 * Returns the resource ID reported by this object's {@code location}.
 *
 * @return the resource ID of the location
 */
public ResourceID getTaskManagerID() {
    final ResourceID taskManagerId = location.getResourceID();
    return taskManagerId;
}
network.start(); final TaskManagerLocation taskManagerLocation = new TaskManagerLocation( resourceID, taskManagerServicesConfiguration.getTaskManagerAddress(), taskManagerLocation.getHostname(), taskManagerLocation.getResourceID().toString());
public static TaskManagerMetricGroup instantiateTaskManagerMetricGroup( MetricRegistry metricRegistry, TaskManagerLocation taskManagerLocation, NetworkEnvironment network, Optional<Time> systemResourceProbeInterval) { final TaskManagerMetricGroup taskManagerMetricGroup = new TaskManagerMetricGroup( metricRegistry, taskManagerLocation.getHostname(), taskManagerLocation.getResourceID().toString()); MetricGroup statusGroup = taskManagerMetricGroup.addGroup(METRIC_GROUP_STATUS_NAME); // Initialize the TM metrics instantiateStatusMetrics(statusGroup); MetricGroup networkGroup = statusGroup .addGroup("Network"); instantiateNetworkMetrics(networkGroup, network); if (systemResourceProbeInterval.isPresent()) { instantiateSystemMetrics(taskManagerMetricGroup, systemResourceProbeInterval.get()); } return taskManagerMetricGroup; }
/**
 * Creates a new connection to the ResourceManager at {@code resourceManagerAddress} and
 * starts it.
 *
 * <p>The assertions (checked only when assertions are enabled) require that an address is
 * set and that neither a connection object nor an established connection exists yet.
 */
private void connectToResourceManager() {
    assert resourceManagerAddress != null;
    assert establishedResourceManagerConnection == null;
    assert resourceManagerConnection == null;

    log.info("Connecting to ResourceManager {}.", resourceManagerAddress);

    resourceManagerConnection =
        new TaskExecutorToResourceManagerConnection(
            log,
            getRpcService(),
            getAddress(),
            getResourceID(),
            taskManagerLocation.dataPort(),
            hardwareDescription,
            resourceManagerAddress.getAddress(),
            resourceManagerAddress.getResourceManagerId(),
            getMainThreadExecutor(),
            new ResourceManagerRegistrationListener());

    resourceManagerConnection.start();
}
/** * Remove all available slots come from specified TaskManager. * * @param taskManager The id of the TaskManager * @return The set of removed slots for the given TaskManager */ Set<AllocatedSlot> removeAllForTaskManager(final ResourceID taskManager) { // remove from the by-TaskManager view final Set<AllocatedSlot> slotsForTm = availableSlotsByTaskManager.remove(taskManager); if (slotsForTm != null && slotsForTm.size() > 0) { final String host = slotsForTm.iterator().next().getTaskManagerLocation().getFQDNHostname(); final Set<AllocatedSlot> slotsForHost = availableSlotsByHost.get(host); // remove from the base set and the by-host view for (AllocatedSlot slot : slotsForTm) { availableSlots.remove(slot.getAllocationId()); slotsForHost.remove(slot); } if (slotsForHost.isEmpty()) { availableSlotsByHost.remove(host); } return slotsForTm; } else { return Collections.emptySet(); } }
if (taskManagerLocationFuture.isDone() && !slot.getTaskManagerLocation().equals(taskManagerLocationFuture.getNow(null))) { ASSIGNED_SLOT_UPDATER.compareAndSet(this, slot, null); reconcileFuture.complete(attemptId);
/**
 * Unregisters the TaskManager with the given instance id.
 *
 * <p>A registered instance is removed from both registration maps, marked dead, its slots
 * are subtracted from the alive-slot total, and {@code notifyDeadInstance} is called for
 * it. An unknown instance id only produces a warning.
 *
 * @param instanceId TaskManager which is about to be marked dead.
 * @param terminated if {@code true}, the instance's TaskManager ID is also added to the
 *                   set of dead hosts
 */
public void unregisterTaskManager(InstanceID instanceId, boolean terminated) {
    final Instance registered = registeredHostsById.get(instanceId);

    if (registered == null) {
        LOG.warn("Tried to unregister instance {} but it is not registered.", instanceId);
        return;
    }

    registeredHostsById.remove(registered.getId());
    registeredHostsByResource.remove(registered.getTaskManagerID());

    if (terminated) {
        deadHosts.add(registered.getTaskManagerID());
    }

    registered.markDead();
    totalNumberOfAliveTaskSlots -= registered.getTotalNumberOfSlots();
    notifyDeadInstance(registered);

    LOG.info(
        "Unregistered task manager " + registered.getTaskManagerLocation().addressString() +
        ". Number of registered task managers " + getNumberOfRegisteredTaskManagers() +
        ". Number of available slots " + getTotalNumberOfSlots() + ".");
}
@Override public ResultPartitionLocation getResultPartitionLocation( TaskManagerLocation producerLocation, TaskManagerLocation consumerLocation, IntermediateResult intermediateResult) { // use the yarn shuffle service data port Integer dataPort = configuration.getInteger( ExternalBlockShuffleServiceOptions.FLINK_SHUFFLE_SERVICE_PORT_KEY); // use the taskmanager ip address, for the shuffle service deployed on the same host // of the taskmanager is used to shuffle data to down streams. InetSocketAddress address = new InetSocketAddress(producerLocation.address(), dataPort); ConnectionID connectionId = new ConnectionID(address, intermediateResult.getConnectionIndex()); return ResultPartitionLocation.createRemote(connectionId); } }
/**
 * Removes the slot with the given allocation ID from the available slots and from the
 * by-TaskManager and by-host views.
 *
 * <p>When a view's slot set becomes empty after the removal, that set's map entry is
 * dropped as well.
 *
 * @param slotId allocation ID of the slot to remove
 * @return {@code true} if a slot was removed, {@code false} if none is available under
 *         that ID
 */
boolean tryRemove(AllocationID slotId) {
    final SlotAndTimestamp entry = availableSlots.remove(slotId);
    if (entry == null) {
        return false;
    }

    final AllocatedSlot removedSlot = entry.slot();
    final ResourceID taskManagerId = removedSlot.getTaskManagerLocation().getResourceID();
    final String hostName = removedSlot.getTaskManagerLocation().getFQDNHostname();

    final Set<AllocatedSlot> perTaskManager = availableSlotsByTaskManager.get(taskManagerId);
    final Set<AllocatedSlot> perHost = availableSlotsByHost.get(hostName);

    perTaskManager.remove(removedSlot);
    perHost.remove(removedSlot);

    // Do not keep empty sets around in the secondary views.
    if (perTaskManager.isEmpty()) {
        availableSlotsByTaskManager.remove(taskManagerId);
    }
    if (perHost.isEmpty()) {
        availableSlotsByHost.remove(hostName);
    }

    return true;
}
final String locationString = location == null ? "(unassigned)" : location.getHostname() + ":" + location.dataPort();
Instance prior = registeredHostsByResource.get(taskManagerLocation.getResourceID()); if (prior != null) { throw new IllegalStateException("Registration attempt from TaskManager at " + taskManagerLocation.addressString() + ". This connection is already registered under ID " + prior.getId()); boolean wasDead = this.deadHosts.remove(taskManagerLocation.getResourceID()); if (wasDead) { LOG.info("Registering TaskManager at " + taskManagerLocation.addressString() + " which was marked as dead earlier because of a heart-beat timeout."); registeredHostsByResource.put(taskManagerLocation.getResourceID(), host); "Current number of registered hosts is %d. " + "Current number of alive task slots is %d.", taskManagerLocation.getHostname(), taskManagerGateway.getAddress(), instanceID,
for (AccessExecutionVertex vertex : jobVertex.getTaskVertices()) { TaskManagerLocation location = vertex.getCurrentAssignedResourceLocation(); String taskManager = location == null ? "(unassigned)" : location.getHostname() + ':' + location.dataPort(); List<AccessExecutionVertex> vertices = taskManagerVertices.computeIfAbsent( taskManager, TaskManagerLocation taskManagerLocation = taskVertices.get(0).getCurrentAssignedResourceLocation(); if (taskManagerLocation != null) { resourceId = taskManagerLocation.getResourceID().getResourceIdString();