@Override
protected void handleMessage(Object message) {
	// check for YARN specific actor messages first
	if (message instanceof ContainersAllocated) {
		containersAllocated(((ContainersAllocated) message).containers());
	}
	else if (message instanceof ContainersComplete) {
		containersComplete(((ContainersComplete) message).containers());
	}
	else {
		// message handled by the generic resource master code
		super.handleMessage(message);
	}
}
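// ---------------------------------------------------------------------------
// Hedged sketch (not part of the original class): where the ContainersAllocated
// and ContainersComplete messages above could come from. It assumes the YARN
// AMRMClientAsync callback API (org.apache.hadoop.yarn.client.api.async) and
// forwards container events to the actor, mirroring the dispatch above.
// ---------------------------------------------------------------------------
class YarnContainerEventForwarder implements AMRMClientAsync.CallbackHandler {

	private final ActorRef resourceManager; // the actor running handleMessage above

	YarnContainerEventForwarder(ActorRef resourceManager) {
		this.resourceManager = resourceManager;
	}

	@Override
	public void onContainersAllocated(List<Container> containers) {
		resourceManager.tell(new ContainersAllocated(containers), ActorRef.noSender());
	}

	@Override
	public void onContainersCompleted(List<ContainerStatus> statuses) {
		resourceManager.tell(new ContainersComplete(statuses), ActorRef.noSender());
	}

	// remaining callbacks are no-ops in this sketch
	@Override
	public void onShutdownRequest() {}

	@Override
	public void onNodesUpdated(List<NodeReport> updatedNodes) {}

	@Override
	public void onError(Throwable e) {}

	@Override
	public float getProgress() {
		return 0;
	}
}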
@Override
protected void handleMessage(Object message) {
	try {
		// --- messages about worker allocation and pool sizes

		if (message instanceof CheckAndAllocateContainers) {
			checkWorkersPool();
		}
		else if (message instanceof SetWorkerPoolSize) {
			SetWorkerPoolSize msg = (SetWorkerPoolSize) message;
			adjustDesignatedNumberOfWorkers(msg.numberOfWorkers());
		}
		else if (message instanceof RemoveResource) {
			RemoveResource msg = (RemoveResource) message;
			removeRegisteredResource(msg.resourceId());
		}

		// --- lookup of registered resources

		else if (message instanceof NotifyResourceStarted) {
			NotifyResourceStarted msg = (NotifyResourceStarted) message;
			handleResourceStarted(sender(), msg.getResourceID());
		}

		// --- messages about JobManager leader status and registration

		else if (message instanceof NewLeaderAvailable) {
			NewLeaderAvailable msg = (NewLeaderAvailable) message;
			newJobManagerLeaderAvailable(msg.leaderAddress(), msg.leaderSessionId());
		}
		else if (message instanceof TriggerRegistrationAtJobManager) {
			TriggerRegistrationAtJobManager msg = (TriggerRegistrationAtJobManager) message;
			triggerConnectingToJobManager(msg.jobManagerAddress());
		}
		else if (message instanceof RegisterResourceManagerSuccessful) {
			RegisterResourceManagerSuccessful msg = (RegisterResourceManagerSuccessful) message;
			jobManagerLeaderConnected(msg.jobManager(), msg.currentlyRegisteredTaskManagers());
		}

		// --- end of application

		else if (message instanceof StopCluster) {
			StopCluster msg = (StopCluster) message;
			shutdownCluster(msg.finalStatus(), msg.message());
			sender().tell(decorateMessage(StopClusterSuccessful.getInstance()), ActorRef.noSender());
		}

		// --- miscellaneous messages

		else if (message instanceof RegisterInfoMessageListener) {
			if (jobManager != null) {
				infoMessageListeners.add(sender());
				sender().tell(decorateMessage(
					RegisterInfoMessageListenerSuccessful.get()),
					// answer as the JobManager
					jobManager);
			}
		}
		else if (message instanceof UnRegisterInfoMessageListener) {
			infoMessageListeners.remove(sender());
		}
		else if (message instanceof FatalErrorOccurred) {
			FatalErrorOccurred fatalErrorOccurred = (FatalErrorOccurred) message;
			fatalError(fatalErrorOccurred.message(), fatalErrorOccurred.error());
		}

		// --- unknown messages

		else {
			LOG.error("Discarding unknown message: {}", message);
		}
	}
	catch (Throwable t) {
		// fatal error if we cannot handle a message
		fatalError("Error processing actor message", t);
	}
}
/**
 * Callback when we're informed about a new leading JobManager.
 *
 * @param newJobManagerLeader The ActorRef of the new JobManager leader
 * @param workers The existing workers the JobManager has registered
 */
private void jobManagerLeaderConnected(
		ActorRef newJobManagerLeader,
		Collection<ResourceID> workers) {

	if (jobManager == null) {
		LOG.info("Resource Manager associating with leading JobManager {} - leader session {}",
			newJobManagerLeader, leaderSessionID);
		jobManager = newJobManagerLeader;

		if (workers.size() > 0) {
			LOG.info("Received TaskManagers that were registered at the leader JobManager. " +
				"Trying to consolidate.");

			// keep track of which TaskManagers are not handled
			Set<ResourceID> toHandle = new HashSet<>(workers);

			try {
				// we accept the JobManager's list of registered TaskManagers
				Collection<WorkerType> consolidated = reacceptRegisteredWorkers(workers);
				LOG.info("Consolidated {} TaskManagers", consolidated.size());

				// put the consolidated TaskManagers into our bookkeeping
				for (WorkerType worker : consolidated) {
					ResourceID resourceID = worker.getResourceID();
					startedWorkers.put(resourceID, worker);
					toHandle.remove(resourceID);
				}
			}
			catch (Throwable t) {
				LOG.error("Error during consolidation of known TaskManagers", t);
				// the framework should release the remaining unclear resources
				for (ResourceID id : toHandle) {
					releasePendingWorker(id);
				}
			}
		}

		// trigger an initial check whether we need to request new workers
		checkWorkersPool();
	}
	else {
		String msg = "Attempting to associate with new JobManager leader " + newJobManagerLeader
			+ " without previously disassociating from current leader " + jobManager;
		fatalError(msg, new Exception(msg));
	}
}
/**
 * Sends the given message to all registered info message listeners.
 *
 * @param message The info message to broadcast
 */
protected void sendInfoMessage(String message) {
	for (ActorRef listener : infoMessageListeners) {
		listener.tell(decorateMessage(new InfoMessage(message)), self());
	}
}
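// ---------------------------------------------------------------------------
// Hedged sketch (illustrative only): a hypothetical listener actor that would
// receive the InfoMessage broadcasts above after registering itself via the
// RegisterInfoMessageListener message handled in handleMessage. Assumes Akka's
// classic UntypedActor API (akka.actor.UntypedActor).
// ---------------------------------------------------------------------------
class InfoMessageLogger extends UntypedActor {

	@Override
	public void onReceive(Object message) {
		if (message instanceof InfoMessage) {
			// rely on InfoMessage's string form; its exact accessor is not
			// shown in the code above
			System.out.println(message);
		} else {
			unhandled(message);
		}
	}
}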
/**
 * This method causes the resource framework master to <b>synchronously</b> re-examine
 * the set of available and pending worker containers, and to allocate containers
 * if needed.
 *
 * <p>This method does not automatically release workers, because it is not visible to
 * this resource master which workers can be released. Instead, the JobManager must
 * explicitly release individual workers.
 */
private void checkWorkersPool() {
	int numWorkersPending = getNumWorkerRequestsPending();
	int numWorkersPendingRegistration = getNumWorkersPendingRegistration();

	// sanity checks
	Preconditions.checkState(numWorkersPending >= 0,
		"Number of pending workers should never be below 0.");
	Preconditions.checkState(numWorkersPendingRegistration >= 0,
		"Number of workers pending registration should never be below 0.");

	// see how many workers we want, and whether we have enough
	int allAvailableAndPending = startedWorkers.size() +
		numWorkersPending + numWorkersPendingRegistration;

	int missing = designatedPoolSize - allAvailableAndPending;

	if (missing > 0) {
		requestNewWorkers(missing);
	}
}
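// Worked example (hypothetical numbers, not taken from the code above): with
// 4 started workers, 2 pending container requests, 1 worker pending
// registration, and a designated pool size of 10, checkWorkersPool() computes
//
//     allAvailableAndPending = 4 + 2 + 1 = 7
//     missing                = 10 - 7    = 3
//
// and therefore calls requestNewWorkers(3). If the designated pool size were
// 7 or less, missing would be <= 0 and no new workers would be requested.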
/**
 * Tells the ResourceManager that a TaskManager has been started in a container with the given
 * resource id.
 *
 * @param jobManager The sender (JobManager) of the message
 * @param resourceID The resource id of the started TaskManager
 */
private void handleResourceStarted(ActorRef jobManager, ResourceID resourceID) {
	if (resourceID != null) {
		// check if resourceID is already registered (TaskManager may send duplicate register messages)
		WorkerType oldWorker = startedWorkers.get(resourceID);
		if (oldWorker != null) {
			LOG.debug("Notification that TaskManager {} had been started was sent before.", resourceID);
		} else {
			WorkerType newWorker = workerStarted(resourceID);

			if (newWorker != null) {
				startedWorkers.put(resourceID, newWorker);
				LOG.info("TaskManager {} has started.", resourceID);
			} else {
				LOG.info("TaskManager {} has not been started by this resource manager.", resourceID);
			}
		}
	}

	// Acknowledge the resource registration
	jobManager.tell(decorateMessage(Acknowledge.get()), self());
}
/**
 * Triggers the registration of this resource manager at the JobManager leader.
 *
 * @param leaderAddress The akka actor URL of the JobManager leader
 */
private void triggerConnectingToJobManager(String leaderAddress) {
	LOG.info("Trying to associate with JobManager leader {}", leaderAddress);

	final Object registerMessage = decorateMessage(new RegisterResourceManager(self()));
	final Object retryMessage = decorateMessage(new TriggerRegistrationAtJobManager(leaderAddress));

	// send the registration message to the JobManager
	ActorSelection jobManagerSel = context().actorSelection(leaderAddress);
	Future<Object> future = Patterns.ask(jobManagerSel, registerMessage, new Timeout(messageTimeout));

	future.onComplete(new OnComplete<Object>() {

		@Override
		public void onComplete(Throwable failure, Object msg) {
			// only process the response if we have not been associated in the meantime
			if (jobManager == null) {
				if (msg != null) {
					if (msg instanceof LeaderSessionMessage &&
							((LeaderSessionMessage) msg).message() instanceof RegisterResourceManagerSuccessful) {
						self().tell(msg, ActorRef.noSender());
					} else {
						LOG.error("Invalid response type to registration at JobManager: {}", msg);
						self().tell(retryMessage, ActorRef.noSender());
					}
				} else {
					// no success; the JobManager is probably not running yet, retry
					LOG.error("Resource manager could not register at JobManager", failure);
					self().tell(retryMessage, ActorRef.noSender());
				}
			}
		}
	}, context().dispatcher());
}
/**
 * Sets the designated worker pool size. If this size is larger than the current pool
 * size, then the resource manager will try to acquire more TaskManagers.
 *
 * @param num The number of workers in the pool.
 */
private void adjustDesignatedNumberOfWorkers(int num) {
	if (num >= 0) {
		LOG.info("Adjusting designated worker pool size to {}", num);
		designatedPoolSize = num;
		checkWorkersPool();
	} else {
		LOG.warn("Ignoring invalid designated worker pool size: {}", num);
	}
}
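// Hedged usage sketch: adjustDesignatedNumberOfWorkers is reached through the
// SetWorkerPoolSize message in handleMessage above. From a client that holds
// the resource manager's ActorRef (hypothetical variable "resourceManager"),
// the target pool size could be raised to 10 like this; note that in a real
// cluster the message additionally has to carry the current leader session id
// (compare decorateMessage above), which this sketch glosses over:
//
//     resourceManager.tell(new SetWorkerPoolSize(10), ActorRef.noSender());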
/**
 * Starts the resource manager actors.
 *
 * @param configuration The configuration for the resource manager
 * @param actorSystem The actor system to start the resource manager in
 * @param leaderRetriever The leader retriever service to initialize the resource manager
 * @param resourceManagerClass The class of the ResourceManager to be started
 * @param resourceManagerActorName The name of the resource manager actor.
 *
 * @return ActorRef of the resource manager
 */
public static ActorRef startResourceManagerActors(
		Configuration configuration,
		ActorSystem actorSystem,
		LeaderRetrievalService leaderRetriever,
		Class<? extends FlinkResourceManager<?>> resourceManagerClass,
		String resourceManagerActorName) {

	Props resourceMasterProps = getResourceManagerProps(
		resourceManagerClass,
		configuration,
		leaderRetriever);

	return actorSystem.actorOf(resourceMasterProps, resourceManagerActorName);
}
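// ---------------------------------------------------------------------------
// Hedged bootstrap sketch: StandaloneResourceManager,
// GlobalConfiguration.loadConfiguration(), and
// LeaderRetrievalUtils.createLeaderRetrievalService(...) are assumptions based
// on the surrounding Flink codebase; the system and actor names are
// illustrative only.
// ---------------------------------------------------------------------------
public static void main(String[] args) throws Exception {
	Configuration config = GlobalConfiguration.loadConfiguration();
	ActorSystem actorSystem = ActorSystem.create("flink-resource-manager");
	LeaderRetrievalService leaderRetriever =
		LeaderRetrievalUtils.createLeaderRetrievalService(config);

	// starts a standalone resource manager actor in the given actor system
	ActorRef resourceManager = FlinkResourceManager.startResourceManagerActors(
		config,
		actorSystem,
		leaderRetriever,
		StandaloneResourceManager.class,
		"Flink_Resource_Manager");
}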