/**
 * Asks the JobManager for the current cluster status and waits for the reply, using
 * {@code timeout} both for the ask and for the blocking wait.
 *
 * @return the status reply received from the JobManager
 * @throws RuntimeException if the gateway cannot be obtained, waiting for the reply fails
 *     (including interruption of the waiting thread), or the reply is not a
 *     {@link GetClusterStatusResponse}
 */
@Override
public GetClusterStatusResponse getClusterStatus() {
    try {
        final ActorGateway jmGateway = getJobManagerGateway();
        final Future<Object> future = jmGateway.ask(GetClusterStatus.getInstance(), timeout);
        final Object result = Await.result(future, timeout);

        if (result instanceof GetClusterStatusResponse) {
            return (GetClusterStatusResponse) result;
        }
        // Thrown inside the try block, so it is wrapped by the catch below like any other failure.
        throw new RuntimeException("Received the wrong reply " + result + " from cluster.");
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The checked exception is converted to an unchecked one; restore the interrupt
            // status so code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        throw new RuntimeException("Couldn't retrieve the cluster status.", e);
    }
}
/**
 * Asks the JobManager gateway for the cluster status and waits up to {@code akkaDuration}
 * for the reply.
 *
 * <p>This method is only available if the cluster hasn't been started in detached mode.
 *
 * @return the reply, cast to {@link GetClusterStatusResponse}
 * @throws RuntimeException if the gateway cannot be obtained, waiting for the reply fails
 *     (including interruption of the waiting thread), or the reply has an unexpected type
 */
@Override
public GetClusterStatusResponse getClusterStatus() {
    try {
        final Future<Object> clusterStatusOption =
            getJobManagerGateway().ask(GetClusterStatus.getInstance(), akkaDuration);
        // A reply of another type fails the cast; the ClassCastException is wrapped below.
        return (GetClusterStatusResponse) Await.result(clusterStatusOption, akkaDuration);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Do not lose the interruption when translating to an unchecked exception.
            Thread.currentThread().interrupt();
        }
        throw new RuntimeException("Unable to get ClusterClient status from Application Client", e);
    }
}
/**
 * Requests the {@link JobStatus} of the job with the given {@link JobID}.
 *
 * <p>The request is sent asynchronously. The returned future completes with the status carried
 * by a {@code CurrentJobStatus} reply and completes exceptionally with an
 * {@link IllegalStateException} cause for a {@code JobNotFound} reply or any other reply type.
 *
 * @param jobId id of the job whose status is requested
 * @return future of the job's status
 * @throws RuntimeException if the JobManager gateway cannot be retrieved
 */
public CompletableFuture<JobStatus> getJobStatus(JobID jobId) {
    final ActorGateway jobManager;
    try {
        jobManager = getJobManagerGateway();
    } catch (FlinkException e) {
        // Message previously read "JobManage gateway"; the component name is now spelled out.
        throw new RuntimeException("Could not retrieve JobManager gateway.", e);
    }

    final Future<Object> response = jobManager.ask(JobManagerMessages.getRequestJobStatus(jobId), timeout);
    final CompletableFuture<Object> javaFuture = FutureUtils.toJava(response);

    return javaFuture.thenApply((responseMessage) -> {
        if (responseMessage instanceof JobManagerMessages.CurrentJobStatus) {
            return ((JobManagerMessages.CurrentJobStatus) responseMessage).status();
        } else if (responseMessage instanceof JobManagerMessages.JobNotFound) {
            throw new CompletionException(
                new IllegalStateException("Could not find job with JobId " + jobId));
        } else {
            throw new CompletionException(
                new IllegalStateException("Unknown JobManager response of type " + responseMessage.getClass()));
        }
    });
}
/**
 * Sends a {@code ShutdownClusterAfterJob} message for the given job to the JobManager gateway
 * and waits up to {@code akkaDuration} for the reply future to complete.
 *
 * @param jobID id of the job named in the shutdown request; must not be null
 * @throws NullPointerException if {@code jobID} is null
 * @throws RuntimeException if the gateway cannot be obtained or waiting for the reply fails
 *     (including interruption of the waiting thread)
 */
private void stopAfterJob(JobID jobID) {
    Preconditions.checkNotNull(jobID, "The job id must not be null");
    try {
        final Future<Object> replyFuture =
            getJobManagerGateway().ask(new ShutdownClusterAfterJob(jobID), akkaDuration);
        // Only completion matters here; the reply value itself is not inspected.
        Await.ready(replyFuture, akkaDuration);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interruption visible after wrapping it in an unchecked exception.
            Thread.currentThread().interrupt();
        }
        throw new RuntimeException(
            "Unable to tell application master to stop once the specified job has been finished", e);
    }
}
/** * Cancels a job identified by the job id. * @param jobId the job id * @throws Exception In case an error occurred. */ public void cancel(JobID jobId) throws Exception { final ActorGateway jobManager = getJobManagerGateway(); Object cancelMsg = new JobManagerMessages.CancelJob(jobId); Future<Object> response = jobManager.ask(cancelMsg, timeout); final Object rc = Await.result(response, timeout); if (rc instanceof JobManagerMessages.CancellationSuccess) { // no further action required } else if (rc instanceof JobManagerMessages.CancellationFailure) { throw new Exception("Canceling the job with ID " + jobId + " failed.", ((JobManagerMessages.CancellationFailure) rc).cause()); } else { throw new IllegalStateException("Unexpected response: " + rc); } }
/**
 * Cancels the job with the given id, asking the JobManager to take a savepoint as part of the
 * cancellation, and blocks until the reply has arrived.
 *
 * @param jobId the job id
 * @param savepointDirectory directory the savepoint should be written to (may be null)
 * @return the savepoint path reported in the success reply
 * @throws Exception if the gateway cannot be obtained, waiting for the reply fails, or the
 *     JobManager answers with a {@code CancellationFailure} (whose cause is attached)
 * @throws IllegalStateException if the reply is neither a cancellation success nor a failure
 */
public String cancelWithSavepoint(JobID jobId, @Nullable String savepointDirectory) throws Exception {
    final ActorGateway gateway = getJobManagerGateway();

    final Future<Object> pendingReply =
        gateway.ask(new JobManagerMessages.CancelJobWithSavepoint(jobId, savepointDirectory), timeout);
    final Object reply = Await.result(pendingReply, timeout);

    if (reply instanceof JobManagerMessages.CancellationSuccess) {
        return ((JobManagerMessages.CancellationSuccess) reply).savepointPath();
    }

    if (reply instanceof JobManagerMessages.CancellationFailure) {
        final JobManagerMessages.CancellationFailure failure = (JobManagerMessages.CancellationFailure) reply;
        throw new Exception("Cancel & savepoint for the job with ID " + jobId + " failed.", failure.cause());
    }

    throw new IllegalStateException("Unexpected response: " + reply);
}
/** * Stops a program on Flink cluster whose job-manager is configured in this client's configuration. * Stopping works only for streaming programs. Be aware, that the program might continue to run for * a while after sending the stop command, because after sources stopped to emit data all operators * need to finish processing. * * @param jobId * the job ID of the streaming program to stop * @throws Exception * If the job ID is invalid (ie, is unknown or refers to a batch job) or if sending the stop signal * failed. That might be due to an I/O problem, ie, the job-manager is unreachable. */ public void stop(final JobID jobId) throws Exception { final ActorGateway jobManager = getJobManagerGateway(); Future<Object> response = jobManager.ask(new JobManagerMessages.StopJob(jobId), timeout); final Object rc = Await.result(response, timeout); if (rc instanceof JobManagerMessages.StoppingSuccess) { // no further action required } else if (rc instanceof JobManagerMessages.StoppingFailure) { throw new Exception("Stopping the job with ID " + jobId + " failed.", ((JobManagerMessages.StoppingFailure) rc).cause()); } else { throw new IllegalStateException("Unexpected response: " + rc); } }
/**
 * Lists the currently running and finished jobs on the cluster.
 *
 * <p>The returned future completes exceptionally with an {@link IllegalStateException} cause if
 * the JobManager answers with anything other than {@code MultipleJobsDetails}.
 *
 * @return future collection of running and finished jobs
 * @throws Exception if no connection to the cluster could be established
 */
public CompletableFuture<Collection<JobStatusMessage>> listJobs() throws Exception {
    final ActorGateway gateway = getJobManagerGateway();
    final Future<Object> scalaReply = gateway.ask(new RequestJobDetails(true, false), timeout);

    return FutureUtils.toJava(scalaReply).thenApply(reply -> {
        if (!(reply instanceof MultipleJobsDetails)) {
            throw new CompletionException(
                new IllegalStateException("Unknown JobManager response of type " + reply.getClass()));
        }

        final Collection<JobDetails> allJobs = ((MultipleJobsDetails) reply).getJobs();

        // Reduce each JobDetails entry to the id/name/status/start-time tuple exposed to callers.
        final Collection<JobStatusMessage> statuses = new ArrayList<>(allJobs.size());
        for (JobDetails job : allJobs) {
            statuses.add(
                new JobStatusMessage(job.getJobId(), job.getJobName(), job.getStatus(), job.getStartTime()));
        }
        return statuses;
    });
}
/**
 * Requests and returns the accumulators for the given job identifier. Accumulators can be
 * requested while a job is running or after it has finished.
 *
 * @param jobID The job identifier of a job.
 * @param loader The class loader for deserializing the accumulator results.
 * @return A Map containing the accumulator's name and its value.
 * @throws Exception if the gateway cannot be obtained, the request cannot be sent, waiting for
 *     the reply fails, the JobManager reports an erroneous result (its cause is rethrown), or
 *     the reply has an unexpected type
 */
public Map<String, OptionalFailure<Object>> getAccumulators(JobID jobID, ClassLoader loader) throws Exception {
    final ActorGateway gateway = getJobManagerGateway();

    Future<Object> pendingReply;
    try {
        pendingReply = gateway.ask(new RequestAccumulatorResults(jobID), timeout);
    } catch (Exception e) {
        throw new Exception("Failed to query the job manager gateway for accumulators.", e);
    }

    final Object reply = Await.result(pendingReply, timeout);

    if (reply instanceof AccumulatorResultsFound) {
        // Values arrive serialized and are deserialized with the caller-supplied class loader.
        final Map<String, SerializedValue<OptionalFailure<Object>>> serialized =
            ((AccumulatorResultsFound) reply).result();
        return AccumulatorHelper.deserializeAccumulators(serialized, loader);
    }

    if (reply instanceof AccumulatorResultsErroneous) {
        throw ((AccumulatorResultsErroneous) reply).cause();
    }

    throw new Exception("Failed to fetch accumulators for the job " + jobID + ".");
}
/**
 * Triggers a savepoint for the job identified by the job id. The savepoint will be written to
 * the given savepoint directory, or
 * {@link org.apache.flink.configuration.CheckpointingOptions#SAVEPOINT_DIRECTORY} if it is null.
 *
 * <p>The request is sent with a fixed ask timeout of one hour. The returned future completes
 * exceptionally with the reported cause on a {@code TriggerSavepointFailure} reply, and with an
 * {@link IllegalStateException} cause for any other unexpected reply.
 *
 * @param jobId job id
 * @param savepointDirectory directory the savepoint should be written to
 * @return path future where the savepoint is located
 * @throws FlinkException if no connection to the cluster could be established
 */
public CompletableFuture<String> triggerSavepoint(JobID jobId, @Nullable String savepointDirectory) throws FlinkException {
    final ActorGateway gateway = getJobManagerGateway();

    final Future<Object> scalaReply = gateway.ask(
        new JobManagerMessages.TriggerSavepoint(jobId, Option.<String>apply(savepointDirectory)),
        new FiniteDuration(1, TimeUnit.HOURS));

    return FutureUtils.toJava(scalaReply).thenApply(reply -> {
        if (reply instanceof JobManagerMessages.TriggerSavepointSuccess) {
            return ((JobManagerMessages.TriggerSavepointSuccess) reply).savepointPath();
        }

        if (reply instanceof JobManagerMessages.TriggerSavepointFailure) {
            throw new CompletionException(((JobManagerMessages.TriggerSavepointFailure) reply).cause());
        }

        throw new CompletionException(
            new IllegalStateException("Unknown JobManager response of type " + reply.getClass()));
    });
}
/**
 * Asks the JobManager to dispose the savepoint stored at the given path.
 *
 * <p>The returned future completes with {@link Acknowledge} on a {@code DisposeSavepointSuccess}
 * reply. On a {@code DisposeSavepointFailure} reply it completes exceptionally: a
 * {@link ClassNotFoundException} cause is replaced by one carrying a hint to supply the program
 * jar, any other cause is passed through. Any other reply type yields a
 * {@link FlinkRuntimeException} cause.
 *
 * @param savepointPath path of the savepoint to dispose
 * @return future acknowledging the disposal
 * @throws FlinkException if the JobManager gateway cannot be obtained
 */
public CompletableFuture<Acknowledge> disposeSavepoint(String savepointPath) throws FlinkException {
    final ActorGateway gateway = getJobManagerGateway();

    final Object disposeRequest = new JobManagerMessages.DisposeSavepoint(savepointPath);
    final CompletableFuture<Object> replyFuture = FutureUtils.toJava(gateway.ask(disposeRequest, timeout));

    return replyFuture.thenApply((Object reply) -> {
        if (reply instanceof JobManagerMessages.DisposeSavepointSuccess$) {
            return Acknowledge.get();
        }

        if (!(reply instanceof JobManagerMessages.DisposeSavepointFailure)) {
            throw new CompletionException(
                new FlinkRuntimeException("Unknown response type " + reply.getClass().getSimpleName() + '.'));
        }

        final JobManagerMessages.DisposeSavepointFailure failure =
            (JobManagerMessages.DisposeSavepointFailure) reply;

        if (!(failure.cause() instanceof ClassNotFoundException)) {
            throw new CompletionException(failure.cause());
        }

        // Swap the raw ClassNotFoundException for one that tells the user how to fix it.
        throw new CompletionException(
            new ClassNotFoundException("Savepoint disposal failed, because of a " +
                "missing class. This is most likely caused by a custom state " +
                "instance, which cannot be disposed without the user code class " +
                "loader. Please provide the program jar with which you have created " +
                "the savepoint via -j <JAR> for disposal.",
                failure.cause().getCause()));
    });
}
// Send the message built by getRequestNumberRegisteredTaskManager() to leadingJM; the reply is
// delivered through the returned future (presumably the registered TaskManager count - confirm).
Future<Object> registeredTMs = leadingJM.ask( JobManagerMessages.getRequestNumberRegisteredTaskManager(), timeout);
// Send a WaitForAllVerticesToBeRunningOrFinished request for this graph's job to jm, using the
// time remaining on the deadline as the ask timeout.
// NOTE(review): presumably the reply arrives once all vertices are running or finished - confirm.
Future<Object> future = jm.ask(new WaitForAllVerticesToBeRunningOrFinished(graph.getJobID()), deadline.timeLeft());
/**
 * Sends a {@code ShutdownClusterAfterJob} message for the given job through the client's
 * JobManager gateway and waits up to {@code AKKA_TIMEOUT} for the reply future to complete.
 *
 * @param client cluster client whose JobManager gateway receives the request
 * @param jobID id of the job named in the shutdown request; must not be null
 * @throws NullPointerException if {@code jobID} is null
 * @throws RuntimeException if the gateway cannot be obtained or waiting for the reply fails
 *     (including interruption of the waiting thread)
 */
private void stopAfterJob(ClusterClient client, JobID jobID) {
    Preconditions.checkNotNull(jobID, "The job id must not be null");
    try {
        final Future<Object> replyFuture =
            client.getJobManagerGateway().ask(new ShutdownClusterAfterJob(jobID), AKKA_TIMEOUT);
        // Only completion matters here; the reply value itself is not inspected.
        Await.ready(replyFuture, AKKA_TIMEOUT);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interruption visible after wrapping it in an unchecked exception.
            Thread.currentThread().interrupt();
        }
        throw new RuntimeException("Unable to tell application master to stop"
            + " once the specified job has been finished", e);
    }
}
}
/**
 * Starts a {@code JobDeployer} against fully mocked YARN and cluster-client collaborators and
 * checks that the job graph is handed to {@code runDetached} on the deployed cluster client.
 */
@Test
public void testDeployerWithIsolatedConfiguration() throws Exception {
    // Collaborators passed to the JobDeployer constructor.
    YarnClusterConfiguration yarnClusterConf = mock(YarnClusterConfiguration.class);
    doReturn(new YarnConfiguration()).when(yarnClusterConf).conf();
    ScheduledExecutorService scheduler = mock(ScheduledExecutorService.class);
    Configuration flinkConfiguration = new Configuration();
    YarnClient yarnClient = mock(YarnClient.class);
    JobDeployer deployer = new JobDeployer(yarnClusterConf, yarnClient, scheduler, flinkConfiguration);

    // The descriptor deploys to a mocked cluster client ...
    AthenaXYarnClusterDescriptor descriptor = mock(AthenaXYarnClusterDescriptor.class);
    YarnClusterClient deployedClient = mock(YarnClusterClient.class);
    doReturn(deployedClient).when(descriptor).deploy();

    // ... whose gateway answers every ask with an already-completed future.
    ActorGateway gateway = mock(ActorGateway.class);
    doReturn(gateway).when(deployedClient).getJobManagerGateway();
    doReturn(Future$.MODULE$.successful(null)).when(gateway).ask(any(), any());

    JobGraph graph = mock(JobGraph.class);
    doReturn(JobID.generate()).when(graph).getJobID();

    deployer.start(descriptor, graph);

    verify(deployedClient).runDetached(graph, null);
}
}
/**
 * Asks the JobManager for its registered TaskManagers and exposes the answer as a Java
 * collection of {@link Instance}s.
 *
 * @param timeout ask timeout, converted to a Scala {@code FiniteDuration}
 * @return future of the registered TaskManager instances
 */
@Override
public CompletableFuture<Collection<Instance>> requestTaskManagerInstances(Time timeout) {
    // mapTo narrows the untyped Akka reply to the expected message class.
    final scala.concurrent.Future<JobManagerMessages.RegisteredTaskManagers> scalaReply =
        jobManagerGateway
            .ask(JobManagerMessages.getRequestRegisteredTaskManagers(), FutureUtils.toFiniteDuration(timeout))
            .mapTo(ClassTag$.MODULE$.apply(JobManagerMessages.RegisteredTaskManagers.class));

    final CompletableFuture<JobManagerMessages.RegisteredTaskManagers> registered =
        FutureUtils.toJava(scalaReply);

    return registered.thenApply(taskManagers -> taskManagers.asJavaCollection());
}
/**
 * Sends a {@code RequestJobDetails(true, true)} message to the JobManager and returns the
 * reply as a Java future of {@link MultipleJobsDetails}.
 *
 * @param timeout ask timeout, converted to a Scala {@code FiniteDuration}
 * @return future of the job details reply
 */
@Override
public CompletableFuture<MultipleJobsDetails> requestMultipleJobDetails(Time timeout) {
    // mapTo narrows the untyped Akka reply to the expected message class.
    final scala.concurrent.Future<MultipleJobsDetails> scalaReply =
        jobManagerGateway
            .ask(new RequestJobDetails(true, true), FutureUtils.toFiniteDuration(timeout))
            .mapTo(ClassTag$.MODULE$.apply(MultipleJobsDetails.class));

    return FutureUtils.toJava(scalaReply);
}
/**
 * Sends a {@code SubmitTask} message carrying the given deployment descriptor to the actor
 * gateway and wraps the typed Scala reply future in a {@code FlinkFuture}.
 *
 * @param tdd deployment descriptor of the task to submit; must not be null
 * @param timeout ask timeout; must not be null
 * @return future of the acknowledgement
 * @throws NullPointerException if {@code tdd} or {@code timeout} is null
 */
@Override
public Future<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, Time timeout) {
    Preconditions.checkNotNull(tdd);
    Preconditions.checkNotNull(timeout);

    final TaskMessages.SubmitTask submitMessage = new TaskMessages.SubmitTask(tdd);
    final FiniteDuration askTimeout = new FiniteDuration(timeout.getSize(), timeout.getUnit());

    // mapTo narrows the untyped Akka reply to Acknowledge.
    final scala.concurrent.Future<Acknowledge> scalaAck =
        actorGateway
            .ask(submitMessage, askTimeout)
            .mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));

    return new FlinkFuture<>(scalaAck);
}
/**
 * Sends a {@code SubmitTask} message carrying the given deployment descriptor to the actor
 * gateway and converts the typed Scala reply future to a Java {@link CompletableFuture}.
 *
 * @param tdd deployment descriptor of the task to submit; must not be null
 * @param timeout ask timeout; must not be null
 * @return future of the acknowledgement
 * @throws NullPointerException if {@code tdd} or {@code timeout} is null
 */
@Override
public CompletableFuture<Acknowledge> submitTask(TaskDeploymentDescriptor tdd, Time timeout) {
    Preconditions.checkNotNull(tdd);
    Preconditions.checkNotNull(timeout);

    final TaskMessages.SubmitTask submitMessage = new TaskMessages.SubmitTask(tdd);
    final FiniteDuration askTimeout = new FiniteDuration(timeout.getSize(), timeout.getUnit());

    // mapTo narrows the untyped Akka reply to Acknowledge.
    final scala.concurrent.Future<Acknowledge> scalaAck =
        actorGateway
            .ask(submitMessage, askTimeout)
            .mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));

    return FutureUtils.toJava(scalaAck);
}
/**
 * Sends a {@code CancelTask} message for the given execution attempt to the actor gateway and
 * converts the typed Scala reply future to a Java {@link CompletableFuture}.
 *
 * @param executionAttemptID id of the execution attempt to cancel; must not be null
 * @param timeout ask timeout; must not be null
 * @return future of the acknowledgement
 * @throws NullPointerException if {@code executionAttemptID} or {@code timeout} is null
 */
@Override
public CompletableFuture<Acknowledge> cancelTask(ExecutionAttemptID executionAttemptID, Time timeout) {
    Preconditions.checkNotNull(executionAttemptID);
    Preconditions.checkNotNull(timeout);

    final TaskMessages.CancelTask cancelMessage = new TaskMessages.CancelTask(executionAttemptID);
    final FiniteDuration askTimeout = new FiniteDuration(timeout.getSize(), timeout.getUnit());

    // mapTo narrows the untyped Akka reply to Acknowledge.
    final scala.concurrent.Future<Acknowledge> scalaAck =
        actorGateway
            .ask(cancelMessage, askTimeout)
            .mapTo(ClassTag$.MODULE$.<Acknowledge>apply(Acknowledge.class));

    return FutureUtils.toJava(scalaAck);
}