private void waitForJob() throws Exception {
    for (int i = 0; i < 60; i++) {
        try {
            final JobStatus jobStatus = clusterClient.getJobStatus(jobGraph.getJobID()).get(60, TimeUnit.SECONDS);

            assertThat(jobStatus.isGloballyTerminalState(), equalTo(false));

            if (jobStatus == JobStatus.RUNNING) {
                return;
            }
        } catch (ExecutionException ignored) {
            // JobManagerRunner is not yet registered in Dispatcher
        }

        Thread.sleep(1000);
    }

    throw new AssertionError("Job did not become running within timeout.");
}
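The helper above is one instance of a poll-until-ready loop. Below is a minimal, framework-free sketch of the same pattern; all class and method names are illustrative, not Flink API.

import java.time.Duration;
import java.util.function.Supplier;

final class PollingSketch {

    // Repeatedly evaluate 'condition' until it is true or 'timeout' elapses (hypothetical helper).
    static void waitUntil(Supplier<Boolean> condition, Duration timeout, Duration interval) throws InterruptedException {
        final long deadline = System.nanoTime() + timeout.toNanos();
        while (System.nanoTime() < deadline) {
            if (condition.get()) {
                return; // condition reached within the timeout
            }
            Thread.sleep(interval.toMillis());
        }
        throw new AssertionError("Condition not met within " + timeout);
    }

    public static void main(String[] args) throws InterruptedException {
        waitUntil(() -> true, Duration.ofSeconds(5), Duration.ofMillis(100));
    }
}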
public CompletableFuture<JobSubmissionResult> submitJob(@Nonnull JobGraph jobGraph) {
    // queued scheduling is required because slots are requested lazily from the ResourceManager
    jobGraph.setAllowQueuedScheduling(true);

    CompletableFuture<java.nio.file.Path> jobGraphFileFuture = CompletableFuture.supplyAsync(() -> {
        try {
            final java.nio.file.Path jobGraphFile = Files.createTempFile("flink-jobgraph", ".bin");
            // ... (serialization of the JobGraph into jobGraphFile elided in this excerpt)
            return jobGraphFile;
        } catch (IOException e) {
            throw new CompletionException(e);
        }
    });

    CompletableFuture<Tuple2<JobSubmitRequestBody, Collection<FileUpload>>> requestFuture = jobGraphFileFuture.thenApply(jobGraphFile -> {
        List<String> jarFileNames = new ArrayList<>(8);
        List<JobSubmitRequestBody.DistributedCacheFile> artifactFileNames = new ArrayList<>(8);
        Collection<FileUpload> filesToUpload = new ArrayList<>(8);

        for (Path jar : jobGraph.getUserJars()) {
            jarFileNames.add(jar.getName());
            filesToUpload.add(new FileUpload(Paths.get(jar.toUri()), RestConstants.CONTENT_TYPE_JAR));
        }

        for (Map.Entry<String, DistributedCache.DistributedCacheEntry> artifacts : jobGraph.getUserArtifacts().entrySet()) {
            artifactFileNames.add(new JobSubmitRequestBody.DistributedCacheFile(artifacts.getKey(), new Path(artifacts.getValue().filePath).getName()));
            filesToUpload.add(new FileUpload(Paths.get(artifacts.getValue().filePath), RestConstants.CONTENT_TYPE_BINARY));
        }

        // ... (construction of the JobSubmitRequestBody and the returned Tuple2 elided in this excerpt)
    });

    // ... (the REST call producing 'submissionFuture' elided in this excerpt)
    return submissionFuture
        .thenApply(
            (JobSubmitResponseBody jobSubmitResponseBody) -> new JobSubmissionResult(jobGraph.getJobID()))
        .exceptionally(
            (Throwable throwable) -> {
                throw new CompletionException(new JobSubmissionException(jobGraph.getJobID(), "Failed to submit JobGraph.", ExceptionUtils.stripCompletionException(throwable)));
            });
}
JobGraph graph = new JobGraph(jobId, program.getJobName());
try {
    graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
    graph.setAllowQueuedScheduling(false);
    graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());

    graph.addVertex(vertex);
    vertex.setSlotSharingGroup(sharingGroup);
    // ... (remainder of the try block elided in this excerpt)
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) {
    JobGraph job;
    if (optPlan instanceof StreamingPlan) {
        job = ((StreamingPlan) optPlan).getJobGraph();
        job.setSavepointRestoreSettings(savepointSettings);
    } else {
        JobGraphGenerator gen = new JobGraphGenerator(flinkConfig);
        job = gen.compileJobGraph((OptimizedPlan) optPlan);
    }

    for (URL jar : jarFiles) {
        try {
            job.addJar(new Path(jar.toURI()));
        } catch (URISyntaxException e) {
            throw new RuntimeException("URL is invalid. This should not happen.", e);
        }
    }

    job.setClasspaths(classpaths);

    return job;
}
@Override
public JobExecutionResult execute(String jobName) throws Exception {
    final StreamGraph streamGraph = getStreamGraph();
    streamGraph.setJobName(jobName);

    final JobGraph jobGraph = streamGraph.getJobGraph();

    for (Path jarFile : jarFiles) {
        jobGraph.addJar(jarFile);
    }

    jobGraph.setClasspaths(new ArrayList<>(classPaths));

    return jobExecutor.executeJobBlocking(jobGraph);
}
@Override
public JobGraph retrieveJobGraph(Configuration configuration) throws FlinkException {
    final PackagedProgram packagedProgram = createPackagedProgram();
    final int defaultParallelism = configuration.getInteger(CoreOptions.DEFAULT_PARALLELISM);
    try {
        final JobGraph jobGraph = PackagedProgramUtils.createJobGraph(
            packagedProgram,
            configuration,
            defaultParallelism,
            FIXED_JOB_ID);
        jobGraph.setAllowQueuedScheduling(true);
        jobGraph.setSavepointRestoreSettings(savepointRestoreSettings);

        return jobGraph;
    } catch (Exception e) {
        throw new FlinkException("Could not create the JobGraph from the provided user code jar.", e);
    }
}
private void restoreJob(ClassLoader classLoader, ClusterClient<?> clusterClient, Deadline deadline, String savepointPath) throws Exception {
    JobGraph jobToRestore = createJobGraph(ExecutionMode.RESTORE);
    jobToRestore.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, allowNonRestoredState));

    assertNotNull("Job doesn't have a JobID.", jobToRestore.getJobID());

    clusterClient.submitJob(jobToRestore, classLoader);

    CompletableFuture<JobStatus> jobStatusFuture = FutureUtils.retrySuccessfulWithDelay(
        () -> clusterClient.getJobStatus(jobToRestore.getJobID()),
        Time.milliseconds(50),
        deadline,
        (jobStatus) -> jobStatus == JobStatus.FINISHED,
        TestingUtils.defaultScheduledExecutor());

    assertEquals(
        JobStatus.FINISHED,
        jobStatusFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS));
}
@Test
public void testPerJobMode() throws Exception {
    Configuration configuration = new Configuration();
    configuration.setString(AkkaOptions.ASK_TIMEOUT, "30 s");
    final YarnClient yarnClient = getYarnClient();

    // ... (YarnClusterDescriptor setup and the ClusterSpecification builder arguments elided in this excerpt)
    final ClusterSpecification clusterSpecification = new ClusterSpecification.ClusterSpecificationBuilder()
        .createClusterSpecification();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    env.addSource(new NoDataSource())
        .shuffle()
        .addSink(new DiscardingSink<>());

    final JobGraph jobGraph = env.getStreamGraph().getJobGraph();
    jobGraph.addJar(new org.apache.flink.core.fs.Path(testingJar.toURI()));

    // ... (per-job cluster deployment producing 'clusterClient' elided in this excerpt)
    final RestClusterClient<ApplicationId> restClusterClient = (RestClusterClient<ApplicationId>) clusterClient;

    final CompletableFuture<JobResult> jobResultCompletableFuture = restClusterClient.requestJobResult(jobGraph.getJobID());

    final JobResult jobResult = jobResultCompletableFuture.get();
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(parallelism);
env.enableCheckpointing(100);
env.getConfig().disableSysoutLogging();

JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(env.getStreamGraph());
final JobID jobId = jobGraph.getJobID();

// ... (job submission and cancellation elided in this excerpt)
assertEquals(JobStatus.CANCELED, client.getJobStatus(jobId).get());
@Override
public JobSubmissionResult submitJob(JobGraph jobGraph, ClassLoader classLoader) throws ProgramInvocationException {
    final CompletableFuture<JobSubmissionResult> jobSubmissionResultFuture = submitJob(jobGraph);

    if (isDetached()) {
        try {
            return jobSubmissionResultFuture.get();
        } catch (InterruptedException | ExecutionException e) {
            ExceptionUtils.checkInterrupted(e);
            throw new ProgramInvocationException("Could not run job in detached mode.", jobGraph.getJobID(), e);
        }
    } else {
        final CompletableFuture<JobResult> jobResultFuture = jobSubmissionResultFuture.thenCompose(
            (JobSubmissionResult ignored) -> requestJobResult(jobGraph.getJobID()));

        final JobResult jobResult;
        try {
            jobResult = jobResultFuture.get();
        } catch (InterruptedException | ExecutionException e) {
            ExceptionUtils.checkInterrupted(e);
            throw new ProgramInvocationException("Could not run job", jobGraph.getJobID(), e);
        }

        try {
            return jobResult.toJobExecutionResult(classLoader);
        } catch (JobExecutionException e) {
            throw new ProgramInvocationException("Job failed", jobGraph.getJobID(), e);
        } catch (IOException | ClassNotFoundException e) {
            throw new ProgramInvocationException("Job failed", jobGraph.getJobID(), e);
        }
    }
}
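The detached/attached branch above boils down to either returning as soon as the submission is acknowledged or blocking until the job result arrives. The following self-contained sketch shows that decision with plain JDK futures; the class, method names, and string results are placeholders, not the client's API.

import java.util.concurrent.CompletableFuture;

final class AttachmentModeSketch {

    static String run(boolean detached) throws Exception {
        // stand-in for the job submission future
        CompletableFuture<String> submission = CompletableFuture.completedFuture("submitted");

        if (detached) {
            // detached mode: return once the submission itself completes
            return submission.get();
        }

        // attached mode: chain the job-result request onto the submission and block on it
        return submission
            .thenCompose(ignored -> CompletableFuture.completedFuture("finished"))
            .get();
    }

    public static void main(String[] args) throws Exception {
        System.out.println(run(false));
    }
}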
private void tryToSubmitJob() {
    LOG.info("Sending message to JobManager {} to submit job {} ({}) and wait for progress",
        jobManager.path().toString(), jobGraph.getName(), jobGraph.getJobID());

    // ... (blobServerAddressFuture is obtained from the JobManager here, using
    //      Time.milliseconds(timeout.toMillis()) as the ask timeout)

    final CompletableFuture<Void> jarUploadFuture = blobServerAddressFuture.thenAcceptAsync(
        (InetSocketAddress blobServerAddress) -> {
            // ... (upload of the user jars to blobServerAddress, including its
            //      try/catch exception handling, elided in this excerpt)
        });

    jarUploadFuture
        .thenAccept(
            (Void ignored) -> {
                LOG.info("Submit job to the job manager {}.", jobManager.path());
                // ... (SubmitJob message sent to the JobManager actor here; the elided
                //      tell(...) call uses ActorRef.noSender() as the sender)
            })
        .whenComplete(
            (Void ignored, Throwable throwable) -> {
                if (throwable != null) {
public CompletableFuture<JobSubmissionResult> submitJob(JobGraph jobGraph) {
    final DispatcherGateway dispatcherGateway;
    try {
        dispatcherGateway = getDispatcherGateway();
    } catch (LeaderRetrievalException | InterruptedException e) {
        ExceptionUtils.checkInterrupted(e);
        return FutureUtils.completedExceptionally(e);
    }

    // we have to allow queued scheduling in Flip-6 mode because we need to request slots
    // from the ResourceManager
    jobGraph.setAllowQueuedScheduling(true);

    final CompletableFuture<InetSocketAddress> blobServerAddressFuture = createBlobServerAddress(dispatcherGateway);

    final CompletableFuture<Void> jarUploadFuture = uploadAndSetJobFiles(blobServerAddressFuture, jobGraph);

    final CompletableFuture<Acknowledge> acknowledgeCompletableFuture = jarUploadFuture.thenCompose(
        (Void ack) -> dispatcherGateway.submitJob(jobGraph, rpcTimeout));

    return acknowledgeCompletableFuture.thenApply(
        (Acknowledge ignored) -> new JobSubmissionResult(jobGraph.getJobID()));
}
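The submission path above is a chain of futures: resolve an upload target, upload the job files, hand the job to the dispatcher, then map the acknowledgement to a JobSubmissionResult. Below is a self-contained sketch of that chaining with plain JDK CompletableFutures; every name and value is a placeholder.

import java.util.concurrent.CompletableFuture;

final class SubmissionFlowSketch {

    static CompletableFuture<String> submit(String jobId) {
        // stand-in for the blob server address lookup
        CompletableFuture<String> addressFuture = CompletableFuture.supplyAsync(() -> "blob-server:50100");

        // upload step chained onto the address lookup
        CompletableFuture<Void> uploadFuture = addressFuture.thenAccept(address -> {
            // pretend to upload job files to 'address'
        });

        // only after the upload completes is the job handed over, and the
        // acknowledgement is finally mapped to a submission result
        return uploadFuture
            .thenCompose(ignored -> CompletableFuture.completedFuture("ACK"))
            .thenApply(ack -> jobId);
    }

    public static void main(String[] args) {
        System.out.println(submit("job-42").join());
    }
}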
final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0));
env.enableCheckpointing(10); // Flink doesn't allow lower than 10 ms

JobGraph jobGraph = env.getStreamGraph().getJobGraph();
JobID jobID = Preconditions.checkNotNull(jobGraph.getJobID());

// ... (job submission and the retried job-status lookup producing 'jobStatusFuture' elided in this
//      excerpt; the retry runs on TestingUtils.defaultScheduledExecutor())

try {
    assertEquals(JobStatus.FINISHED, jobStatusFuture.get());
} catch (Throwable e) {
    // dump diagnostic state before failing
    out.println("afterMessWithZooKeeper= " + CheckpointBlockingFunction.afterMessWithZooKeeper.get());
    out.println("failedAlready= " + CheckpointBlockingFunction.failedAlready.get());
    out.println("currentJobStatus= " + clusterClient.getJobStatus(jobID).get());
    out.println("numRestarts= " + RestartReporter.numRestarts.getValue());
    out.println("threadDump= " + generateThreadDump());
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
final IntegerStreamSource source = new IntegerStreamSource();

IterativeStream<Integer> iteration = env.addSource(source)
    // ... (iteration body elided in this excerpt)

config.addAll(jobGraph.getJobConfiguration());
config.setString(TaskManagerOptions.MANAGED_MEMORY_SIZE, "0");

// ... (MiniClusterResource construction elided in this excerpt)
    .setConfiguration(config)
    .setNumberTaskManagers(1)
    .setNumberSlotsPerTaskManager(2 * jobGraph.getMaximumParallelism())
    .build());
cluster.before();

latch.await();

savepointPath = client.triggerSavepoint(jobGraph.getJobID(), null).get();

client.cancel(jobGraph.getJobID());
while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
    Thread.sleep(100);
}

jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

// ... (re-submission of the restored job elided in this excerpt)
client.cancel(jobGraph.getJobID());
while (!client.getJobStatus(jobGraph.getJobID()).get().isGloballyTerminalState()) {
    Thread.sleep(100);
}
final int numSlotsPerTaskManager = 1;
final Configuration config = new Configuration();

// ... (job vertex setup elided in this excerpt)
vertex.setParallelism(1);

final JobGraph graph = new JobGraph(vertex);

client.submitJob(graph, SavepointITCase.class.getClassLoader());

// triggering a savepoint on a batch job is expected to fail
client.triggerSavepoint(graph.getJobID(), null).get();

// ... (catch block of the surrounding try elided in this excerpt)
assertTrue(ExceptionUtils.findThrowableWithMessage(e, graph.getJobID().toString()).isPresent());
assertTrue(ExceptionUtils.findThrowableWithMessage(e, "is not a streaming job").isPresent());
} finally {
CompletableFuture<JobGraph> jarUploadFuture = jobGraphFuture.thenCombine(blobServerPortFuture, (jobGraph, blobServerPort) -> {
    final InetSocketAddress address = new InetSocketAddress(gateway.getHostname(), blobServerPort);
    // ... (try/catch uploading the user jars to the blob server at 'address' elided in this excerpt)
    return jobGraph;
});

CompletableFuture<Acknowledge> jobSubmissionFuture = jarUploadFuture.thenCompose(jobGraph -> {
    jobGraph.setAllowQueuedScheduling(true);
    return gateway.submitJob(jobGraph, timeout);
});

return jobSubmissionFuture
    .thenCombine(jarUploadFuture, (ack, jobGraph) -> new JarRunResponseBody(jobGraph.getJobID()))
    .exceptionally(throwable -> {
        throw new CompletionException(new RestHandlerException(
JobGraph jobGraph = env.getStreamGraph().getJobGraph();
jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath));

// ... (job submission producing 'jobSubmissionResult' elided in this excerpt)
CompletableFuture<JobStatus> jobStatusFuture = client.getJobStatus(jobSubmissionResult.getJobID());

JobStatus jobStatus = jobStatusFuture.get(5, TimeUnit.SECONDS);
@Before
public void setUp() throws Exception {
    restServerEndpointConfiguration = RestServerEndpointConfiguration.fromConfiguration(restConfig);
    mockGatewayRetriever = () -> CompletableFuture.completedFuture(mockRestfulGateway);

    executor = Executors.newSingleThreadExecutor(new ExecutorThreadFactory(RestClusterClientTest.class.getSimpleName()));

    jobGraph = new JobGraph("testjob");
    jobId = jobGraph.getJobID();
}
private CompletableFuture<JobGraph> getJobGraphAsync(
        JarHandlerContext context,
        final SavepointRestoreSettings savepointRestoreSettings) {
    return CompletableFuture.supplyAsync(() -> {
        final JobGraph jobGraph = context.toJobGraph(configuration);
        jobGraph.setSavepointRestoreSettings(savepointRestoreSettings);
        return jobGraph;
    }, executor);
}
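The handler above offloads JobGraph construction to a dedicated executor via supplyAsync so the calling thread is not blocked. A self-contained sketch of that idiom follows; the class name and the "job-graph" stand-in value are placeholders, not the handler's API.

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

final class AsyncBuildSketch {

    public static void main(String[] args) {
        final ExecutorService executor = Executors.newSingleThreadExecutor();
        try {
            // heavy construction work runs on 'executor', not on the caller thread
            CompletableFuture<String> graphFuture =
                CompletableFuture.supplyAsync(() -> "job-graph", executor);

            System.out.println(graphFuture.join());
        } finally {
            executor.shutdown();
        }
    }
}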