/**
 * Reacts to a status transition of the job.
 *
 * <p>Nothing happens unless the new status is globally terminal. In that case the current
 * execution graph is archived on the calling thread and the completion callback is handed
 * to {@code scheduledExecutorService}.
 *
 * @param newJobStatus the status the job has switched to
 * @param timestamp time of the status change; not read by this method
 * @param error failure associated with the status change, may be {@code null}; not read by this method
 */
private void jobStatusChanged(
		final JobStatus newJobStatus,
		long timestamp,
		@Nullable final Throwable error) {

	validateRunsInMainThread();

	if (!newJobStatus.isGloballyTerminalState()) {
		return;
	}

	// Snapshot the graph before handing off, so the callback only ever sees the archived copy.
	final ArchivedExecutionGraph terminalSnapshot = ArchivedExecutionGraph.createFrom(executionGraph);

	scheduledExecutorService.execute(
		() -> jobCompletionActions.jobReachedGloballyTerminalState(terminalSnapshot));
}
/**
 * Handles a change of the job's status.
 *
 * <p>Only globally terminal states are acted upon: the execution graph is archived and
 * {@code jobCompletionActions} is notified through {@code scheduledExecutorService}.
 *
 * @param newJobStatus the status the job has switched to
 * @param timestamp time of the status change; unused here
 * @param error failure associated with the status change, may be {@code null}; unused here
 */
private void jobStatusChanged(
		final JobStatus newJobStatus,
		long timestamp,
		@Nullable final Throwable error) {

	validateRunsInMainThread();

	final boolean reachedGlobalEnd = newJobStatus.isGloballyTerminalState();
	if (!reachedGlobalEnd) {
		// Non-terminal and locally-terminal transitions require no action.
		return;
	}

	final ArchivedExecutionGraph archivedGraph = ArchivedExecutionGraph.createFrom(executionGraph);

	scheduledExecutorService.execute(
		() -> jobCompletionActions.jobReachedGloballyTerminalState(archivedGraph));
}
/**
 * Reacts to a status transition of the job.
 *
 * <p>When the new status is globally terminal, the execution graph is archived, the
 * operation log manager is cleared, and the completion callback is submitted to
 * {@code scheduledExecutorService}. Any other status is ignored.
 *
 * @param newJobStatus the status the job has switched to
 * @param timestamp time of the status change; not read by this method
 * @param error failure associated with the status change, may be {@code null}; not read by this method
 */
private void jobStatusChanged(
		final JobStatus newJobStatus,
		long timestamp,
		@Nullable final Throwable error) {

	validateRunsInMainThread();

	if (!newJobStatus.isGloballyTerminalState()) {
		return;
	}

	// Archive first; the log manager is cleared only after the snapshot has been taken.
	final ArchivedExecutionGraph terminalSnapshot = ArchivedExecutionGraph.createFrom(executionGraph);
	operationLogManager.clear();

	scheduledExecutorService.execute(
		() -> jobCompletionActions.jobReachedGloballyTerminalState(terminalSnapshot));
}
/**
 * Installs a new execution graph together with its job metric group.
 *
 * <p>Two preconditions are enforced through {@code checkState}: the execution graph being
 * replaced must be in a terminal state, and no job metric group may currently be set.
 *
 * @param newExecutionGraph the execution graph to use from now on
 * @param newJobManagerJobMetricGroup the metric group belonging to the new execution graph
 */
private void assignExecutionGraph(
		ExecutionGraph newExecutionGraph,
		JobManagerJobMetricGroup newJobManagerJobMetricGroup) {

	validateRunsInMainThread();

	final boolean previousGraphTerminated = executionGraph.getState().isTerminalState();
	checkState(previousGraphTerminated);

	final boolean metricGroupUnset = jobManagerJobMetricGroup == null;
	checkState(metricGroupUnset);

	this.executionGraph = newExecutionGraph;
	this.jobManagerJobMetricGroup = newJobManagerJobMetricGroup;
}
/**
 * Swaps in the given execution graph and job metric group.
 *
 * <p>Fails via {@code checkState} if the current execution graph has not reached a terminal
 * state, or if a job metric group is still assigned.
 *
 * @param newExecutionGraph the execution graph to use from now on
 * @param newJobManagerJobMetricGroup the metric group belonging to the new execution graph
 */
private void assignExecutionGraph(
		ExecutionGraph newExecutionGraph,
		JobManagerJobMetricGroup newJobManagerJobMetricGroup) {

	validateRunsInMainThread();

	// The old graph must be finished before it may be replaced.
	final JobStatus currentGraphState = executionGraph.getState();
	checkState(currentGraphState.isTerminalState());

	// The previous metric group must already have been cleared.
	checkState(null == jobManagerJobMetricGroup);

	this.executionGraph = newExecutionGraph;
	this.jobManagerJobMetricGroup = newJobManagerJobMetricGroup;
}
/**
 * Starts executing the job under the given {@link JobMasterId}.
 *
 * <p>If the current fencing token already equals {@code newJobMasterId}, the call only logs
 * and acknowledges. Otherwise the fencing token is replaced, the job master services are
 * started and the execution graph is reset and scheduled.
 *
 * @param newJobMasterId the id to run under; must not be {@code null}
 * @return an {@link Acknowledge} in both cases
 * @throws Exception if starting the services or resetting/scheduling the execution graph fails
 */
private Acknowledge startJobExecution(JobMasterId newJobMasterId) throws Exception {
	validateRunsInMainThread();
	checkNotNull(newJobMasterId, "The new JobMasterId must not be null.");

	final boolean alreadyRunningWithThisId = Objects.equals(getFencingToken(), newJobMasterId);

	if (alreadyRunningWithThisId) {
		log.info("Already started the job execution with JobMasterId {}.", newJobMasterId);
	} else {
		setNewFencingToken(newJobMasterId);
		startJobMasterServices();

		log.info("Starting execution of job {} ({})", jobGraph.getName(), jobGraph.getJobID());

		resetAndScheduleExecutionGraph();
	}

	return Acknowledge.get();
}
/**
 * Begins job execution for the given {@link JobMasterId}.
 *
 * <p>A repeated call with the id that is already the fencing token is a no-op apart from a
 * log message. A different id becomes the new fencing token, after which the job master
 * services are started and the execution graph is reset and scheduled.
 *
 * @param newJobMasterId the id to run under; must not be {@code null}
 * @return an {@link Acknowledge} in both cases
 * @throws Exception if starting the services or resetting/scheduling the execution graph fails
 */
private Acknowledge startJobExecution(JobMasterId newJobMasterId) throws Exception {
	validateRunsInMainThread();
	checkNotNull(newJobMasterId, "The new JobMasterId must not be null.");

	final boolean isNewId = !Objects.equals(getFencingToken(), newJobMasterId);

	if (isNewId) {
		setNewFencingToken(newJobMasterId);
		startJobMasterServices();

		log.info("Starting execution of job {} ({})", jobGraph.getName(), jobGraph.getJobID());

		resetAndScheduleExecutionGraph();
	} else {
		log.info("Already started the job execution with JobMasterId {}.", newJobMasterId);
	}

	return Acknowledge.get();
}
/** * Suspending job, all the running tasks will be cancelled, and communication with other components * will be disposed. * * <p>Mostly job is suspended because of the leadership has been revoked, one can be restart this job by * calling the {@link #start(JobMasterId, Time)} method once we take the leadership back again. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // the slot pool stops receiving messages and clears its pooled slots slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }
/** * Suspending job, all the running tasks will be cancelled, and communication with other components * will be disposed. * * <p>Mostly job is suspended because of the leadership has been revoked, one can be restart this job by * calling the {@link #start(JobMasterId, Time)} method once we take the leadership back again. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // the slot pool stops receiving messages and clears its pooled slots slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }
/** * Suspending job, and communication with other components will be disposed. * * <p>Mostly job is suspended without cancelling running tasks because of the leadership has been revoked, * the one who takes the leadership can take over the control. * * @param cause The reason of why this job been suspended. */ private Acknowledge suspendExecution(final Exception cause) { validateRunsInMainThread(); if (getFencingToken() == null) { log.debug("Job has already been suspended or shutdown."); return Acknowledge.get(); } // not leader anymore --> set the JobMasterId to null setFencingToken(null); try { resourceManagerLeaderRetriever.stop(); } catch (Throwable t) { log.warn("Failed to stop resource manager leader retriever when suspending.", t); } suspendAndClearExecutionGraphFields(cause); // flush the operation logs. operationLogManager.stop(); // the slot pool stops receiving messages and clears its pooled slots. slotPoolGateway.suspend(); // disconnect from resource manager: closeResourceManagerConnection(cause); return Acknowledge.get(); }
private void jobStatusChanged(final JobStatus newJobStatus, long timestamp, final Throwable error) { validateRunsInMainThread();
/**
 * Makes sure a schedulable execution graph is in place and then schedules it.
 *
 * <p>An execution graph still in state {@code CREATED} is scheduled as is. Otherwise the
 * current graph is suspended, a new metric group and execution graph are created, and the
 * new pair is assigned once the old graph's termination future completes. Scheduling is
 * chained after that assignment.
 *
 * @throws Exception if creating and restoring the replacement execution graph fails
 */
private void resetAndScheduleExecutionGraph() throws Exception {
	validateRunsInMainThread();

	final CompletableFuture<Void> graphReadyFuture;

	if (executionGraph.getState() != JobStatus.CREATED) {
		final FlinkException resetCause =
			new FlinkException("ExecutionGraph is being reset in order to be rescheduled.");
		suspendAndClearExecutionGraphFields(resetCause);

		final JobManagerJobMetricGroup freshMetricGroup = jobMetricGroupFactory.create(jobGraph);
		final ExecutionGraph freshExecutionGraph = createAndRestoreExecutionGraph(freshMetricGroup);

		// The swap runs on the main thread executor, whether the old graph's
		// termination future completed normally or exceptionally.
		graphReadyFuture = executionGraph.getTerminationFuture().handleAsync(
			(JobStatus ignoredStatus, Throwable ignoredFailure) -> {
				assignExecutionGraph(freshExecutionGraph, freshMetricGroup);
				return null;
			},
			getMainThreadExecutor());
	} else {
		// Graph has not been used yet; nothing to reset.
		graphReadyFuture = CompletableFuture.completedFuture(null);
	}

	graphReadyFuture.thenRun(this::scheduleExecutionGraph);
}
/**
 * Resets the execution graph if necessary and schedules it afterwards.
 *
 * <p>If the current graph is in state {@code CREATED} it is scheduled directly. In any other
 * state the graph is suspended and a replacement graph, with its own metric group, is
 * built. The replacement is assigned after the old graph's termination future completes,
 * and scheduling follows that assignment.
 *
 * @throws Exception if creating and restoring the replacement execution graph fails
 */
private void resetAndScheduleExecutionGraph() throws Exception {
	validateRunsInMainThread();

	final boolean needsReset = executionGraph.getState() != JobStatus.CREATED;
	final CompletableFuture<Void> assignedFuture;

	if (needsReset) {
		suspendAndClearExecutionGraphFields(
			new FlinkException("ExecutionGraph is being reset in order to be rescheduled."));

		final JobManagerJobMetricGroup replacementMetricGroup = jobMetricGroupFactory.create(jobGraph);
		final ExecutionGraph replacementGraph = createAndRestoreExecutionGraph(replacementMetricGroup);

		final CompletableFuture<JobStatus> oldGraphTermination = executionGraph.getTerminationFuture();

		// Both outcomes of the termination future lead to the assignment; its result is not inspected.
		assignedFuture = oldGraphTermination.handleAsync(
			(JobStatus unusedStatus, Throwable unusedFailure) -> {
				assignExecutionGraph(replacementGraph, replacementMetricGroup);
				return null;
			},
			getMainThreadExecutor());
	} else {
		assignedFuture = CompletableFuture.completedFuture(null);
	}

	assignedFuture.thenRun(this::scheduleExecutionGraph);
}
private Acknowledge startJobExecution(JobMasterId newJobMasterId) throws Exception { validateRunsInMainThread();