/**
 * Callback for a failed job master: passes {@code cause} straight on to
 * {@code handleJobManagerRunnerError}.
 *
 * @param cause the failure reported for the job master
 */
@Override
public void jobMasterFailed(Throwable cause) {
    handleJobManagerRunnerError(cause);
}
/**
 * Callback for a failed job master: passes {@code cause} straight on to
 * {@code handleJobManagerRunnerError}.
 *
 * @param cause the failure reported for the job master
 */
@Override
public void jobMasterFailed(Throwable cause) {
    handleJobManagerRunnerError(cause);
}
/**
 * Callback for a failed job master: passes {@code cause} straight on to
 * {@code handleJobManagerRunnerError}.
 *
 * @param cause the failure reported for the job master
 */
@Override
public void jobMasterFailed(Throwable cause) {
    handleJobManagerRunnerError(cause);
}
/**
 * Error callback: logs {@code exception} at error level and then hands it to
 * {@code handleJobManagerRunnerError}.
 *
 * @param exception the error that was reported
 */
@Override
public void handleError(Exception exception) {
    log.error(
        "Leader Election Service encountered a fatal error.",
        exception);

    handleJobManagerRunnerError(exception);
}
/**
 * Error callback: logs {@code exception} at error level and then hands it to
 * {@code handleJobManagerRunnerError}.
 *
 * @param exception the error that was reported
 */
@Override
public void handleError(Exception exception) {
    log.error(
        "Leader Election Service encountered a fatal error.",
        exception);

    handleJobManagerRunnerError(exception);
}
/**
 * Error callback: logs {@code exception} at error level and then hands it to
 * {@code handleJobManagerRunnerError}.
 *
 * @param exception the error that was reported
 */
@Override
public void handleError(Exception exception) {
    log.error(
        "Leader Election Service encountered a fatal error.",
        exception);

    handleJobManagerRunnerError(exception);
}
/**
 * Leadership-granted callback. Holding {@code lock}, it either logs that the
 * runner is already shut down, or calls
 * {@code verifyJobSchedulingStatusAndStartJobManager} with the new session id.
 * Any {@link Exception} thrown by that call is passed to
 * {@code handleJobManagerRunnerError} instead of propagating.
 *
 * @param leaderSessionID session id that comes with the granted leadership
 */
@Override
public void grantLeadership(final UUID leaderSessionID) {
    synchronized (lock) {
        if (!shutdown) {
            try {
                verifyJobSchedulingStatusAndStartJobManager(leaderSessionID);
            } catch (Exception startFailure) {
                handleJobManagerRunnerError(startFailure);
            }
        } else {
            log.info("JobManagerRunner already shutdown.");
        }
    }
}
/**
 * Leadership-granted callback. Holding {@code lock}, it either logs that the
 * runner is already shut down, or calls
 * {@code verifyJobSchedulingStatusAndStartJobManager} with the new session id.
 * Any {@link Exception} thrown by that call is passed to
 * {@code handleJobManagerRunnerError} instead of propagating.
 *
 * @param leaderSessionID session id that comes with the granted leadership
 */
@Override
public void grantLeadership(final UUID leaderSessionID) {
    synchronized (lock) {
        if (!shutdown) {
            try {
                verifyJobSchedulingStatusAndStartJobManager(leaderSessionID);
            } catch (Exception startFailure) {
                handleJobManagerRunnerError(startFailure);
            }
        } else {
            log.info("JobManagerRunner already shutdown.");
        }
    }
}
/**
 * Leadership-granted callback. Holding {@code lock}, it either logs that the
 * runner is already shut down, or calls
 * {@code verifyJobSchedulingStatusAndStartJobManager} with the new session id.
 * Any {@link Exception} thrown by that call is passed to
 * {@code handleJobManagerRunnerError} instead of propagating.
 *
 * @param leaderSessionID session id that comes with the granted leadership
 */
@Override
public void grantLeadership(final UUID leaderSessionID) {
    synchronized (lock) {
        if (!shutdown) {
            try {
                verifyJobSchedulingStatusAndStartJobManager(leaderSessionID);
            } catch (Exception startFailure) {
                handleJobManagerRunnerError(startFailure);
            }
        } else {
            log.info("JobManagerRunner already shutdown.");
        }
    }
}
/**
 * Leadership-revoked callback. Holding {@code lock}: if {@code shutdown} is set
 * it only logs and returns. Otherwise it logs the revocation, calls
 * {@code setNewLeaderGatewayFuture()}, and asks {@code jobMaster} to suspend.
 *
 * <p>The completion callback of the suspend future is submitted to the executor
 * obtained from {@code jobManagerSharedServices}. A failed suspend is wrapped in
 * a {@code FlinkException} and given to {@code handleJobManagerRunnerError}; a
 * successful one notifies {@code leaderShipLostHandler}.
 */
@Override
public void revokeLeadership() {
    synchronized (lock) {
        if (shutdown) {
            log.info("JobManagerRunner already shutdown.");
            return;
        }

        log.info(
            "JobManager for job {} ({}) was revoked leadership at {}.",
            jobName,
            jobID,
            getAddress());

        setNewLeaderGatewayFuture();

        jobMaster
            .suspend(new FlinkException("JobManager is no longer the leader."), rpcTimeout)
            .whenCompleteAsync(
                (ack, suspendFailure) -> {
                    if (suspendFailure == null) {
                        leaderShipLostHandler.onLeaderShipLost(
                            new Exception("Job manager runner was revoked leader ship."));
                    } else {
                        handleJobManagerRunnerError(
                            new FlinkException("Could not suspend the job manager.", suspendFailure));
                    }
                },
                jobManagerSharedServices.getScheduledExecutorService());
    }
}
/**
 * Leadership-revoked callback. Holding {@code lock}: if {@code shutdown} is set
 * it only logs and returns. Otherwise it logs the revocation, calls
 * {@code setNewLeaderGatewayFuture()}, and asks {@code jobMaster} to suspend.
 *
 * <p>The completion callback of the suspend future is submitted to the executor
 * obtained from {@code jobManagerSharedServices}. A failed suspend is wrapped in
 * a {@code FlinkException} and given to {@code handleJobManagerRunnerError};
 * nothing further happens when the suspend succeeds.
 */
@Override
public void revokeLeadership() {
    synchronized (lock) {
        if (shutdown) {
            log.info("JobManagerRunner already shutdown.");
            return;
        }

        log.info(
            "JobManager for job {} ({}) was revoked leadership at {}.",
            jobGraph.getName(),
            jobGraph.getJobID(),
            getAddress());

        setNewLeaderGatewayFuture();

        jobMaster
            .suspend(new FlinkException("JobManager is no longer the leader."), rpcTimeout)
            .whenCompleteAsync(
                (ack, suspendFailure) -> {
                    if (suspendFailure == null) {
                        return;
                    }
                    handleJobManagerRunnerError(
                        new FlinkException("Could not suspend the job manager.", suspendFailure));
                },
                jobManagerSharedServices.getScheduledExecutorService());
    }
}
/**
 * Leadership-revoked callback. Holding {@code lock}: if {@code shutdown} is set
 * it only logs and returns. Otherwise it logs the revocation, calls
 * {@code setNewLeaderGatewayFuture()}, and asks {@code jobMaster} to suspend.
 *
 * <p>The completion callback of the suspend future is submitted to the executor
 * obtained from {@code jobManagerSharedServices}. A failed suspend is wrapped in
 * a {@code FlinkException} and given to {@code handleJobManagerRunnerError};
 * nothing further happens when the suspend succeeds.
 */
@Override
public void revokeLeadership() {
    synchronized (lock) {
        if (shutdown) {
            log.info("JobManagerRunner already shutdown.");
            return;
        }

        log.info(
            "JobManager for job {} ({}) was revoked leadership at {}.",
            jobGraph.getName(),
            jobGraph.getJobID(),
            getAddress());

        setNewLeaderGatewayFuture();

        jobMaster
            .suspend(new FlinkException("JobManager is no longer the leader."), rpcTimeout)
            .whenCompleteAsync(
                (ack, suspendFailure) -> {
                    if (suspendFailure == null) {
                        return;
                    }
                    handleJobManagerRunnerError(
                        new FlinkException("Could not suspend the job manager.", suspendFailure));
                },
                jobManagerSharedServices.getScheduledExecutorService());
    }
}
private void verifyJobSchedulingStatusAndStartJobManager(UUID leaderSessionId) throws Exception { final JobSchedulingStatus jobSchedulingStatus = runningJobsRegistry.getJobSchedulingStatus(jobID); if (jobSchedulingStatus == JobSchedulingStatus.DONE) { log.info("Granted leader ship but job {} has been finished. ", jobID); jobFinishedByOther(); } else { log.info("JobManager runner for job {} ({}) was granted leadership with session id {} at {}.", jobName, jobID, leaderSessionId, getAddress()); if (jobSchedulingStatus == JobSchedulingStatus.RUNNING) { // If finding the job status is running, it means someone has already started the job, need recover. jobMaster.reconcile(); } else if (jobSchedulingStatus == JobSchedulingStatus.PENDING) { runningJobsRegistry.setJobRunning(jobID); } final CompletableFuture<Acknowledge> startFuture = jobMaster.start(new JobMasterId(leaderSessionId), rpcTimeout); final CompletableFuture<JobMasterGateway> currentLeaderGatewayFuture = leaderGatewayFuture; startFuture.whenCompleteAsync( (Acknowledge ack, Throwable throwable) -> { if (throwable != null) { handleJobManagerRunnerError(new FlinkException("Could not start the job manager.", throwable)); } else { confirmLeaderSessionIdIfStillLeader(leaderSessionId, currentLeaderGatewayFuture); } }, jobManagerSharedServices.getScheduledExecutorService()); } }
/**
 * Looks up the scheduling status of the job in {@code runningJobsRegistry}.
 * If it is {@code DONE}, logs and calls {@code jobFinishedByOther()} without
 * starting anything. Otherwise marks the job as running in the registry and
 * starts {@code jobMaster} with a {@code JobMasterId} built from
 * {@code leaderSessionId}.
 *
 * <p>Once the start future completes (callback submitted to the executor from
 * {@code jobManagerSharedServices}), a failure is wrapped in a
 * {@code FlinkException} and passed to {@code handleJobManagerRunnerError};
 * otherwise {@code confirmLeaderSessionIdIfStillLeader} is invoked with the
 * {@code leaderGatewayFuture} value captured right after the start call.
 *
 * @param leaderSessionId session id of the leadership that was granted
 * @throws Exception declared by this method; whatever the registry or job master
 *     calls raise propagates to the caller
 */
private void verifyJobSchedulingStatusAndStartJobManager(UUID leaderSessionId) throws Exception {
    final JobSchedulingStatus schedulingStatus =
        runningJobsRegistry.getJobSchedulingStatus(jobGraph.getJobID());

    if (schedulingStatus == JobSchedulingStatus.DONE) {
        log.info("Granted leader ship but job {} has been finished. ", jobGraph.getJobID());
        jobFinishedByOther();
        return;
    }

    log.info(
        "JobManager runner for job {} ({}) was granted leadership with session id {} at {}.",
        jobGraph.getName(),
        jobGraph.getJobID(),
        leaderSessionId,
        getAddress());

    runningJobsRegistry.setJobRunning(jobGraph.getJobID());

    final CompletableFuture<Acknowledge> jobMasterStart =
        jobMaster.start(new JobMasterId(leaderSessionId), rpcTimeout);

    // Read the field only after the start call, as the original does.
    final CompletableFuture<JobMasterGateway> gatewayFutureAtStart = leaderGatewayFuture;

    jobMasterStart.whenCompleteAsync(
        (ack, startFailure) -> {
            if (startFailure == null) {
                confirmLeaderSessionIdIfStillLeader(leaderSessionId, gatewayFutureAtStart);
            } else {
                handleJobManagerRunnerError(
                    new FlinkException("Could not start the job manager.", startFailure));
            }
        },
        jobManagerSharedServices.getScheduledExecutorService());
}
/**
 * Looks up the scheduling status of the job in {@code runningJobsRegistry}.
 * If it is {@code DONE}, logs and calls {@code jobFinishedByOther()} without
 * starting anything. Otherwise marks the job as running in the registry and
 * starts {@code jobMaster} with a {@code JobMasterId} built from
 * {@code leaderSessionId}.
 *
 * <p>Once the start future completes (callback submitted to the executor from
 * {@code jobManagerSharedServices}), a failure is wrapped in a
 * {@code FlinkException} and passed to {@code handleJobManagerRunnerError};
 * otherwise {@code confirmLeaderSessionIdIfStillLeader} is invoked with the
 * {@code leaderGatewayFuture} value captured right after the start call.
 *
 * @param leaderSessionId session id of the leadership that was granted
 * @throws Exception declared by this method; whatever the registry or job master
 *     calls raise propagates to the caller
 */
private void verifyJobSchedulingStatusAndStartJobManager(UUID leaderSessionId) throws Exception {
    final JobSchedulingStatus schedulingStatus =
        runningJobsRegistry.getJobSchedulingStatus(jobGraph.getJobID());

    if (schedulingStatus == JobSchedulingStatus.DONE) {
        log.info("Granted leader ship but job {} has been finished. ", jobGraph.getJobID());
        jobFinishedByOther();
        return;
    }

    log.info(
        "JobManager runner for job {} ({}) was granted leadership with session id {} at {}.",
        jobGraph.getName(),
        jobGraph.getJobID(),
        leaderSessionId,
        getAddress());

    runningJobsRegistry.setJobRunning(jobGraph.getJobID());

    final CompletableFuture<Acknowledge> jobMasterStart =
        jobMaster.start(new JobMasterId(leaderSessionId), rpcTimeout);

    // Read the field only after the start call, as the original does.
    final CompletableFuture<JobMasterGateway> gatewayFutureAtStart = leaderGatewayFuture;

    jobMasterStart.whenCompleteAsync(
        (ack, startFailure) -> {
            if (startFailure == null) {
                confirmLeaderSessionIdIfStillLeader(leaderSessionId, gatewayFutureAtStart);
            } else {
                handleJobManagerRunnerError(
                    new FlinkException("Could not start the job manager.", startFailure));
            }
        },
        jobManagerSharedServices.getScheduledExecutorService());
}