public SparkRunningJobManager(SparkRunningJobAppConfig.ZKStateConfig config) {
    this.runningJobManager = new RunningJobManager(config.zkQuorum,
            config.zkSessionTimeoutMs, config.zkRetryTimes, config.zkRetryInterval,
            config.zkRoot, config.zkLockPath);
}
public void delete(String yarnAppId, String jobId) {
    this.runningJobManager.delete(yarnAppId, jobId);
}
}
@Override
public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    this.rmResourceFetcher = new RMResourceFetcher(crawlConfig.endPointConfig.RMBasePaths);
    this.partitionId = calculatePartitionId(context);
    // sanity check: 0 <= partitionId <= numTotalPartitions - 1
    if (partitionId < 0 || partitionId >= crawlConfig.controlConfig.numTotalPartitions) {
        throw new IllegalStateException("partitionId should be between 0 and numTotalPartitions - 1, but partitionId is "
                + partitionId + " and numTotalPartitions is " + crawlConfig.controlConfig.numTotalPartitions);
    }
    Class<? extends JobIdPartitioner> partitionerCls = crawlConfig.controlConfig.partitionerCls;
    try {
        this.jobFilter = new JobIdFilterByPartition(partitionerCls.newInstance(),
                crawlConfig.controlConfig.numTotalPartitions, partitionId);
    } catch (Exception e) {
        LOG.error("failed to instantiate job partitioner class " + partitionerCls.getCanonicalName());
        throw new IllegalStateException(e);
    }
    this.collector = collector;
    this.runningJobManager = new RunningJobManager(crawlConfig.zkStateConfig.zkQuorum,
            crawlConfig.zkStateConfig.zkSessionTimeoutMs, crawlConfig.zkStateConfig.zkRetryTimes,
            crawlConfig.zkStateConfig.zkRetryInterval, crawlConfig.zkStateConfig.zkRoot,
            crawlConfig.zkStateConfig.zkLockPath);
    this.lastFinishAppTime = this.runningJobManager.recoverLastFinishedTime(partitionId);
    if (this.lastFinishAppTime == 0L) {
        // no checkpoint yet: start fetching from one day ago
        this.lastFinishAppTime = Calendar.getInstance().getTimeInMillis() - 24 * 60 * 60000L;
        this.runningJobManager.updateLastFinishTime(partitionId, this.lastFinishAppTime);
    }
}
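// The open() method above calls calculatePartitionId(context), which is not included in this
// section. Below is a minimal sketch, assuming the partition id is this spout task's index among
// all tasks of the same component; the actual derivation used by the topology may differ.
private int calculatePartitionId(TopologyContext context) {
    int thisTaskId = context.getThisTaskId();
    List<Integer> taskIds = context.getComponentTasks(context.getThisComponentId());
    for (int i = 0; i < taskIds.size(); i++) {
        if (taskIds.get(i) == thisTaskId) {
            return i;
        }
    }
    throw new IllegalStateException("task " + thisTaskId + " is not a task of component " + context.getThisComponentId());
}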
public Map<String, SparkAppEntity> recoverYarnApp(String appId) throws Exception {
    Map<String, Pair<Map<String, String>, AppInfo>> result = this.runningJobManager.recoverYarnApp(appId);
    Map<String, SparkAppEntity> apps = new HashMap<>();
    for (String jobId : result.keySet()) {
        Pair<Map<String, String>, AppInfo> job = result.get(jobId);
        SparkAppEntity sparkAppEntity = new SparkAppEntity();
        sparkAppEntity.setTags(job.getLeft());
        sparkAppEntity.setAppInfo(job.getRight());
        sparkAppEntity.setTimestamp(job.getRight().getStartedTime());
        apps.put(jobId, sparkAppEntity);
    }
    return apps;
}
public boolean update(String yarnAppId, String jobId, JobExecutionAPIEntity entity) {
    return this.runningJobManager.update(yarnAppId, jobId, entity.getTags(), entity.getAppInfo());
}
public Map<String, Map<String, JobExecutionAPIEntity>> recover() {
    // we need to read from zookeeper; the path looks like /apps/mr/running/yarnAppId/jobId/
    // the returned structure is <yarnAppId, <jobId, JobExecutionAPIEntity>>
    Map<String, Map<String, JobExecutionAPIEntity>> result = new HashMap<>();
    Map<String, Map<String, Pair<Map<String, String>, AppInfo>>> apps = this.runningJobManager.recover();
    for (String appId : apps.keySet()) {
        result.put(appId, new HashMap<>());
        Map<String, Pair<Map<String, String>, AppInfo>> jobs = apps.get(appId);
        for (String jobId : jobs.keySet()) {
            Pair<Map<String, String>, AppInfo> job = jobs.get(jobId);
            JobExecutionAPIEntity jobExecutionAPIEntity = new JobExecutionAPIEntity();
            jobExecutionAPIEntity.setTags(job.getLeft());
            jobExecutionAPIEntity.setAppInfo(job.getRight());
            jobExecutionAPIEntity.setTimestamp(job.getRight().getStartedTime());
            result.get(appId).put(jobId, jobExecutionAPIEntity);
        }
    }
    return result;
}
public RunningJobManager(String zkQuorum, int zkSessionTimeoutMs, int zkRetryTimes, int zkRetryInterval, String zkRoot, String lockPath) {
    this.zkRoot = zkRoot;
    curator = newCurator(zkQuorum, zkSessionTimeoutMs, zkRetryTimes, zkRetryInterval);
    try {
        curator.start();
    } catch (Exception e) {
        LOG.error("curator start error", e);
    }
    LOG.info("InterProcessMutex lock path is " + lockPath);
    lock = new InterProcessMutex(curator, lockPath);
    try {
        if (curator.checkExists().forPath(this.zkRoot) == null) {
            curator.create()
                    .creatingParentsIfNeeded()
                    .withMode(CreateMode.PERSISTENT)
                    .forPath(this.zkRoot);
        }
    } catch (Exception e) {
        LOG.warn("failed to ensure zkRoot path " + this.zkRoot + " exists", e);
    }
}
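// The constructor above delegates to newCurator(...), which is not shown in this section.
// A minimal sketch, assuming a plain Curator client with a retry-N-times policy (requires
// org.apache.curator.framework.CuratorFramework, CuratorFrameworkFactory and
// org.apache.curator.retry.RetryNTimes); the connection timeout used by the real code is an assumption.
private CuratorFramework newCurator(String zkQuorum, int zkSessionTimeoutMs, int zkRetryTimes, int zkRetryInterval) {
    return CuratorFrameworkFactory.newClient(
            zkQuorum,
            zkSessionTimeoutMs,
            zkSessionTimeoutMs, // connection timeout, assumed equal to the session timeout
            new RetryNTimes(zkRetryTimes, zkRetryInterval));
}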
@Override
public void nextTuple() {
    LOG.info("start to fetch job list");
    try {
        List<AppInfo> apps = rmResourceFetcher.getResource(Constants.ResourceType.RUNNING_MR_JOB);
        if (apps == null) {
            apps = new ArrayList<>();
        }
        handleApps(apps, true);

        long fetchTime = Calendar.getInstance().getTimeInMillis();
        if (fetchTime - this.lastFinishAppTime > 60000L) {
            apps = rmResourceFetcher.getResource(Constants.ResourceType.COMPLETE_MR_JOB, Long.toString(this.lastFinishAppTime));
            if (apps == null) {
                apps = new ArrayList<>();
            }
            handleApps(apps, false);
            this.lastFinishAppTime = fetchTime;
            this.runningJobManager.updateLastFinishTime(partitionId, fetchTime);
        }
    } catch (Exception e) {
        LOG.warn("exception found when fetching job list", e);
    } finally {
        // TODO: the fetch interval needs to be configurable
        Utils.sleep(60);
    }
}
public Map<String, JobExecutionAPIEntity> recoverYarnApp(String appId) throws Exception {
    Map<String, Pair<Map<String, String>, AppInfo>> result = this.runningJobManager.recoverYarnApp(appId);
    Map<String, JobExecutionAPIEntity> jobs = new HashMap<>();
    for (String jobId : result.keySet()) {
        Pair<Map<String, String>, AppInfo> job = result.get(jobId);
        JobExecutionAPIEntity jobExecutionAPIEntity = new JobExecutionAPIEntity();
        jobExecutionAPIEntity.setTags(job.getLeft());
        jobExecutionAPIEntity.setAppInfo(job.getRight());
        jobExecutionAPIEntity.setTimestamp(job.getRight().getStartedTime());
        jobs.put(jobId, jobExecutionAPIEntity);
    }
    return jobs;
}
public void update(String yarnAppId, String jobId, SparkAppEntity entity) {
    this.runningJobManager.update(yarnAppId, jobId, entity.getTags(), entity.getAppInfo());
}
public Map<String, Map<String, SparkAppEntity>> recover() {
    // we need to read from zookeeper; the path looks like /apps/x/running/yarnAppId/jobId/
    // the returned structure is <yarnAppId, <jobId, SparkAppEntity>>
    Map<String, Map<String, SparkAppEntity>> result = new HashMap<>();
    Map<String, Map<String, Pair<Map<String, String>, AppInfo>>> apps = this.runningJobManager.recover();
    for (String appId : apps.keySet()) {
        result.put(appId, new HashMap<>());
        Map<String, Pair<Map<String, String>, AppInfo>> jobs = apps.get(appId);
        for (String jobId : jobs.keySet()) {
            Pair<Map<String, String>, AppInfo> job = jobs.get(jobId);
            SparkAppEntity sparkAppEntity = new SparkAppEntity();
            sparkAppEntity.setTags(job.getLeft());
            sparkAppEntity.setAppInfo(job.getRight());
            sparkAppEntity.setTimestamp(job.getRight().getStartedTime());
            result.get(appId).put(jobId, sparkAppEntity);
        }
    }
    return result;
}
public Map<String, Map<String, Pair<Map<String, String>, AppInfo>>> recover() {
    // we need to read from zookeeper; the path looks like /apps/x/running/yarnAppId/jobId/
    // the content of /apps/x/running/yarnAppId/jobId is a Pair<Map<String, String>, AppInfo>:
    // the entity tags and the AppInfo
    // a yarn application may contain many mr jobs or spark applications,
    // so the returned result is a map of maps:
    // <yarnAppId, <jobId, Pair<Map<String, String>, AppInfo>>>
    Map<String, Map<String, Pair<Map<String, String>, AppInfo>>> result = new HashMap<>();
    try {
        lock.acquire();
        List<String> yarnAppIds = curator.getChildren().forPath(this.zkRoot);
        for (String yarnAppId : yarnAppIds) {
            result.put(yarnAppId, recoverYarnApp(yarnAppId));
        }
    } catch (Exception e) {
        LOG.error("fail to recover", e);
        throw new RuntimeException(e);
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            LOG.error("fail releasing lock", e);
        }
    }
    return result;
}
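// recover() above delegates to recoverYarnApp(yarnAppId), whose body is not part of this section.
// A minimal sketch, assuming each child znode of {zkRoot}/{yarnAppId} is a jobId whose payload is
// a JSON object with "entityTags" and "appInfo" fields (the field names and the JSON serialization
// are assumptions), and that callers already hold the inter-process lock where needed:
public Map<String, Pair<Map<String, String>, AppInfo>> recoverYarnApp(String yarnAppId) throws Exception {
    Map<String, Pair<Map<String, String>, AppInfo>> result = new HashMap<>();
    ObjectMapper mapper = new ObjectMapper();
    String appPath = this.zkRoot + "/" + yarnAppId;
    for (String jobId : curator.getChildren().forPath(appPath)) {
        byte[] data = curator.getData().forPath(appPath + "/" + jobId);
        JsonNode node = mapper.readTree(data);
        Map<String, String> tags = mapper.convertValue(node.get("entityTags"),
                new TypeReference<Map<String, String>>() {});
        AppInfo appInfo = mapper.convertValue(node.get("appInfo"), AppInfo.class);
        result.put(jobId, Pair.of(tags, appInfo));
    }
    return result;
}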
public void delete(String yarnAppId, String jobId) {
    this.runningJobManager.delete(yarnAppId, jobId);
}
}
public MRRunningJobManager(MRRunningJobConfig.ZKStateConfig config) {
    this.runningJobManager = new RunningJobManager(config.zkQuorum,
            config.zkSessionTimeoutMs, config.zkRetryTimes, config.zkRetryInterval,
            config.zkRoot, config.zkLockPath);
}
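// A hypothetical usage sketch of the MRRunningJobManager wrapper above: restore the previously
// persisted running-job state from ZooKeeper on startup, then keep it up to date as jobs are
// observed and eventually finish. The zkStateConfig, yarnAppId, jobId, and entity variables are
// assumed to be populated elsewhere.
MRRunningJobManager jobManager = new MRRunningJobManager(zkStateConfig);
// <yarnAppId, <jobId, JobExecutionAPIEntity>> recovered from {zkRoot}/yarnAppId/jobId
Map<String, Map<String, JobExecutionAPIEntity>> recovered = jobManager.recover();
// persist the latest state of a job that is still running
jobManager.update(yarnAppId, jobId, entity);
// remove the job once it has completed
jobManager.delete(yarnAppId, jobId);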
public void delete(String yarnAppId, String jobId) {
    String path = this.zkRoot + "/" + yarnAppId + "/" + jobId;
    try {
        lock.acquire();
        if (curator.checkExists().forPath(path) != null) {
            curator.delete().deletingChildrenIfNeeded().forPath(path);
            LOG.info("delete job {} for yarn app {}, path {}", jobId, yarnAppId, path);
            String yarnPath = this.zkRoot + "/" + yarnAppId;
            if (curator.getChildren().forPath(yarnPath).size() == 0) {
                // no jobs left under this yarn app: remove the yarn app node as well
                delete(yarnAppId);
            }
        }
    } catch (Exception e) {
        LOG.error("failed to delete job {} for yarn app {}, path {}", jobId, yarnAppId, path, e);
    } finally {
        try {
            lock.release();
        } catch (Exception e) {
            LOG.error("fail releasing lock", e);
        }
    }
}
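// The two-argument delete(...) above falls back to delete(yarnAppId) once a yarn app has no
// remaining job children, but that overload is not shown in this section. A minimal sketch,
// assuming it simply removes the yarn app znode and relies on the caller already holding the
// reentrant inter-process lock:
public void delete(String yarnAppId) {
    String path = this.zkRoot + "/" + yarnAppId;
    try {
        if (curator.checkExists().forPath(path) != null) {
            curator.delete().deletingChildrenIfNeeded().forPath(path);
            LOG.info("delete yarn app {}, path {}", yarnAppId, path);
        }
    } catch (Exception e) {
        LOG.error("failed to delete yarn app {}, path {}", yarnAppId, path, e);
    }
}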