Refine search
// NOTE(review): fragment of the XLearning ApplicationMaster constructor/init, collapsed onto
// two lines and truncated: the LinkedBlockingQueue construction is cut mid-expression (the
// queue argument list was fused with an attempt-id log message), and the block ends inside
// the nested `if (psMemory > maxMem)` — recover the complete method from upstream before use.
// What the visible code does: loads the job configuration resource, sets HADOOP_USER_NAME
// from "hadoop.job.ugi", initializes the output/input bookkeeping maps, and — on attempt > 1
// with retries enabled — auto-scales worker/ps memory by the configured scale factor per
// prior attempt, capped against XLEARNING_CONTAINER_MAX_MEMORY.
conf.addResource(new Path(XLearningConstants.XLEARNING_JOB_CONFIGURATION)); System.setProperty(XLearningConstants.Environment.HADOOP_USER_NAME.toString(), conf.get("hadoop.job.ugi").split(",")[0]); outputInfos = new ArrayList<>(); input2FileStatus = new ConcurrentHashMap<>(); inputFileSplits = null; containerId2InputSplit = new ConcurrentHashMap<>(); statusUpdateInterval = conf.getInt(XLearningConfiguration.XLEARNING_STATUS_UPDATE_INTERVAL, XLearningConfiguration.DEFAULT_XLEARNING_STATUS_PULL_INTERVAL); applicationAttemptID = Records.newRecord(ApplicationAttemptId.class); applicationMessageQueue = new LinkedBlockingQueue<>( + applicationAttemptID.getApplicationId().getId() + ", clustertimestamp=" + applicationAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId=" + applicationAttemptID.getAttemptId()); if (applicationAttemptID.getAttemptId() > 1 && (conf.getInt(XLearningConfiguration.XLEARNING_APP_MAX_ATTEMPTS, XLearningConfiguration.DEFAULT_XLEARNING_APP_MAX_ATTEMPTS) > 1)) { int maxMem = Integer.valueOf(envs.get(XLearningConstants.Environment.XLEARNING_CONTAINER_MAX_MEMORY.toString())); LOG.info("maxMem : " + maxMem); workerMemory = workerMemory + (applicationAttemptID.getAttemptId() - 1) * (int) Math.ceil(workerMemory * conf.getDouble(XLearningConfiguration.XLEARNING_WORKER_MEM_AUTO_SCALE, XLearningConfiguration.DEFAULT_XLEARNING_WORKER_MEM_AUTO_SCALE)); LOG.info("Auto Scale the Worker Memory from " + conf.getInt(XLearningConfiguration.XLEARNING_WORKER_MEMORY, XLearningConfiguration.DEFAULT_XLEARNING_WORKER_MEMORY) + " to " + workerMemory); if (workerMemory > maxMem) { psMemory = psMemory + (applicationAttemptID.getAttemptId() - 1) * (int) Math.ceil(psMemory * conf.getDouble(XLearningConfiguration.XLEARNING_PS_MEM_AUTO_SCALE, XLearningConfiguration.DEFAULT_XLEARNING_PS_MEM_AUTO_SCALE)); LOG.info("Auto Scale the Ps Memory from " + conf.getInt(XLearningConfiguration.XLEARNING_PS_MEMORY, XLearningConfiguration.DEFAULT_XLEARNING_PS_MEMORY) + " to " + 
psMemory); if (psMemory > maxMem) {
publishApplicationAttemptEvent(timelineClient, jstormMasterContext.appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_START, jstormMasterContext.domainId, appSubmitterUgi); container.getId().getApplicationAttemptId().getApplicationId().toString(), container.getId().toString()); ServiceRecord sr = null; try { sr = new ServiceRecord(); sr.set(JOYConstants.HOST, contianerHost); sr.set(YarnRegistryAttributes.YARN_ID, container.getId().toString()); sr.description = JOYConstants.CONTAINER; sr.set(YarnRegistryAttributes.YARN_PERSISTENCE, jstormMasterContext.instanceName = conf.get(JOYConstants.INSTANCE_NAME_KEY); this.jstormMasterContext.user = conf.get(JOYConstants.JSTORM_YARN_USER); this.jstormMasterContext.password = conf.get(JOYConstants.JSTORM_YARN_PASSWORD); this.jstormMasterContext.oldPassword = conf.get(JOYConstants.JSTORM_YARN_OLD_PASSWORD); JOYConstants.APP_TYPE, jstormMasterContext.instanceName, jstormMasterContext.appAttemptID.getApplicationId().toString()); String instancePath = RegistryUtils.serviceclassPath( JOYConstants.APP_TYPE, jstormMasterContext.instanceName); LOG.info("Registering application " + jstormMasterContext.appAttemptID.getApplicationId().toString());
public ContainerFactory(ApplicationAttemptId appAttemptId, long appIdLong) { this.nextId = new AtomicLong(1); ApplicationId appId = ApplicationId.newInstance(appIdLong, appAttemptId.getApplicationId().getId()); this.customAppAttemptId = ApplicationAttemptId.newInstance(appId, appAttemptId.getAttemptId()); }
Thread.sleep(JOYConstants.HEARTBEAT_TIME_INTERVAL); appPath = RegistryUtils.servicePath( JOYConstants.APP_TYPE, jstormMasterContext.instanceName, jstormMasterContext.appAttemptID.getApplicationId().toString()); ServiceRecord app = new ServiceRecord(); Date now = new Date(); publishApplicationAttemptEvent(timelineClient, jstormMasterContext.appAttemptID.toString(), DSEvent.DS_APP_ATTEMPT_END, jstormMasterContext.domainId, appSubmitterUgi); JOYConstants.APP_TYPE, jstormMasterContext.instanceName, jstormMasterContext.appAttemptID.getApplicationId().toString()); try { registryOperations.delete(appPath, true);
ContainerId containerId = ConverterUtils.toContainerId(envs .get(ApplicationConstants.Environment.CONTAINER_ID.name())); jstormMasterContext.appAttemptID = containerId.getApplicationAttemptId(); + jstormMasterContext.appAttemptID.getApplicationId().getId() + ", clustertimestamp=" + jstormMasterContext.appAttemptID.getApplicationId().getClusterTimestamp() + ", attemptId=" + jstormMasterContext.appAttemptID.getAttemptId()); conf.set(JOYConstants.INSTANCE_DEPLOY_DIR_KEY, envs.get(JOYConstants.BINARYFILEDEPLOYPATH)); jstormMasterContext.deployPath = envs.get(JOYConstants.BINARYFILEDEPLOYPATH); conf.set(JOYConstants.INSTANCE_NAME_KEY, envs.get(JOYConstants.INSTANCENAME)); jstormMasterContext.instanceName = envs.get(JOYConstants.INSTANCENAME);
if (conf.get(XLearningConfiguration.XLEARNING_INPUT_STRATEGY, XLearningConfiguration.DEFAULT_XLEARNING_INPUT_STRATEGY).equals("STREAM")) { buildInputStreamFileStatus(); } else { rmCallbackHandler.setNeededWorkerContainersCount(workerNum); int allocateInterval = conf.getInt(XLearningConfiguration.XLEARNING_ALLOCATE_INTERVAL, XLearningConfiguration.DEFAULT_XLEARNING_ALLOCATE_INTERVAL); amrmAsync.setHeartbeatInterval(allocateInterval); LOG.info("Canceling container: " + container.getId().toString()); amrmAsync.releaseAssignedContainer(container.getId()); amrmAsync.addContainerRequest(psContainerRequest); startAllocatedTimeStamp = System.currentTimeMillis(); if (startAllocatedContainer && (System.currentTimeMillis() - startAllocatedTimeStamp) > conf.getInt(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS, YarnConfiguration.DEFAULT_RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS)) { this.appendMessage(failMessage, true); this.appendMessage("Unregister Application", true); LOG.info("Canceling container: " + container.getId().toString()); amrmAsync.releaseAssignedContainer(container.getId()); amrmAsync.addContainerRequest(workerContainerRequest); if (!finalSuccess && applicationAttemptID.getAttemptId() < appAttempts) { Runtime.getRuntime().removeShutdownHook(cleanApplication); throw new RuntimeException("Application Failed, retry starting. Note that container memory will auto scale if user config the setting.");
jstormMasterContext.nimbusDataDirPrefix = conf.get(JOYConstants.INSTANCE_DATA_DIR_KEY); String localDir = jstormMasterContext.nimbusDataDirPrefix + container.getId().toString() + JOYConstants.BACKLASH + jstormMasterContext.instanceName; vargs.add(localDir); slotPortsView.setMinPort(conf.getInt(JOYConstants.SUPERVISOR_MIN_PORT_KEY, JOYConstants.PORT_RANGE_MIN)); slotPortsView.setMaxPort(conf.getInt(JOYConstants.SUPERVISOR_MAX_PORT_KEY, JOYConstants.PORT_RANGE_MAX)); String slotPortsStr = JOYConstants.EMPTY; try { deployDst = jstormMasterContext.nimbusDataDirPrefix; String dstPath = deployDst + container.getId().toString(); this.container.getId().toString(), localDir, jstormMasterContext.deployPath, hadoopHome, javaHome, pythonHome, dstPath, slotPortsStr, jstormMasterContext.shellArgs, envs.get(JOYConstants.CLASS_PATH), JOYConstants.ExecShellStringPath, jstormMasterContext.appAttemptID.getApplicationId().toString(), logviewPort, nimbusThriftPort);
Path logdir = new Path(conf.get(XLearningConfiguration.XLEARNING_HISTORY_LOG_DIR, XLearningConfiguration.DEFAULT_XLEARNING_HISTORY_LOG_DIR) + "/" + applicationAttemptID.getApplicationId().toString() + "/" + applicationAttemptID.getApplicationId().toString()); Path jobLogPath = new Path(xlearningConf.get("fs.defaultFS"), logdir); LOG.info("jobLogPath:" + jobLogPath.toString()); FSDataOutputStream out = fs.create(jobLogPath); fs.setPermission(jobLogPath, new FsPermission(LOG_FILE_PERMISSION)); if (conf.getBoolean(XLearningConfiguration.XLEARNING_HOST_LOCAL_ENABLE, XLearningConfiguration.DEFAULT_XLEARNING_HOST_LOCAL_ENABLE)) { Path hostLocaldir = new Path(conf.get(XLearningConfiguration.XLEARNING_HISTORY_LOG_DIR, XLearningConfiguration.DEFAULT_XLEARNING_HISTORY_LOG_DIR) + "/" + conf.get("hadoop.job.ugi").split(",")[0] + "/" + envs.get(XLearningConstants.Environment.XLEARNING_APP_NAME.toString())); if (conf.get(XLearningConfiguration.XLEARNING_TF_BOARD_HISTORY_DIR, XLearningConfiguration.DEFAULT_XLEARNING_TF_BOARD_HISTORY_DIR).equals(xlearningConf.get(XLearningConfiguration.XLEARNING_TF_BOARD_HISTORY_DIR, XLearningConfiguration.DEFAULT_XLEARNING_TF_BOARD_HISTORY_DIR))) { boardLogPath = new Path(xlearningConf.get("fs.defaultFS"), conf.get(XLearningConfiguration.XLEARNING_TF_BOARD_HISTORY_DIR, XLearningConfiguration.DEFAULT_XLEARNING_TF_BOARD_HISTORY_DIR) + "/" + applicationAttemptID.getApplicationId().toString()); } else { boardLogPath = new Path(conf.get("fs.defaultFS"), conf.get(XLearningConfiguration.XLEARNING_TF_BOARD_HISTORY_DIR, Map<String, String> containerMessage = new HashMap<>(); containerMessage.put(AMParams.CONTAINER_HTTP_ADDRESS, container.getNodeHttpAddress()); if (tfEvaluator && container.getId().toString().equals(tfEvaluatorContainerId)) { containerMessage.put(AMParams.CONTAINER_ROLE, XLearningConstants.EVALUATOR); } else { container.getId().toString(), userName)); logMessage.put(container.getId().toString(), containerMessage);
@Test public void testUserAsDefaultQueue() throws Exception { conf.set(FairSchedulerConfiguration.USER_AS_DEFAULT_QUEUE, "true"); scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1); createApplicationWithAMResource(appAttemptId, "default", "user1", null); assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getNumRunnableApps()); assertEquals(0, scheduler.getQueueManager().getLeafQueue("default", true) .getNumRunnableApps()); assertEquals("root.user1", resourceManager.getRMContext().getRMApps() .get(appAttemptId.getApplicationId()).getQueue()); }
private Map<String, String> buildContainerEnv(String role) { LOG.info("Setting environments for the Container"); Map<String, String> containerEnv = new HashMap<>(); containerEnv.put(XLearningConstants.Environment.HADOOP_USER_NAME.toString(), conf.get("hadoop.job.ugi").split(",")[0]); containerEnv.put(XLearningConstants.Environment.XLEARNING_TF_ROLE.toString(), role); containerEnv.put(XLearningConstants.Environment.XLEARNING_EXEC_CMD.toString(), xlearningCommand); containerEnv.put(XLearningConstants.Environment.APP_ATTEMPTID.toString(), applicationAttemptID.toString()); containerEnv.put(XLearningConstants.Environment.APP_ID.toString(), applicationAttemptID.getApplicationId().toString()); LOG.debug("env:" + anEnvStr); if (conf.get(XLearningConfiguration.XLEARNING_CONTAINER_EXTRAENV) != null) { String[] containerUserEnv = StringUtils.split(conf.get(XLearningConfiguration.XLEARNING_CONTAINER_EXTRAENV), "|"); if (containerUserEnv.length > 0) { for (String envPair : containerUserEnv) {
@Before public void setup() { conf.set(YarnConfiguration.RM_CLUSTER_ID, "subclusterId"); callback = new CountingCallback(); attemptId = ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1); uam = new TestableUnmanagedApplicationManager(conf, attemptId.getApplicationId(), null, "submitter", "appNameSuffix", true); }
applicationHistoryUrl = conf.get(XLearningConfiguration.XLEARNING_HISTORY_WEBAPP_ADDRESS, XLearningConfiguration.DEFAULT_XLEARNING_HISTORY_WEBAPP_ADDRESS) + "/jobhistory/job/" + applicationAttemptID.getApplicationId(); LOG.info("master tracking url:" + applicationMasterTrackingUrl); LOG.info("history url: " + applicationHistoryUrl);
/** * get MPI executable local directory * @param conf * @return local directory */ public static String getMpiExecDir(Configuration conf, ApplicationAttemptId appAttemptID) { String execDir = null; StringBuilder mpiExecBuilder = new StringBuilder(100); mpiExecBuilder.append(conf.get("hadoop.tmp.dir" , "/tmp")).append("/mpiexecs/"); execDir = conf.get("mpi.local.dir", mpiExecBuilder.toString()) + appAttemptID.toString(); return execDir; }
public TestPerNodeTimelineCollectorsAuxService() { appId = ApplicationId.newInstance(System.currentTimeMillis(), 1); appAttemptId = ApplicationAttemptId.newInstance(appId, 1); conf = new YarnConfiguration(); // enable timeline service v.2 conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 2.0f); conf.setClass(YarnConfiguration.TIMELINE_SERVICE_WRITER_CLASS, FileSystemTimelineWriterImpl.class, TimelineWriter.class); conf.setLong(YarnConfiguration.ATS_APP_COLLECTOR_LINGER_PERIOD_IN_MS, 1000L); }
supervisorContainer.getId().getApplicationAttemptId().getApplicationId().toString(), supervisorContainer.getId().toString()); try { if (jstormMaster.registryOperations.exists(containerPath)) { nimbusContainer.getId().getApplicationAttemptId().getApplicationId().toString(), nimbusContainer.getId().toString());
private static String getApplicationId(ContainerId containerId) { return containerId.getApplicationAttemptId().getApplicationId().toString(); }
private static TimelineEntity createContainerEntity(ContainerId containerId) { TimelineEntity entity = new TimelineEntity(); entity.setEntityType(ContainerMetricsConstants.ENTITY_TYPE); entity.setEntityId(containerId.toString()); entity.addPrimaryFilter(ContainerMetricsConstants.PARENT_PRIMARIY_FILTER, containerId.getApplicationAttemptId().toString()); return entity; }
public static ContainerId newContainerId(int appId, int appAttemptId, long timestamp, long containerId) { ApplicationId applicationId = ApplicationId.newInstance(timestamp, appId); ApplicationAttemptId applicationAttemptId = ApplicationAttemptId.newInstance(applicationId, appAttemptId); return ContainerId.newContainerId(applicationAttemptId, containerId); } }
nimbusMap.put(container.getId().getContainerId(), container); supervisorMap.put(container.getId().getContainerId(), container); Long containerId = containerStatus.getContainerId().getContainerId(); JOYConstants.APP_TYPE, jstormMasterContext.instanceName, jstormMasterContext.appAttemptID.getApplicationId().toString(), containerStatus.getContainerId().toString()); try { if (registryOperations.exists(containerPath)) {