public MiniMrShim(Configuration conf, int numberOfTaskTrackers, String nameNode, int numDir)
    throws IOException {
  this.conf = conf;

  JobConf jConf = new JobConf(conf);
  jConf.set("yarn.scheduler.capacity.root.queues", "default");
  jConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
  jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512);
  jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128);
  jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
  jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512);

  mr = new MiniMRCluster(numberOfTaskTrackers, nameNode, numDir, null, null, jConf);
}
private JobConf initializeVertexConf(JobConf baseConf, Context context, ReduceWork reduceWork) {
  JobConf conf = new JobConf(baseConf);
  conf.set(Operator.CONTEXT_NAME_KEY, reduceWork.getName());

  // Is this required?
  conf.set("mapred.reducer.class", ExecReducer.class.getName());

  boolean useSpeculativeExecReducers = HiveConf.getBoolVar(conf,
      HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS);
  conf.setBoolean(org.apache.hadoop.mapreduce.MRJobConfig.REDUCE_SPECULATIVE,
      useSpeculativeExecReducers);

  return conf;
}
public String runBuildStore(Props props, String url) throws Exception {
  Path tempDir = new Path(props.getString(BUILD_TEMP_DIR,
      "/tmp/vold-build-and-push-" + new Random().nextLong()));
  Path outputDir = new Path(props.getString(BUILD_OUTPUT_DIR), new URI(url).getHost());
  CheckSumType checkSumType = CheckSum.fromString(
      props.getString(CHECKSUM_TYPE, CheckSum.toString(CheckSumType.MD5)));

  JobConf configuration = new JobConf();
  Class mapperClass;
  Class<? extends InputFormat> inputFormatClass;

  configuration.set(HadoopStoreBuilder.AVRO_REC_SCHEMA, getRecordSchema());
  configuration.set(AvroStoreBuilderMapper.AVRO_KEY_SCHEMA, getKeySchema());
  configuration.set(AvroStoreBuilderMapper.AVRO_VALUE_SCHEMA, getValueSchema());
  configuration.set(VoldemortBuildAndPushJob.AVRO_KEY_FIELD, this.keyFieldName);
  configuration.set(VoldemortBuildAndPushJob.AVRO_VALUE_FIELD, this.valueFieldName);
  mapperClass = AvroStoreBuilderMapper.class;
  inputFormatClass = AvroInputFormat.class;

  // ... (the store build/push steps using tempDir, checkSumType, mapperClass and
  // inputFormatClass are not part of this excerpt)
  return outputDir.toString();
}
@Before
public void openFileSystem() throws Exception {
  conf = new JobConf();
  // all columns
  conf.set("columns", "userid,string1,subtype,decimal1,ts");
  conf.set("columns.types", "bigint,string,double,decimal,timestamp");
  // needed columns
  conf.set(ColumnProjectionUtils.READ_ALL_COLUMNS, "false");
  conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0,2");
  conf.set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR, "userid,subtype");

  fs = FileSystem.getLocal(conf);
  testFilePath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc");
  testFilePath2 = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".2.orc");
  fs.delete(testFilePath, false);
  fs.delete(testFilePath2, false);
}
protected JobConf configStage1() throws Exception {
  final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
  conf.set("block_width", "" + block_width);
  conf.set("recursive_diagmult", "" + recursive_diagmult);
  conf.setJobName("ConCmptBlock_pass1");

  conf.setMapperClass(MapStage1.class);
  conf.setReducerClass(RedStage1.class);

  FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
  FileOutputFormat.setOutputPath(conf, tempbm_path);

  conf.setNumReduceTasks(nreducers);

  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(Text.class);
  return conf;
}
// excerpt from a loop over the input path strings; the enclosing for-loop head and the
// initialization of str/separator are not part of this excerpt
    continue;
  }
  Path path = new Path(pathString);
  FileSystem fs = path.getFileSystem(jobConf);
  if (ignoreInvalidPath && !fs.exists(path)) {
    continue;
  }
  final String qualifiedPath = fs.makeQualified(path).toString();
  str.append(separator)
      .append(StringUtils.escapeString(qualifiedPath));
// after the loop, publish the accumulated list as the job's input directories
jobConf.set("mapred.input.dir", str.toString());
conf.set("cluster.xml", new ClusterMapper().writeCluster(cluster)); conf.set("stores.xml", new StoreDefinitionsMapper().writeStoreList(Collections.singletonList(storeDef))); conf.setBoolean(VoldemortBuildAndPushJob.SAVE_KEYS, saveKeys); conf.setOutputKeyClass(BytesWritable.class); conf.setOutputValueClass(BytesWritable.class); conf.setJarByClass(getClass()); conf.setReduceSpeculativeExecution(false); FileInputFormat.setInputPaths(conf, inputPath); conf.set("final.output.dir", outputDir.toString()); conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType)); conf.set("dfs.umaskmode", "002"); FileOutputFormat.setOutputPath(conf, tempDir); conf.setNumReduceTasks(numReducers); conf.setOutputKeyClass(ByteBuffer.class); conf.setOutputValueClass(ByteBuffer.class); conf.setReducerClass(AvroStoreBuilderReducer.class); Path directoryPath = new Path(outputDir.toString(), directoryName);
reader = new BufferedReader(new InputStreamReader(xlearningProcess.getInputStream()));
List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());

JobConf jobConf = new JobConf(conf);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);

Path remotePath = new Path(outputs.get(0).getDfsLocation()
    + "/_temporary/" + containerId.toString());
FileSystem dfs = remotePath.getFileSystem(jobConf);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());

OutputFormat outputFormat = ReflectionUtils.newInstance(
    conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
        XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTFORMAT_CLASS, OutputFormat.class),
    jobConf);

JobID jobID = new JobID(new SimpleDateFormat("yyyyMMddHHmm").format(new Date()), 0);
TaskAttemptID taId = new TaskAttemptID(new TaskID(jobID, true, 0), 0);
jobConf.set("mapred.tip.id", taId.getTaskID().toString());
jobConf.set("mapred.task.id", taId.toString());
jobConf.set("mapred.job.id", jobID.toString());
amClient.reportMapedTaskID(containerId, taId.toString());

RecordWriter writer = outputFormat.getRecordWriter(dfs, jobConf, "part-r", Reporter.NULL);
public long produceSamples(Path samplePath, boolean textOutput) throws Exception {
  Path input = new Path(samplePath.toString() + "-seeds");
  this.numSamples = writeSeeds(input);
  LOG.info("Generating " + this.numSamples + " of samples");

  JobConf jobConf = getJobConf();
  jobConf.set("genkmeansdataset.dimensions", Integer.toString(dimension));

  FileInputFormat.setInputPaths(jobConf, input);
  FileOutputFormat.setOutputPath(jobConf, samplePath);
  jobConf.setMapperClass(MapClass.class);

  if (textOutput) {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  } else {
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(VectorWritable.class);
  }
  jobConf.setNumReduceTasks(0);

  JobClient.runJob(jobConf);
  return this.numSamples;
}
conf.set("hadoop.job.ugi", hadoop_ugi); conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("mapred.local.dir", "/tmp/map-red"); conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts")); info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts")); FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter); HadoopUtils.addAllSubPaths(conf, new Path(path)); FileOutputFormat.setOutputPath(conf, new Path(location)); if(lowerCase.startsWith(HADOOP_PREFIX)) { String newKey = key.substring(HADOOP_PREFIX.length()); conf.set(newKey, getProps().get(key)); props.getBoolean(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS, false)); if(props.containsKey(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC)) { conf.set(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC, props.get(VoldemortBuildAndPushJob.REDUCER_OUTPUT_COMPRESS_CODEC)); conf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
private void testFailAbortInternal(int version) throws IOException, InterruptedException {
  JobConf conf = new JobConf();
  conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "faildel:///");
  conf.setClass("fs.faildel.impl", FakeFileSystem.class, FileSystem.class);
  conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
  conf.setInt(org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
      .FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
  conf.setInt(MRConstants.APPLICATION_ATTEMPT_ID, 1);
  FileOutputFormat.setOutputPath(conf, outDir);

  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);

  // ... (committer setup, task output, and the abort whose exception is captured as `th`
  // are not part of this excerpt)

  File jobTmpDir = new File(new Path(outDir,
      FileOutputCommitter.TEMP_DIR_NAME + Path.SEPARATOR
          + conf.getInt(MRConstants.APPLICATION_ATTEMPT_ID, 0) + Path.SEPARATOR
          + FileOutputCommitter.TEMP_DIR_NAME).toString());
  File taskTmpDir = new File(jobTmpDir, "_" + taskID);
  File expectedFile = new File(taskTmpDir, partFile);

  assertTrue(th.getMessage().contains("fake delete failed"));
  assertTrue("job temp dir does not exist", jobTmpDir.exists());
  FileUtil.fullyDelete(new File(outDir.toString()));
}
@Override
public RecordReader<BytesWritable, BytesWritable> getRecordReader(InputSplit split,
                                                                  JobConf conf,
                                                                  Reporter reporter)
    throws IOException {
  String inputPathString = ((FileSplit) split).getPath().toUri().getPath();
  log.info("Input file path: " + inputPathString);
  Path inputPath = new Path(inputPathString);

  SequenceFile.Reader reader = new SequenceFile.Reader(inputPath.getFileSystem(conf),
                                                       inputPath,
                                                       conf);
  SequenceFile.Metadata meta = reader.getMetadata();

  try {
    Text keySchema = meta.get(new Text("key.schema"));
    Text valueSchema = meta.get(new Text("value.schema"));

    if (0 == keySchema.getLength() || 0 == valueSchema.getLength()) {
      throw new Exception();
    }

    // update the JobConf with the schemas read from the file metadata
    conf.set("mapper.input.key.schema", keySchema.toString());
    conf.set("mapper.input.value.schema", valueSchema.toString());
  } catch (Exception e) {
    throw new IOException("Failed to load schema from file: " + inputPathString + "\n");
  }

  return super.getRecordReader(split, conf, reporter);
}
    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);  // tail of the inspector construction; head not in this excerpt
JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
    "combinationAcid", inspector, false, PARTITIONS);
String[] paths = conf.getStrings("mapred.input.dir");
for (int p = 0; p < PARTITIONS; ++p) {
  partDir[p] = new Path(paths[p]);
}
Path base0 = new Path("mock:/combinationAcid/p=0/base_0000010/bucket_00000");
setBlocks(base0, conf, new MockBlock("host1", "host2"));
conf.set(ValidTxnList.VALID_TXNS_KEY,
    new ValidReadTxnList(new long[0], new BitSet(), 1000, Long.MAX_VALUE).writeToString());

// ... (the HiveInputFormat construction, the getSplits call, and the per-split
// inputFormatClassName checks are garbled in this excerpt; the surviving assertions
// below inspect the first two bucket splits)
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00000", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(700, split.getLength());
assertEquals("mock:/combinationAcid/p=0/base_0000010/bucket_00001", split.getPath().toString());
assertEquals(0, split.getStart());
assertEquals(724, split.getLength());
job.addCacheFile(remoteFile.toUri());
jobConf.set("tmpfiles", secondFile.toString());

Path firstJar = makeJar(new Path(testRootDir, "distributed.first.jar"), 1);
Path secondJar = makeJar(new Path(testRootDir, "distributed.second.jar"), 2);
Path thirdJar = new Path(testRootDir, "distributed.third.jar");
localFs.copyFromLocalFile(secondJar, thirdJar);
jobConf.set("tmpjars", secondJar.toString() + "," + thirdJar.toString());
jobConf.set("tmparchives", secondArchive.toString());
/**
 * Set context for this fetch operator into the jobconf.
 * This helps InputFormats make decisions based on the scope of the complete
 * operation.
 * @param conf the configuration to modify
 * @param paths the list of input directories
 */
static void setFetchOperatorContext(JobConf conf, List<Path> paths) {
  if (paths != null) {
    StringBuilder buff = new StringBuilder();
    for (Path path : paths) {
      if (buff.length() > 0) {
        buff.append('\t');
      }
      buff.append(StringEscapeUtils.escapeJava(path.toString()));
    }
    conf.set(FETCH_OPERATOR_DIRECTORY_LIST, buff.toString());
  }
}
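For reference, a minimal usage sketch of setFetchOperatorContext; the JobConf and the two paths below are hypothetical and only illustrate the tab-separated list written under FETCH_OPERATOR_DIRECTORY_LIST:

// hypothetical caller: record two input directories for a fetch operation
JobConf fetchConf = new JobConf();
List<Path> fetchPaths = Arrays.asList(
    new Path("/warehouse/t1/part=1"),   // illustrative paths, not from the original source
    new Path("/warehouse/t1/part=2"));
setFetchOperatorContext(fetchConf, fetchPaths);
// fetchConf now carries both directories as a single tab-separated, Java-escaped string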
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());

job.setNumReduceTasks(0);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

job.set(MRJobConfig.JOB_NAME, jobName != null ? jobName : "JOB" + Utilities.randGen.nextInt());
job.set("tmpjars", addedJars);
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
                                  ValidWriteIdList writeIds, CompactionInfo ci) {
  JobConf job = new JobConf(conf);
  job.setJobName(jobName);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setJarByClass(CompactorMR.class);
  LOG.debug("User jar set to " + job.getJar());
  job.setMapperClass(CompactorMap.class);
  job.setNumReduceTasks(0);
  job.setInputFormat(CompactorInputFormat.class);
  job.setOutputFormat(NullOutputFormat.class);
  job.setOutputCommitter(CompactorOutputCommitter.class);

  job.set(FINAL_LOCATION, sd.getLocation());
  job.set(TMP_LOCATION, generateTmpPath(sd));
  job.set(INPUT_FORMAT_CLASS_NAME, sd.getInputFormat());
  job.set(OUTPUT_FORMAT_CLASS_NAME, sd.getOutputFormat());
  job.setBoolean(IS_COMPRESSED, sd.isCompressed());
  job.set(TABLE_PROPS, new StringableMap(t.getParameters()).toString());
  job.setInt(NUM_BUCKETS, sd.getNumBuckets());
  job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
  // ... (remaining compactor settings elided in this excerpt)
  return job;
}
job.setMapperClass(ExecMapper.class);
job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0);
job.setReducerClass(ExecReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.set(MRJobConfig.JOB_NAME, "JOB" + Utilities.randGen.nextInt());

// sampling fallback: roll back to a single reducer when sampling fails
// (the enclosing try and the preceding catch head are not part of this excerpt)
  console.printInfo("Not enough sampling data.. Rolling back to single reducer task");
  rWork.setNumReduceTasks(1);
  job.setNumReduceTasks(1);
} catch (Exception e) {
  LOG.error("Sampling error", e);
  console.printError(e.toString(),   // head of this call reconstructed; the excerpt truncated it
      "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
  rWork.setNumReduceTasks(1);
  job.setNumReduceTasks(1);
}
conf.setInt("mapred.max.split.size", 50); RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL); writer.write(NullWritable.get(), .end() .build(); conf.set("sarg.pushdown", toKryo(sarg)); conf.set("hive.io.file.readcolumn.names", "z"); properties.setProperty("columns", "z"); properties.setProperty("columns.types", "string"); inspector = (StructObjectInspector) serde.getObjectInspector(); InputFormat<?,?> in = new OrcInputFormat(); FileInputFormat.setInputPaths(conf, testFilePath.toString()); InputSplit[] splits = in.getSplits(conf, 1); assertEquals(0, splits.length);