private static void configureAvroOutput(JobConf job) {
  if (job.get("mapred.output.format.class") == null)
    job.setOutputFormat(AvroOutputFormat.class);

  if (job.getReducerClass() == IdentityReducer.class)
    job.setReducerClass(HadoopReducer.class);

  job.setOutputKeyClass(AvroWrapper.class);
  configureAvroShuffle(job);
}
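In Avro's mapred API this private helper is reached through AvroJob.setOutputSchema(); a minimal driver sketch, with a hypothetical class name and placeholder paths, and mapper wiring elided:

// Minimal driver sketch (hypothetical class and paths): declaring the output
// schema makes AvroJob install AvroOutputFormat via configureAvroOutput()
// above, unless an output format was already set explicitly.
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class AvroOutputDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(AvroOutputDriver.class);
    job.setJobName("avro-output-example");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING));
    // Mapper/reducer wiring (AvroJob.setMapperClass etc.) elided.
    JobClient.runJob(job);
  }
}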
public int run(String[] args) throws Exception {
  if (args.length != 3)
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");

  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");

  conf.setMapperClass(GenerateDataMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0); // map-only job; the identity reducer never runs

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.setInputPaths(conf, inputPath);
  Path outputPath = new Path(args[1]);

  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if (fs.exists(outputPath))
    fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInt("value.size", Integer.parseInt(args[2]));
  JobClient.runJob(conf);
  return 0;
}
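Since run() follows Hadoop's Tool contract, the usual launcher is ToolRunner; a minimal, hypothetical main(), assuming GenerateData extends Configured implements Tool:

// Hypothetical launcher: ToolRunner parses generic Hadoop options
// (-D, -conf, ...) before delegating to run() above.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new GenerateData(), args);
  System.exit(exitCode);
}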
public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer,
    JobConf job, Class partitioner, boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
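A hedged usage sketch of this helper; "mytable" and MyTableReducer are illustrative stand-ins, and passing null for the partitioner keeps the default:

// Illustrative wiring: a hypothetical TableReduce implementation writes
// Puts into the "mytable" HBase table; no custom partitioner.
JobConf job = new JobConf(MyDriver.class);
TableMapReduceUtil.initTableReduceJob("mytable", MyTableReducer.class, job, null, true);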
public void testInputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");

  Schema subSchema = Schema.parse("{\"type\":\"record\","
      + "\"name\":\"PairValue\","
      + "\"fields\": [ "
      + "{\"name\":\"value\", \"type\":\"long\"}"
      + "]}");
  AvroJob.setInputSchema(job, subSchema);
  AvroJob.setMapperClass(job, Counter.class);
  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
  job.setInputFormat(AvroTrevniInputFormat.class);

  job.setNumReduceTasks(0); // map-only
  job.setOutputFormat(NullOutputFormat.class); // ignore output

  total = 0;
  JobClient.runJob(job);
  assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
  JobConf job = new JobConf();

  String inputPath = INPUT_DIR.getRoot().getPath();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  WordCountUtil.writeLinesBytesFile(inputPath);

  job.setInputFormat(AvroAsTextInputFormat.class);
  job.setOutputFormat(AvroTextOutputFormat.class);
  job.setOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
public void configure(JobConf job) {
  if (null != inf)
    job.setInputFormat(inf);
  if (null != of)
    job.setOutputFormat(of);
}
@Override
public void sinkConfInit(FlowProcess<JobConf> process,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(TableOutputFormat.class);
  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);
}
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param jobConf Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(JobConf jobConf, boolean setMapper, boolean setCombiner, boolean setReducer) {
  if (setMapper) {
    jobConf.setMapperClass(HadoopWordCount1Map.class);
    jobConf.setInputFormat(TextInputFormat.class);
  }

  if (setCombiner)
    jobConf.setCombinerClass(HadoopWordCount1Reduce.class);

  if (setReducer) {
    jobConf.setReducerClass(HadoopWordCount1Reduce.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
  }
}
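A brief usage sketch: enabling all three flags configures the full word-count pipeline in one call.

// Illustrative call: mapper + input format, combiner, and
// reducer + output format all set on one JobConf.
JobConf jobConf = new JobConf();
setTasksClasses(jobConf, true, true, true);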
List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());
JobConf jobConf = new JobConf(conf);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);

Path remotePath = new Path(outputs.get(0).getDfsLocation()
    + "/_temporary/" + containerId.toString());
FileSystem dfs = remotePath.getFileSystem(jobConf);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());
// jobConf carries the old-API (mapred) format classes; job is presumably a
// new-API Job created from jobConf elsewhere in this snippet's source.
jobConf.setInputFormat(CustomV1InputFormat.class);
jobConf.setOutputFormat(CustomV1OutputFormat.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));
private static void setupTetherJob(JobConf job) throws IOException {
  job.setMapRunnerClass(TetherMapRunner.class);
  job.setPartitionerClass(TetherPartitioner.class);
  job.setReducerClass(TetherReducer.class);

  job.setInputFormat(TetherInputFormat.class);
  job.setOutputFormat(TetherOutputFormat.class);

  job.setOutputKeyClass(TetherData.class);
  job.setOutputKeyComparatorClass(TetherKeyComparator.class);
  job.setMapOutputValueClass(NullWritable.class);

  // set the map output key class to TetherData
  job.setMapOutputKeyClass(TetherData.class);

  // if protocol isn't set, default to SASL
  if (job.getStrings(TETHER_PROTOCOL) == null) {
    job.set(TETHER_PROTOCOL, "sasl");
  }

  // add TetherKeySerialization to io.serializations
  Collection<String> serializations = job.getStringCollection("io.serializations");
  if (!serializations.contains(TetherKeySerialization.class.getName())) {
    serializations.add(TetherKeySerialization.class.getName());
    job.setStrings("io.serializations", serializations.toArray(new String[0]));
  }

  // determine whether the executable should be added to the cache.
  if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
    DistributedCache.addCacheFile(getExecutable(job), job);
  }
}
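A hedged launch sketch, assuming submission goes through Avro's TetherJob (which applies setupTetherJob() internally); the paths and binary location are placeholders:

// Illustrative launch of a tethered job; all paths are placeholders.
JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/in"));
FileOutputFormat.setOutputPath(job, new Path("/out"));
TetherJob.setExecutable(job, new File("/path/to/tethered-executable"));
TetherJob.runJob(job);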
Path rankings = new Path(options.getResultPath(), RANKINGS);
Path fout = new Path(options.getResultPath(), USERVISITS);
Path uagentPath = new Path(options.getWorkPath(), uagentf);
DistributedCache.addCacheFile(uagentPath.toUri(), job);
DistributedCache.addCacheFile(searchkeyPath.toUri(), job);

job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setReducerClass(CreateUserVisitsReducer.class);

// condition of the enclosing if (choosing sequence-file output) is elided in this excerpt
  job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
  job.setOutputFormat(TextOutputFormat.class);
private JobConf createJobConfig() throws IOException {
  Path inputPath = new Path(INPUT_PATH);
  Path outputPath = new Path(OUTPUT_PATH);

  FileSystem.get(new Configuration()).delete(outputPath, true);

  JobConf jobConfig = new JobConf();
  jobConfig.setInputFormat(AvroInputFormat.class);
  jobConfig.setOutputFormat(AvroOutputFormat.class);
  AvroOutputFormat.setOutputPath(jobConfig, outputPath);
  AvroInputFormat.addInputPath(jobConfig, inputPath);
  jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
  jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());

  return jobConfig;
}
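A brief usage note: the returned configuration is ready for submission, e.g. via JobClient.

// Submit the Avro identity job configured above.
JobClient.runJob(createJobConfig());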
if (!isAvro) {
  conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
  conf.setMapperClass(mapperClass);
  conf.setMapOutputKeyClass(BytesWritable.class);
  conf.setMapOutputValueClass(BytesWritable.class);
  conf.setReducerClass(HadoopStoreBuilderReducer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);
  conf.setJarByClass(getClass());
  conf.setReduceSpeculativeExecution(false);
} else {
  // Avro build path: ByteBuffer keys/values through AvroOutputFormat
  conf.setMapOutputValueClass(ByteBuffer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
  conf.setOutputKeyClass(ByteBuffer.class);
  conf.setOutputValueClass(ByteBuffer.class);
  conf.setReducerClass(AvroStoreBuilderReducer.class);
}

Path directoryPath = new Path(outputDir.toString(), directoryName);
JobConf job = new JobConf(conf);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setJarByClass(CompactorMR.class);
LOG.debug("User jar set to " + job.getJar());
job.setMapperClass(CompactorMap.class);
job.setNumReduceTasks(0);
job.setInputFormat(CompactorInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
job.setOutputCommitter(CompactorOutputCommitter.class);
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  outputPath.getFileSystem(job).delete(outputPath, true);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");
  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
}