/**
 * Runs the word-count job with {@code AvroTrevniOutputFormat} as the output format
 * and then validates the counts file through the {@code WordCountUtil} instance.
 *
 * <p>Input is read from {@code <dir>/in} and output is written to {@code <dir>/out},
 * where {@code <dir>} is the directory reported by the utility instance.
 *
 * @throws Exception if writing the input, running the job, or validation fails
 */
public void testOutputFormat() throws Exception {
    JobConf conf = new JobConf();

    WordCountUtil util = new WordCountUtil("trevniMapredTest");
    util.writeLinesFile();

    // Schemas: string lines in, (string, long) pairs out.
    AvroJob.setInputSchema(conf, STRING);
    AvroJob.setOutputSchema(conf, Pair.getPairSchema(STRING, LONG));

    // The reducer implementation doubles as the combiner.
    AvroJob.setMapperClass(conf, MapImpl.class);
    AvroJob.setCombinerClass(conf, ReduceImpl.class);
    AvroJob.setReducerClass(conf, ReduceImpl.class);

    Path inputDir = new Path(util.getDir().toString() + "/in");
    FileInputFormat.setInputPaths(conf, inputDir);

    Path outputDir = new Path(util.getDir().toString() + "/out");
    FileOutputFormat.setOutputPath(conf, outputDir);
    FileOutputFormat.setCompressOutput(conf, true);

    conf.setOutputFormat(AvroTrevniOutputFormat.class);

    JobClient.runJob(conf);

    util.validateCountsFile();
}
/**
 * Runs the Avro word-count job named {@code "wordcount"} and validates its output.
 *
 * <p>The input file {@code lines.avro} is written under the root of {@code INPUT_DIR};
 * the output location is deleted before the job runs, and the resulting
 * {@code part-00000.avro} file is checked by {@code WordCountUtil.validateCountsFile}.
 *
 * @param pathOut path of the job output directory; anything already there is deleted first
 * @throws Exception if writing the input, running the job, or validation fails
 */
// NOTE(review): suppression kept in case other calls here are deprecated in the Hadoop version in use.
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
    JobConf job = new JobConf();
    String pathIn = INPUT_DIR.getRoot().getPath();

    WordCountUtil.writeLinesFile(pathIn + "/lines.avro");

    Path outputPath = new Path(pathOut);
    // Use the non-deprecated two-argument delete; recursive=true matches what the
    // deprecated single-argument overload delegates to.
    outputPath.getFileSystem(job).delete(outputPath, true);

    job.setJobName("wordcount");

    AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());

    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, new Path(pathIn));
    // Reuse the Path built above instead of constructing an identical one.
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, true);

    WordCountUtil.setMeta(job);

    JobClient.runJob(job);

    WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro"));
}
/**
 * Runs the {@code "multiple-inputs-join"} job over two input directories and validates
 * the joined output.
 *
 * <p>Each input directory is registered through {@code AvroMultipleInputs} with its own
 * mapper and reflect-derived schema. The job uses a single reduce task, and the resulting
 * {@code part-00000.avro} file under {@code OUTPUT_DIR} is checked by
 * {@code validateCompleteFile}.
 *
 * @throws Exception if writing the inputs, running the job, or validation fails
 */
@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();

    Path inputPath1 = new Path(INPUT_DIR_1.getRoot().getPath());
    Path inputPath2 = new Path(INPUT_DIR_2.getRoot().getPath());
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());

    // Use the non-deprecated two-argument delete; recursive=true matches what the
    // deprecated single-argument overload delegates to.
    outputPath.getFileSystem(job).delete(outputPath, true);

    writeNamesFiles(new File(inputPath1.toUri().getPath()));
    writeBalancesFiles(new File(inputPath2.toUri().getPath()));

    job.setJobName("multiple-inputs-join");

    // One mapper and one reflect schema per input directory.
    AvroMultipleInputs.addInputPath(job, inputPath1, NamesMapImpl.class,
            ReflectData.get().getSchema(NamesRecord.class));
    AvroMultipleInputs.addInputPath(job, inputPath2, BalancesMapImpl.class,
            ReflectData.get().getSchema(BalancesRecord.class));

    Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
    Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
    AvroJob.setMapOutputSchema(job, Pair.getPairSchema(keySchema, valueSchema));
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(CompleteRecord.class));

    AvroJob.setReducerClass(job, ReduceImpl.class);
    job.setNumReduceTasks(1);

    FileOutputFormat.setOutputPath(job, outputPath);

    AvroJob.setReflect(job);

    JobClient.runJob(job);

    validateCompleteFile(new File(OUTPUT_DIR.getRoot(), "part-00000.avro"));
}
@Test @SuppressWarnings("deprecation") public void testJob() throws Exception { JobConf job = new JobConf(); String dir = "target/testReflectJob"; Path inputPath = new Path(dir + "/in"); Path outputPath = new Path(dir + "/out"); outputPath.getFileSystem(job).delete(outputPath); inputPath.getFileSystem(job).delete(inputPath); writeLinesFile(new File(dir+"/in")); job.setJobName("reflect"); AvroJob.setInputSchema(job, ReflectData.get().getSchema(Text.class)); AvroJob.setMapOutputSchema (job, new Pair(new Text(""), new Count(0L)).getSchema()); AvroJob.setOutputSchema(job, ReflectData.get().getSchema(WordCount.class)); AvroJob.setMapperClass(job, MapImpl.class); //AvroJob.setCombinerClass(job, ReduceImpl.class); AvroJob.setReducerClass(job, ReduceImpl.class); FileInputFormat.setInputPaths(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); AvroJob.setReflect(job); // use reflection JobClient.runJob(job); validateCountsFile(new File(new File(dir, "out"), "part-00000.avro")); }
// Register SortReducer as the reducer class for this Avro job configuration.
AvroJob.setReducerClass(job, SortReducer.class);
/**
 * Runs the {@code "AvroMultipleOutputs"} word-count job with three extra named outputs
 * and validates the main output file.
 *
 * <p>The input file {@code lines.avro} is written under the root of {@code INPUT_DIR};
 * the output location is deleted before the job runs. Output compression is turned off.
 * Only the main {@code part-00000.avro} file is validated here.
 *
 * @param pathOut path of the job output directory; anything already there is deleted first
 * @throws Exception if writing the input, running the job, or validation fails
 */
// NOTE(review): suppression kept in case other calls here are deprecated in the Hadoop version in use.
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
    JobConf job = new JobConf();
    String pathIn = INPUT_DIR.getRoot().getPath();
    File fileIn = new File(pathIn, "lines.avro");

    Path outputPath = new Path(pathOut);
    // Use the non-deprecated two-argument delete; recursive=true matches what the
    // deprecated single-argument overload delegates to.
    outputPath.getFileSystem(job).delete(outputPath, true);

    WordCountUtil.writeLinesFile(fileIn);

    job.setJobName("AvroMultipleOutputs");

    // The (Utf8, Long) pair schema is used for both the job output and the "myavro" named output.
    Schema pairSchema = new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema();

    AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputSchema(job, pairSchema);

    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);

    FileInputFormat.setInputPaths(job, pathIn);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class, pairSchema);
    AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class,
            Schema.create(Schema.Type.STRING));
    AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
            Schema.create(Schema.Type.STRING));

    WordCountUtil.setMeta(job);

    JobClient.runJob(job);

    WordCountUtil.validateCountsFile(new File(outputPath.toString(), "/part-00000.avro"));
}
/**
 * Builds a JobConf for a job that runs only a map phase (zero reduce tasks).
 *
 * <p>No input schema is set here. NOTE(review): the base {@code createJobConf()} is
 * presumably responsible for loading it from the input files; confirm there.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param outputSchema schema of the mapper output
 * @return the configured JobConf
 * @throws IOException presumably propagated from the base {@code createJobConf()}
 * @throws URISyntaxException presumably propagated from the base {@code createJobConf()}
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass, Schema outputSchema)
        throws IOException, URISyntaxException {
    JobConf mapOnlyConf = createJobConf();

    AvroJob.setMapperClass(mapOnlyConf, mapperClass);
    // A reducer class is still registered even though no reduce tasks will run.
    AvroJob.setReducerClass(mapOnlyConf, AvroReducer.class);

    AvroJob.setOutputSchema(mapOnlyConf, outputSchema);

    // Zero reduce tasks makes this a map-only job.
    mapOnlyConf.setNumReduceTasks(0);

    return mapOnlyConf;
}
Pair.getPairSchema(Schema.create(Type.FLOAT), outputSchema)); AvroJob.setMapperClass(conf, mapperClass); AvroJob.setReducerClass(conf, reducerClass); return conf;
public static void main(String... args) throws Exception { JobConf job = new JobConf(); job.setJarByClass(SmallFilesMapReduce.class); Path input = new Path(args[0]); Path output = new Path(args[1]); output.getFileSystem(job).delete(output, true); AvroJob.setInputSchema(job, Stock.SCHEMA$); //<co id="ch03_avro_mr_comment1"/> AvroJob.setMapOutputSchema(job, Pair.getPairSchema(Stock.SCHEMA$, Schema.create(Schema.Type.NULL))); AvroJob.setOutputSchema(job, Stock.SCHEMA$); FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, output); AvroJob.setMapperClass(job, Mapper.class); //<co id="ch03_smallfilemr_comment2"/> AvroJob.setReducerClass(job, Reducer.class); FileOutputFormat.setCompressOutput(job, true); AvroJob.setOutputCodec(job, SNAPPY_CODEC); JobClient.runJob(job); }
Pair.getPairSchema(Schema.create(Type.STRING), inputSchema)); AvroJob.setMapperClass(conf, mapperClass); AvroJob.setReducerClass(conf, reducerClass); return conf;
/**
 * Builds a JobConf for a job with both a map and a reduce phase.
 *
 * <p>No input schema is set here. NOTE(review): the base {@code createJobConf()} is
 * presumably responsible for loading it from the input files; confirm there.
 *
 * @param mapperClass AvroMapper subclass for the mapper
 * @param reducerClass AvroReducer subclass for the reducer
 * @param mapperOutputSchema mapper output schema; must be a schema for
 *        {@code org.apache.avro.mapred.Pair}
 * @param outputSchema reducer output schema
 * @return the configured JobConf
 * @throws IOException presumably propagated from the base {@code createJobConf()}
 * @throws URISyntaxException presumably propagated from the base {@code createJobConf()}
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Schema mapperOutputSchema,
                             Schema outputSchema)
        throws IOException, URISyntaxException {
    JobConf mapReduceConf = createJobConf();

    // Processing classes.
    AvroJob.setMapperClass(mapReduceConf, mapperClass);
    AvroJob.setReducerClass(mapReduceConf, reducerClass);

    // Intermediate and final schemas.
    AvroJob.setMapOutputSchema(mapReduceConf, mapperOutputSchema);
    AvroJob.setOutputSchema(mapReduceConf, outputSchema);

    return mapReduceConf;
}
/**
 * Builds a JobConf for a job that runs only a map phase (zero reduce tasks), using an
 * input schema supplied by the caller.
 *
 * @param mapperClass AvroMapper subclass implementing the map phase
 * @param inputSchema schema of the input data
 * @param outputSchema schema of the mapper output
 * @return the configured JobConf
 * @throws IOException presumably propagated from the base {@code createJobConf()}
 * @throws URISyntaxException presumably propagated from the base {@code createJobConf()}
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Schema inputSchema,
                             Schema outputSchema)
        throws IOException, URISyntaxException {
    JobConf mapOnlyConf = createJobConf();

    AvroJob.setMapperClass(mapOnlyConf, mapperClass);
    // A reducer class is still registered even though no reduce tasks will run.
    AvroJob.setReducerClass(mapOnlyConf, AvroReducer.class);

    AvroJob.setInputSchema(mapOnlyConf, inputSchema);
    AvroJob.setOutputSchema(mapOnlyConf, outputSchema);

    // Zero reduce tasks makes this a map-only job.
    mapOnlyConf.setNumReduceTasks(0);

    return mapOnlyConf;
}
/**
 * Builds a JobConf for a job with both a map and a reduce phase, using an input schema
 * supplied by the caller.
 *
 * @param mapperClass AvroMapper subclass for the mapper
 * @param reducerClass AvroReducer subclass for the reducer
 * @param inputSchema schema of the input data
 * @param mapperOutputSchema mapper output schema; must be a schema for
 *        {@code org.apache.avro.mapred.Pair}
 * @param outputSchema reducer output schema
 * @return the configured JobConf
 * @throws IOException presumably propagated from the base {@code createJobConf()}
 * @throws URISyntaxException presumably propagated from the base {@code createJobConf()}
 */
public JobConf createJobConf(Class<? extends AvroMapper> mapperClass,
                             Class<? extends AvroReducer> reducerClass,
                             Schema inputSchema,
                             Schema mapperOutputSchema,
                             Schema outputSchema)
        throws IOException, URISyntaxException {
    JobConf mapReduceConf = createJobConf();

    // Processing classes.
    AvroJob.setMapperClass(mapReduceConf, mapperClass);
    AvroJob.setReducerClass(mapReduceConf, reducerClass);

    // Input, intermediate, and final schemas.
    AvroJob.setInputSchema(mapReduceConf, inputSchema);
    AvroJob.setMapOutputSchema(mapReduceConf, mapperOutputSchema);
    AvroJob.setOutputSchema(mapReduceConf, outputSchema);

    return mapReduceConf;
}
// Register the supplied reducer class, then the supplied combiner class, on the job configuration.
AvroJob.setReducerClass(conf, reducerClass); AvroJob.setCombinerClass(conf, combinerClass);
// Register the supplied reducer class, then the supplied combiner class, on the job configuration.
AvroJob.setReducerClass(conf, reducerClass); AvroJob.setCombinerClass(conf, combinerClass);
// Register Reducer as the reducer class for this Avro job configuration.
AvroJob.setReducerClass(conf, Reducer.class);
/**
 * Runs the word-count job with {@code AvroTrevniOutputFormat} as the output format
 * and then validates the counts file through the static {@code WordCountUtil} helpers.
 *
 * <p>Input is read from {@code DIR + "/in"} and output is written to {@code DIR + "/out"}.
 *
 * @throws Exception if writing the input, running the job, or validation fails
 */
public void testOutputFormat() throws Exception {
    JobConf conf = new JobConf();

    WordCountUtil.writeLinesFile();

    // Schemas: string lines in, (string, long) pairs out.
    AvroJob.setInputSchema(conf, STRING);
    AvroJob.setOutputSchema(conf, Pair.getPairSchema(STRING, LONG));

    // The reducer implementation doubles as the combiner.
    AvroJob.setMapperClass(conf, MapImpl.class);
    AvroJob.setCombinerClass(conf, ReduceImpl.class);
    AvroJob.setReducerClass(conf, ReduceImpl.class);

    Path inputDir = new Path(DIR + "/in");
    FileInputFormat.setInputPaths(conf, inputDir);

    Path outputDir = new Path(DIR + "/out");
    FileOutputFormat.setOutputPath(conf, outputDir);
    FileOutputFormat.setCompressOutput(conf, true);

    conf.setOutputFormat(AvroTrevniOutputFormat.class);

    JobClient.runJob(conf);

    WordCountUtil.validateCountsFile();
}
// Register Reducer as the reducer class for this Avro job configuration.
AvroJob.setReducerClass(conf, Reducer.class);
// Register Reducer as the reducer class for this Avro job configuration.
AvroJob.setReducerClass(conf, Reducer.class);
// Register DepthReducer as the reducer class for this Avro job configuration.
AvroJob.setReducerClass(conf, DepthReducer.class);