/** Enable output compression using the deflate codec and specify its level. */
public static void setDeflateLevel(JobConf job, int level) {
  FileOutputFormat.setCompressOutput(job, true);
  job.setInt(DEFLATE_LEVEL_KEY, level);
}
/** Enable output compression using the deflate codec and specify its level. */
public static void setDeflateLevel(JobConf job, int level) {
  FileOutputFormat.setCompressOutput(job, true);
  job.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, level);
}
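For reference, a minimal driver sketch showing how a caller might use setDeflateLevel; the job name, schema, and output path below are illustrative assumptions, not taken from the snippets above.

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class DeflateDriver {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setJobName("deflate-example"); // illustrative name
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING)); // illustrative schema
    // Enables compressed output and records the requested deflate level.
    AvroJob.setDeflateLevel(job, 9);
    FileOutputFormat.setOutputPath(job, new Path("target/deflate-out")); // illustrative path
  }
}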
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
  JobConf job = new JobConf();
  String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
  Path input = new Path(inDir + "/weather.avro");
  Path output = new Path("target/test/weather-ident");
  output.getFileSystem(job).delete(output);

  job.setJobName("identity map weather");
  AvroJob.setInputSchema(job, Weather.SCHEMA$);
  AvroJob.setOutputSchema(job, Weather.SCHEMA$);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);
  job.setNumReduceTasks(0); // map-only

  JobClient.runJob(job);

  // check output is correct
  DatumReader<Weather> reader = new SpecificDatumReader<>();
  DataFileReader<Weather> check =
      new DataFileReader<>(new File(inDir + "/weather.avro"), reader);
  DataFileReader<Weather> sorted =
      new DataFileReader<>(new File(output.toString() + "/part-00000.avro"), reader);
  for (Weather w : sorted)
    assertEquals(check.next(), w);
  check.close();
  sorted.close();
}
@SuppressWarnings("deprecation")
public void testJobNoreducer() throws Exception {
  JobConf job = new JobConf();
  job.setNumReduceTasks(0);
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);
  WordCountUtil.writeLinesFile(new File(INPUT_DIR.getRoot(), "lines.avro"));

  job.setJobName("AvroMultipleOutputs_noreducer");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  FileInputFormat.setInputPaths(job, new Path(INPUT_DIR.getRoot().toString()));
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);
  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));

  JobClient.runJob(job);
}
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  WordCountUtil.writeLinesFile(pathIn + "/lines.avro");
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath);

  job.setJobName("wordcount");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, new Path(pathIn));
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, true);
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);

  WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro"));
}
public void testOutputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputSchema(job, STRING);
  AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(job);

  wordCountUtil.validateCountsFile();
}
FileOutputFormat.setCompressOutput(job, true);
AvroJob.setOutputCodec(job, SNAPPY_CODEC);
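A hedged sketch of the setup these two calls usually sit in; SNAPPY_CODEC is assumed to be org.apache.avro.file.DataFileConstants.SNAPPY_CODEC ("snappy"), and the schema is illustrative.

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SnappyOutputExample {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING)); // illustrative schema
    FileOutputFormat.setCompressOutput(job, true);
    // Selects the snappy codec for Avro container-file output.
    AvroJob.setOutputCodec(job, DataFileConstants.SNAPPY_CODEC);
  }
}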
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  File fileIn = new File(pathIn, "lines.avro");
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath);
  WordCountUtil.writeLinesFile(fileIn);

  job.setJobName("AvroMultipleOutputs");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, pathIn);
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);
  AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class,
      new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);

  WordCountUtil.validateCountsFile(new File(outputPath.toString(), "part-00000.avro"));
}
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
/**
 * Set the {@link CompressionCodec} to be used to compress job outputs.
 * @param conf the {@link JobConf} to modify
 * @param codecClass the {@link CompressionCodec} to be used to
 *                   compress the job outputs
 */
public static void setOutputCompressorClass(JobConf conf,
    Class<? extends CompressionCodec> codecClass) {
  setCompressOutput(conf, true);
  conf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
}
/**
 * Set the {@link CompressionCodec} to be used to compress job outputs.
 * @param conf the {@link JobConf} to modify
 * @param codecClass the {@link CompressionCodec} to be used to
 *                   compress the job outputs
 */
public static void setOutputCompressorClass(JobConf conf,
    Class<? extends CompressionCodec> codecClass) {
  setCompressOutput(conf, true);
  conf.setClass(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC,
      codecClass, CompressionCodec.class);
}
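A minimal caller sketch for the method above, with GzipCodec as an illustrative codec; note that the call also flips the compress-output flag on.

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressorClassExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    // setCompressOutput(conf, true) was implied by the call above.
    System.out.println(FileOutputFormat.getCompressOutput(conf)); // prints: true
  }
}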