/** Enable output compression using the deflate codec and specify its level. */
public static void setDeflateLevel(JobConf job, int level) {
  FileOutputFormat.setCompressOutput(job, true);
  job.setInt(DEFLATE_LEVEL_KEY, level);
}
/** Enable output compression using the deflate codec and specify its level. */
public static void setDeflateLevel(JobConf job, int level) {
  FileOutputFormat.setCompressOutput(job, true);
  job.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, level);
}
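For reference, a minimal driver sketch showing how a caller might use setDeflateLevel; the job name, schema, and output path below are illustrative assumptions, not taken from the snippets above.

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class DeflateDriver {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    job.setJobName("deflate-example"); // illustrative name
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING)); // illustrative schema
    // Enables compressed output and records the requested deflate level.
    AvroJob.setDeflateLevel(job, 9);
    FileOutputFormat.setOutputPath(job, new Path("target/deflate-out")); // illustrative path
  }
}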
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
  JobConf job = new JobConf();
  String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
  Path input = new Path(inDir + "/weather.avro");
  Path output = new Path("target/test/weather-ident");
  output.getFileSystem(job).delete(output);

  job.setJobName("identity map weather");
  AvroJob.setInputSchema(job, Weather.SCHEMA$);
  AvroJob.setOutputSchema(job, Weather.SCHEMA$);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);
  job.setNumReduceTasks(0); // map-only

  JobClient.runJob(job);

  // check output is correct
  DatumReader<Weather> reader = new SpecificDatumReader<>();
  DataFileReader<Weather> check =
      new DataFileReader<>(new File(inDir + "/weather.avro"), reader);
  DataFileReader<Weather> sorted =
      new DataFileReader<>(new File(output.toString() + "/part-00000.avro"), reader);
  for (Weather w : sorted)
    assertEquals(check.next(), w);
  check.close();
  sorted.close();
}
@SuppressWarnings("deprecation")
public void testJobNoreducer() throws Exception {
  JobConf job = new JobConf();
  job.setNumReduceTasks(0);
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath);
  WordCountUtil.writeLinesFile(new File(INPUT_DIR.getRoot(), "lines.avro"));

  job.setJobName("AvroMultipleOutputs_noreducer");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  FileInputFormat.setInputPaths(job, new Path(INPUT_DIR.getRoot().toString()));
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);
  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));

  JobClient.runJob(job);
}
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  WordCountUtil.writeLinesFile(pathIn + "/lines.avro");
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath);

  job.setJobName("wordcount");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, new Path(pathIn));
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, true);
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);

  WordCountUtil.validateCountsFile(new File(pathOut, "part-00000.avro"));
}
public void testOutputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputSchema(job, STRING);
  AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setCombinerClass(job, ReduceImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);
  job.setOutputFormat(AvroTrevniOutputFormat.class);

  JobClient.runJob(job);

  wordCountUtil.validateCountsFile();
}
FileOutputFormat.setCompressOutput(job, true);
AvroJob.setOutputCodec(job, SNAPPY_CODEC);
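A hedged sketch of the setup these two calls usually sit in; SNAPPY_CODEC is assumed to be org.apache.avro.file.DataFileConstants.SNAPPY_CODEC ("snappy"), and the schema is illustrative.

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SnappyOutputExample {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING)); // illustrative schema
    FileOutputFormat.setCompressOutput(job, true);
    // Selects the snappy codec for Avro container-file output.
    AvroJob.setOutputCodec(job, DataFileConstants.SNAPPY_CODEC);
  }
}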
@SuppressWarnings("deprecation")
public void testJob(String pathOut) throws Exception {
  JobConf job = new JobConf();
  String pathIn = INPUT_DIR.getRoot().getPath();
  File fileIn = new File(pathIn, "lines.avro");
  Path outputPath = new Path(pathOut);
  outputPath.getFileSystem(job).delete(outputPath);
  WordCountUtil.writeLinesFile(fileIn);

  job.setJobName("AvroMultipleOutputs");
  AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
  AvroJob.setOutputSchema(job, new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroJob.setMapperClass(job, MapImpl.class);
  AvroJob.setReducerClass(job, ReduceImpl.class);
  FileInputFormat.setInputPaths(job, pathIn);
  FileOutputFormat.setOutputPath(job, outputPath);
  FileOutputFormat.setCompressOutput(job, false);
  AvroMultipleOutputs.addNamedOutput(job, "myavro", AvroOutputFormat.class,
      new Pair<Utf8, Long>(new Utf8(""), 0L).getSchema());
  AvroMultipleOutputs.addNamedOutput(job, "myavro1", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  AvroMultipleOutputs.addNamedOutput(job, "myavro2", AvroOutputFormat.class,
      Schema.create(Schema.Type.STRING));
  WordCountUtil.setMeta(job);

  JobClient.runJob(job);

  WordCountUtil.validateCountsFile(new File(outputPath.toString(), "part-00000.avro"));
}
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type", "BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
job.set("mapred.output.compression.type","BLOCK"); job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK"); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
/**
 * Set the {@link CompressionCodec} to be used to compress job outputs.
 * @param conf the {@link JobConf} to modify
 * @param codecClass the {@link CompressionCodec} to be used to
 *                   compress the job outputs
 */
public static void setOutputCompressorClass(JobConf conf,
    Class<? extends CompressionCodec> codecClass) {
  setCompressOutput(conf, true);
  conf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
}
/**
 * Set the {@link CompressionCodec} to be used to compress job outputs.
 * @param conf the {@link JobConf} to modify
 * @param codecClass the {@link CompressionCodec} to be used to
 *                   compress the job outputs
 */
public static void setOutputCompressorClass(JobConf conf,
    Class<? extends CompressionCodec> codecClass) {
  setCompressOutput(conf, true);
  conf.setClass(org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.COMPRESS_CODEC,
      codecClass, CompressionCodec.class);
}
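A minimal caller sketch for the method above, with GzipCodec as an illustrative codec; note that the call also flips the compress-output flag on.

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class CompressorClassExample {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputCompressorClass(conf, GzipCodec.class);
    // setCompressOutput(conf, true) was implied by the call above.
    System.out.println(FileOutputFormat.getCompressOutput(conf)); // prints: true
  }
}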