/**
 * Launches the stage-1 metastore comparison MapReduce job.
 *
 * @param output directory that receives the gzip-compressed job output
 * @return 0 if the job completed successfully, 1 otherwise
 */
private int runMetastoreCompareJob(Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job compareJob = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job");
  compareJob.setJarByClass(this.getClass());

  // Scan the metastore directly; tables fan out to the mapper.
  compareJob.setInputFormatClass(MetastoreScanInputFormat.class);
  compareJob.setMapperClass(Stage1ProcessTableMapper.class);
  compareJob.setReducerClass(Stage1PartitionCompareReducer.class);

  compareJob.setOutputKeyClass(LongWritable.class);
  compareJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(compareJob, output);
  // setOutputCompressorClass also switches output compression on.
  FileOutputFormat.setOutputCompressorClass(compareJob, GzipCodec.class);

  return compareJob.waitForCompletion(true) ? 0 : 1;
}
/**
 * Runs the stage-1 job that compares table/partition metadata from the
 * metastore, writing gzip-compressed results under {@code output}.
 *
 * @return 0 on success, 1 on failure
 */
private int runMetastoreCompareJob(Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(MetastoreScanInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapper.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  final boolean finishedOk = job.waitForCompletion(true);
  if (!finishedOk) {
    return 1;
  }
  return 0;
}
/**
 * Configures output compression from the target path's file extension:
 * ".bz2"/".bz" selects bzip2, ".gz" selects gzip, anything else disables
 * compression entirely.
 */
private void setCompression(Path path, Job job) {
  final String fileName = path.getName();
  final boolean wantsBzip = fileName.endsWith(".bz2") || fileName.endsWith(".bz");
  final boolean wantsGzip = fileName.endsWith(".gz");

  FileOutputFormat.setCompressOutput(job, wantsBzip || wantsGzip);
  if (wantsBzip) {
    FileOutputFormat.setOutputCompressorClass(job, BZip2Codec.class);
  } else if (wantsGzip) {
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  }
}
/**
 * Configures the job's output location, key/value classes, and optional
 * compression driven by the {@code output.compression.*} properties.
 *
 * @param location output directory for the store
 * @param job      job being configured
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  job.setOutputKeyClass(this.keyClass);
  // BUG FIX: the original called setOutputKeyClass(this.keyClass) twice
  // (copy/paste) and never registered the value class with the job.
  job.setOutputValueClass(this.valueClass);
  Configuration conf = job.getConfiguration();
  if ("true".equals(conf.get("output.compression.enabled"))) {
    FileOutputFormat.setCompressOutput(job, true);
    String codec = conf.get("output.compression.codec");
    FileOutputFormat.setOutputCompressorClass(job,
        PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class));
  }
  FileOutputFormat.setOutputPath(job, new Path(location));
}
@Override public void setStoreLocation(String location, Job job) throws IOException { job.getConfiguration().set(MRConfiguration.TEXTOUTPUTFORMAT_SEPARATOR, ""); FileOutputFormat.setOutputPath(job, new Path(location)); if( "true".equals( job.getConfiguration().get( "output.compression.enabled" ) ) ) { FileOutputFormat.setCompressOutput( job, true ); String codec = job.getConfiguration().get( "output.compression.codec" ); try { FileOutputFormat.setOutputCompressorClass( job, (Class<? extends CompressionCodec>) Class.forName( codec ) ); } catch (ClassNotFoundException e) { throw new RuntimeException("Class not found: " + codec ); } } else { // This makes it so that storing to a directory ending with ".gz" or ".bz2" works. setCompression(new Path(location), job); } }
private int runDirectoryComparisonJob(Path source, Path destination, Path output, String compareOption) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(getConf(), "Directory Comparison Job"); job.setJarByClass(getClass()); job.setInputFormatClass(DirScanInputFormat.class); job.setMapperClass(ListFileMapper.class); job.setReducerClass(DirectoryCompareReducer.class); // last directory is destination, all other directories are source directories job.getConfiguration().set(SRC_PATH_CONF, source.toString()); job.getConfiguration().set(DST_PATH_CONF, destination.toString()); job.getConfiguration().set(FileInputFormat.INPUT_DIR, Joiner.on(",").join(source, destination)); job.getConfiguration().set(COMPARE_OPTION_CONF, compareOption); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FileStatus.class); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
private int runDirectoryComparisonJob(Path source, Path destination, Path output, String compareOption) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(getConf(), "Directory Comparison Job"); job.setJarByClass(getClass()); job.setInputFormatClass(DirScanInputFormat.class); job.setMapperClass(ListFileMapper.class); job.setReducerClass(DirectoryCompareReducer.class); // last directory is destination, all other directories are source directories job.getConfiguration().set(SRC_PATH_CONF, source.toString()); job.getConfiguration().set(DST_PATH_CONF, destination.toString()); job.getConfiguration().set(FileInputFormat.INPUT_DIR, Joiner.on(",").join(source, destination)); job.getConfiguration().set(COMPARE_OPTION_CONF, compareOption); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FileStatus.class); FileOutputFormat.setOutputPath(job, output); FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); boolean success = job.waitForCompletion(true); return success ? 0 : 1; }
/**
 * Builds (but does not submit) the visual-analysis job, clearing any
 * pre-existing output directory first.
 *
 * @param inputPath  text input location
 * @param outputPath output location; deleted first if it already exists
 * @return the fully configured, unsubmitted job
 */
private Job createJob(String inputPath, String outputPath) throws Exception {
  Configuration conf = getConf();
  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job job = Job.getInstance(conf);
  job.setJarByClass(VisualJob.class);
  job.setNumReduceTasks(90);

  // MapReduce refuses to run if the output path exists; remove it up front.
  FileSystem fs = FileSystem.get(new URI(outputPath), conf);
  Path output = new Path(outputPath);
  if (fs.exists(output)) {
    fs.delete(output, true);
  }

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(FloatArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(VisualThreadedMapper.class);
  job.setReducerClass(VisualReducer.class);
  return job;
}
/**
 * Runs the HDFS synchronization job that copies data from {@code source}
 * to {@code destination}, staging through {@code tmpDir}.
 *
 * @param source      source root recorded in the job configuration
 * @param destination destination root recorded in the job configuration
 * @param tmpDir      staging directory for in-flight copies
 * @param input       text file(s) listing the work items
 * @param output      gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Job.getInstance replaces the deprecated Job(Configuration, String) ctor.
  Job job = Job.getInstance(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  // Small split ceiling spreads the copy list across many mappers.
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  // Pass the Path directly; wrapping it in new Path(output.toString()) was redundant.
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Executes the HDFS sync MapReduce job and reports its outcome.
 *
 * @return 0 if the job completed successfully, 1 otherwise
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job syncJob = new Job(getConf(), "HDFS Sync job");
  syncJob.setJarByClass(getClass());
  syncJob.setInputFormatClass(TextInputFormat.class);
  syncJob.setMapperClass(HdfsSyncMapper.class);
  syncJob.setReducerClass(HdfsSyncReducer.class);
  syncJob.setOutputKeyClass(LongWritable.class);
  syncJob.setOutputValueClass(Text.class);

  // Record the sync endpoints and staging area for mappers/reducers.
  syncJob.getConfiguration().set(SRC_PATH_CONF, source.toString());
  syncJob.getConfiguration().set(DST_PATH_CONF, destination.toString());
  syncJob.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(syncJob, input);
  FileInputFormat.setInputDirRecursive(syncJob, true);
  final long maxSplitSize = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setMaxInputSplitSize(syncJob, maxSplitSize);

  FileOutputFormat.setOutputPath(syncJob, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(syncJob, GzipCodec.class);

  final boolean completed = syncJob.waitForCompletion(true);
  return completed ? 0 : 1;
}
/**
 * Variant of the stage-1 metastore comparison that reads the table list
 * from a text input file instead of scanning the metastore directly.
 *
 * @param input  text file listing tables to process
 * @param output gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job =
      Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  job.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(job, input);
  // Keep splits small so the table list fans out across many mappers.
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  job.setNumReduceTasks(
      getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150));

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Runs the stage-1 metastore comparison against an explicit table list.
 *
 * @return 0 if the job succeeded, 1 otherwise
 */
private int runMetastoreCompareJobWithTextInput(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job compareJob =
      Job.getInstance(this.getConf(), "Stage1: Metastore Compare Job with Input List");
  compareJob.setJarByClass(this.getClass());
  compareJob.setInputFormatClass(TextInputFormat.class);
  compareJob.setMapperClass(Stage1ProcessTableMapperWithTextInput.class);
  compareJob.setReducerClass(Stage1PartitionCompareReducer.class);

  FileInputFormat.setInputPaths(compareJob, input);
  long splitCeiling = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setMaxInputSplitSize(compareJob, splitCeiling);

  compareJob.setOutputKeyClass(LongWritable.class);
  compareJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(compareJob, output);
  FileOutputFormat.setOutputCompressorClass(compareJob, GzipCodec.class);

  // Reducer parallelism is configurable; defaults to 150.
  int reducers = getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150);
  compareJob.setNumReduceTasks(reducers);

  boolean ok = compareJob.waitForCompletion(true);
  if (ok) {
    return 0;
  }
  return 1;
}
@SuppressWarnings("unchecked") @Override public void setStoreLocation(String location, Job job) throws IOException { ensureUDFContext(job.getConfiguration()); job.setOutputKeyClass(keyClass); job.setOutputValueClass(valueClass); FileOutputFormat.setOutputPath(job, new Path(location)); if ("true".equals(job.getConfiguration().get( "output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = job.getConfiguration().get( "output.compression.codec"); FileOutputFormat .setOutputCompressorClass( job, PigContext.resolveClassName(codec).asSubclass( CompressionCodec.class)); } else { // This makes it so that storing to a directory ending with ".gz" or // ".bz2" works. setCompression(new Path(location), job); } }
public void run(Configuration conf, Path inputPathQ, Path inputUHatPath, Path sigmaPath, Path outputPath, int k, int numReduceTasks, Class<? extends Writable> labelClass, SSVDSolver.OutputScalingEnum outputScaling) throws ClassNotFoundException, InterruptedException, IOException { job = new Job(conf); job.setJobName("U-job"); job.setJarByClass(UJob.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); FileInputFormat.setInputPaths(job, inputPathQ); FileOutputFormat.setOutputPath(job, outputPath); // WARN: tight hadoop integration here: job.getConfiguration().set("mapreduce.output.basename", OUTPUT_U); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); job.setMapperClass(UMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); job.setOutputKeyClass(labelClass); job.setOutputValueClass(VectorWritable.class); job.getConfiguration().set(PROP_UHAT_PATH, inputUHatPath.toString()); job.getConfiguration().set(PROP_SIGMA_PATH, sigmaPath.toString()); job.getConfiguration().set(PROP_OUTPUT_SCALING, outputScaling.name()); job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); job.submit(); }
if (codecName != null) { FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, CompressionUtils.getHadoopCodec(codecName));
/**
 * Runs the stage-2 HDFS copy job and, on success, logs the Hive commands
 * that expose the job's logging data.
 *
 * @param input  text file(s) listing the directories to copy
 * @param output gzip-compressed job output directory
 * @return 0 on success, 1 on failure
 */
private int runHdfsCopyJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException,
    TemplateRenderException {
  LOG.info("Starting job for step 2...");

  final Job copyJob = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
  copyJob.setJarByClass(this.getClass());
  copyJob.setInputFormatClass(TextInputFormat.class);
  copyJob.setMapperClass(Stage2DirectoryCopyMapper.class);
  copyJob.setReducerClass(Stage2DirectoryCopyReducer.class);

  FileInputFormat.setInputPaths(copyJob, input);
  FileInputFormat.setInputDirRecursive(copyJob, true);
  FileInputFormat.setMaxInputSplitSize(copyJob,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  copyJob.setOutputKeyClass(LongWritable.class);
  copyJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(copyJob, output);
  FileOutputFormat.setOutputCompressorClass(copyJob, GzipCodec.class);
  copyJob.setNumReduceTasks(
      getConf().getInt(ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM, 150));

  final boolean succeeded = copyJob.waitForCompletion(true);
  if (succeeded) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext) + "\n");
  }
  return succeeded ? 0 : 1;
}
/**
 * Enables output compression when the path's extension maps to a codec
 * known to the Hadoop {@code CompressionCodecFactory}; otherwise disables
 * compression.
 *
 * @param path output path whose extension is inspected
 * @param job  job whose output settings are updated
 */
private void setCompression(Path path, Job job) {
  CompressionCodec codec =
      new CompressionCodecFactory(job.getConfiguration()).getCodec(path);
  if (codec == null) {
    FileOutputFormat.setCompressOutput(job, false);
    return;
  }
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, codec.getClass());
}
@SuppressWarnings("unchecked") @Override public void setStoreLocation(String location, Job job) throws IOException { Configuration conf = HadoopCompat.getConfiguration(job); ensureUDFContext(conf); verifyWritableClass(config.keyClass, true, config.keyConverter); verifyWritableClass(config.valueClass, false, config.valueConverter); job.setOutputKeyClass(config.keyClass); job.setOutputValueClass(config.valueClass); super.setStoreLocation(location, job); if ("true".equals(conf.get("output.compression.enabled"))) { FileOutputFormat.setCompressOutput(job, true); String codec = conf.get("output.compression.codec"); FileOutputFormat.setOutputCompressorClass(job, PigContext.resolveClassName(codec).asSubclass(CompressionCodec.class)); } else { // This makes it so that storing to a directory ending with ".gz" or ".bz2" works. setCompression(new Path(location), job); } }
/**
 * Turns output compression on or off based on whether Hadoop's codec
 * factory recognizes the path's file extension.
 *
 * @param path output path whose extension selects the codec
 * @param job  job whose output settings are updated
 */
private void setCompression(Path path, Job job) {
  CompressionCodecFactory factory =
      new CompressionCodecFactory(HadoopCompat.getConfiguration(job));
  CompressionCodec matched = factory.getCodec(path);
  boolean compress = matched != null;
  FileOutputFormat.setCompressOutput(job, compress);
  if (compress) {
    FileOutputFormat.setOutputCompressorClass(job, matched.getClass());
  }
}
/**
 * Runs a map-only VCF round-trip job against {@code inputPath} and returns
 * the (freshly cleared) output directory.
 *
 * @param inputPath   VCF input to read
 * @param writeHeader whether the output format should emit the VCF header
 * @return the qualified output path ("target/out")
 */
private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception {
  final FileSystem fs = FileSystem.get(conf);
  final Path outputPath = fs.makeQualified(new Path("target/out"));
  // Start from a clean output directory; MapReduce refuses to overwrite.
  fs.delete(outputPath, true);

  final Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(VCFInputFormat.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(VariantContextWritable.class);
  if (writeHeader) {
    job.setOutputFormatClass(VCFTestWithHeaderOutputFormat.class);
  } else {
    job.setOutputFormatClass(VCFTestNoHeaderOutputFormat.class);
  }
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VariantContextWritable.class);
  // Map-only: records pass straight through to the output format.
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);
  if (codecClass != null) {
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
  }

  final boolean success = job.waitForCompletion(true);
  assertTrue(success);
  return outputPath;
}