public int run(String[] args) throws Exception {
    if (args.length != 3)
        Utils.croak("USAGE: GenerateData input-file output-dir value-size");

    JobConf conf = new JobConf(getConf(), GenerateData.class);
    conf.setJobName("generate-data");

    conf.setMapperClass(GenerateDataMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    // the job writes BytesWritable key/value pairs to the sequence file
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);

    Path inputPath = new Path(args[0]);
    FileInputFormat.setInputPaths(conf, inputPath);

    Path outputPath = new Path(args[1]);
    // delete output path if it already exists
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setInt("value.size", Integer.parseInt(args[2]));

    JobClient.runJob(conf);
    return 0;
}
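// A minimal driver sketch (not from the original source): the run() method above follows
// the org.apache.hadoop.util.Tool contract, so the usual entry point is ToolRunner,
// assuming GenerateData extends Configured and implements Tool.
public static void main(String[] args) throws Exception {
    int exitCode = ToolRunner.run(new Configuration(), new GenerateData(), args);
    System.exit(exitCode);
}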
@Test
public void testNonAvroMapper() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath, true);

    // configure input for a non-Avro sequence file
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, file().toURI().toString());

    // use a plain Hadoop mapper that emits Avro output
    job.setMapperClass(NonAvroMapper.class);

    // reducer is the default identity reducer

    // configure output for Avro
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setOutputSchema(job, SCHEMA);

    JobClient.runJob(job);

    checkFile(new DataFileReader<>(new File(outputPath.toString() + "/part-00000.avro"),
                                   new SpecificDatumReader<>()));
}
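// A hedged sketch (an assumption modeled on Avro's mapred tests, not necessarily the exact
// NonAvroMapper used above) of a plain Hadoop mapper that emits Avro output: it wraps its
// pairs in AvroKey/AvroValue to match a Pair<Long, Utf8> output schema.
private static class NonAvroMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, AvroKey<Long>, AvroValue<Utf8>> {
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<AvroKey<Long>, AvroValue<Utf8>> out, Reporter reporter)
            throws IOException {
        out.collect(new AvroKey<>(key.get()), new AvroValue<>(new Utf8(value.toString())));
    }
}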
@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath, true);

    // configure Avro input read from a sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, file().toURI().toString());

    // mapper is the default identity mapper

    // use a plain Hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output as a non-Avro sequence file
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    // output key/value classes are the defaults, LongWritable/Text

    JobClient.runJob(job);

    checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
        throws IOException, InterruptedException, ClassNotFoundException {
    JobConf job = new JobConf(TEST_UTIL.getConfiguration());

    job.setJobName(jobName);

    // initMultiTableSnapshotMapperJob configures the mapper and its output types
    TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans),
        Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true,
        restoreDir);
    TableMapReduceUtil.addDependencyJars(job);

    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key

    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));

    LOG.info("Started " + job.getJobName());
    // JobClient.runJob blocks until the job completes
    RunningJob runningJob = JobClient.runJob(job);
    assertTrue(runningJob.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
/**
 * Gets a fully configured JobConf instance.
 *
 * @param input input file name.
 * @param output output directory name.
 * @return job configuration.
 */
public static JobConf getJob(String input, String output) {
    JobConf conf = new JobConf(HadoopWordCount1.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    setTasksClasses(conf, true, true, true);

    FileInputFormat.setInputPaths(conf, new Path(input));
    FileOutputFormat.setOutputPath(conf, new Path(output));

    return conf;
}
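// A hypothetical usage sketch (not part of the original class): getJob returns a
// ready-to-run configuration, so submitting the word count with the old mapred API
// is a single call.
public static void main(String[] args) throws Exception {
    JobClient.runJob(HadoopWordCount1.getJob(args[0], args[1]));
}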
@Test
public void testNonAvroMapOnly() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath, true);

    // configure input for a non-Avro sequence file
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, file().toURI().toString());

    // use a plain Hadoop mapper that emits Avro output
    job.setMapperClass(NonAvroOnlyMapper.class);

    // configure output for Avro
    job.setNumReduceTasks(0); // map-only
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setOutputSchema(job, SCHEMA);

    JobClient.runJob(job);

    checkFile(new DataFileReader<>(new File(outputPath.toString() + "/part-00000.avro"),
                                   new SpecificDatumReader<>()));
}
// non-Avro stores: byte-oriented mapper/reducer with BytesWritable output
if (!isAvro) {
    conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
    conf.setMapperClass(mapperClass);
    conf.setMapOutputKeyClass(BytesWritable.class);
    conf.setMapOutputValueClass(BytesWritable.class);
    conf.setReducerClass(HadoopStoreBuilderReducer.class);
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
}

// settings common to both the Avro and non-Avro paths
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);

// Avro stores: override the output classes and use the Avro reducer
if (isAvro) {
    conf.setOutputKeyClass(ByteBuffer.class);
    conf.setOutputValueClass(ByteBuffer.class);
    conf.setReducerClass(AvroStoreBuilderReducer.class);
}

Path directoryPath = new Path(outputDir.toString(), directoryName);
@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
    outputPath.getFileSystem(job).delete(outputPath, true);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");

    job.setMapperClass(AvroTestConverter.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, outputPath);
    System.out.println(createSchema());
    AvroJob.setOutputSchema(job,
        Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
    job.setOutputFormat(AvroOutputFormat.class);

    JobClient.runJob(job);
}
/**
 * Runs the identity job on a "bytes" Avro file, using AvroAsTextInputFormat
 * and AvroTextOutputFormat, to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
    JobConf job = new JobConf();

    String inputPath = INPUT_DIR.getRoot().getPath();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath, true);

    WordCountUtil.writeLinesBytesFile(inputPath);

    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputPath);

    JobClient.runJob(job);

    WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
public static void main(String... args) throws Exception {
    JobConf job = new JobConf();
    job.setJarByClass(LongSleepJob.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    output.getFileSystem(job).delete(output, true);

    job.setMapperClass(Map.class);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    // explicitly set the task timeout to 10 minutes
    job.set("mapred.task.timeout", String.valueOf(TimeUnit.MINUTES.toMillis(10)));

    JobClient.runJob(job);
}
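// A plausible sketch (an assumption, not the original source) of the Map class this job
// references: an old-API mapper whose map() simply sleeps, so tasks run for a long time
// without emitting output, which is why the job sets mapred.task.timeout explicitly.
public static class Map extends MapReduceBase
        implements Mapper<LongWritable, Text, NullWritable, NullWritable> {
    @Override
    public void map(LongWritable key, Text value,
                    OutputCollector<NullWritable, NullWritable> out, Reporter reporter)
            throws IOException {
        try {
            // sleep well below the configured 10-minute timeout so the task is not killed
            Thread.sleep(TimeUnit.MINUTES.toMillis(5));
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}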
protected JobConf configStage2() throws Exception {
    final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
    conf.set("block_width", "" + block_width);
    conf.setJobName("ConCmptBlock_pass2");

    conf.setMapperClass(MapStage2.class);
    conf.setReducerClass(RedStage2.class);

    FileInputFormat.setInputPaths(conf, tempbm_path);
    FileOutputFormat.setOutputPath(conf, nextbm_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
public static void main(String... args) throws Exception {
    JobConf job = new JobConf();
    job.setJarByClass(FilterProjectJob.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    output.getFileSystem(job).delete(output, true);

    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    JobClient.runJob(job);
}
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), ConCmptBlock.class);
    conf.set("block_width", "" + block_width);
    conf.set("recursive_diagmult", "" + recursive_diagmult);
    conf.setJobName("ConCmptBlock_pass1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
    FileOutputFormat.setOutputPath(conf, tempbm_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
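// A hedged driver sketch (an assumption about the enclosing class, in the spirit of the
// PEGASUS drivers): configStage1()/configStage2() return ready JobConfs, and the run()
// method submits them in sequence on each iteration until the labels converge.
JobClient.runJob(configStage1());
JobClient.runJob(configStage2());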
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), PagerankPrep.class);
    conf.set("make_symmetric", "" + make_symmetric);
    conf.setJobName("PagerankPrep_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), RWRNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("mixing_c", "" + mixing_c);
    conf.set("make_symmetric", "" + make_symmetric);
    conf.setJobName("RWR_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    fs.delete(tempmv_path, true);

    FileInputFormat.setInputPaths(conf, edge_path, vector_path);
    FileOutputFormat.setOutputPath(conf, tempmv_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
protected JobConf configStage2() throws Exception {
    final JobConf conf = new JobConf(getConf(), PagerankNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("mixing_c", "" + mixing_c);
    conf.set("converge_threshold", "" + converge_threshold);
    conf.setJobName("Pagerank_Stage2");

    conf.setMapperClass(MapStage2.class);
    conf.setReducerClass(RedStage2.class);

    FileInputFormat.setInputPaths(conf, tempmv_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), RWRBlock.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("mixing_c", "" + mixing_c);
    conf.set("block_width", "" + block_width);
    conf.setJobName("RWRBlock_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    fs.delete(tempmv_path, true);

    FileInputFormat.setInputPaths(conf, edge_path, vector_path);
    FileOutputFormat.setOutputPath(conf, tempmv_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
protected JobConf configSaxpy(Path py, Path px, Path saxpy_output, double a) throws Exception {
    final JobConf conf = new JobConf(getConf(), Saxpy.class);
    conf.set("y_path", py.getName());
    conf.set("x_path", px.getName());
    conf.set("a", "" + a);
    conf.setJobName("Lanczos_pass_saxpy");

    conf.setMapperClass(Saxpy.MapStage1.class);
    conf.setReducerClass(Saxpy.RedStage1.class);

    FileInputFormat.setInputPaths(conf, py, px);
    FileOutputFormat.setOutputPath(conf, saxpy_output);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    return conf;
}
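// A hedged sketch (an assumption, not the PEGASUS source) of the reduce step configSaxpy
// wires up: assuming the map stage emits y[i] as-is and x[i] already scaled by a, the
// elements arrive grouped by row index and summing them yields the BLAS-style y[i] + a * x[i].
public static class RedStage1 extends MapReduceBase
        implements Reducer<IntWritable, DoubleWritable, IntWritable, DoubleWritable> {
    @Override
    public void reduce(IntWritable key, Iterator<DoubleWritable> values,
                       OutputCollector<IntWritable, DoubleWritable> out, Reporter reporter)
            throws IOException {
        double sum = 0;
        while (values.hasNext())
            sum += values.next().get();
        out.collect(key, new DoubleWritable(sum));
    }
}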
private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
        throws IOException {
    JobConf job = new JobConf(fsConfig, TestDFSIO.class);

    FileInputFormat.setInputPaths(job, CONTROL_DIR);
    job.setInputFormat(SequenceFileInputFormat.class);

    job.setMapperClass(mapperClass);
    job.setReducerClass(AccumulatingReducer.class);

    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);

    JobClient.runJob(job);
}
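// A hedged usage sketch (the mapper and directory names are assumptions based on how
// TestDFSIO is commonly laid out): the same helper drives both the write and read phases
// of the benchmark, varying only the mapper and the output directory.
runIOTest(WriteMapper.class, WRITE_DIR);
runIOTest(ReadMapper.class, READ_DIR);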
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), ConCmpt.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("cur_iter", "" + cur_iter);
    conf.set("make_symmetric", "" + make_symmetric);
    conf.setJobName("ConCmpt_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileInputFormat.setInputPaths(conf, edge_path, curbm_path);
    FileOutputFormat.setOutputPath(conf, tempbm_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}