private static void configureAvroOutput(JobConf job) {
  if (job.get("mapred.output.format.class") == null)
    job.setOutputFormat(AvroOutputFormat.class);

  if (job.getReducerClass() == IdentityReducer.class)
    job.setReducerClass(HadoopReducer.class);

  job.setOutputKeyClass(AvroWrapper.class);
  configureAvroShuffle(job);
}
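In Avro's mapred API this private helper is reached through AvroJob.setOutputSchema(); a minimal driver sketch, with a hypothetical class name and placeholder paths, and mapper wiring elided:

// Minimal driver sketch (hypothetical class and paths): declaring the output
// schema makes AvroJob install AvroOutputFormat via configureAvroOutput()
// above, unless an output format was already set explicitly.
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class AvroOutputDriver {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(AvroOutputDriver.class);
    job.setJobName("avro-output-example");
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    AvroJob.setOutputSchema(job, Schema.create(Schema.Type.STRING));
    // Mapper/reducer wiring (AvroJob.setMapperClass etc.) elided.
    JobClient.runJob(job);
  }
}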
public int run(String[] args) throws Exception {
  if (args.length != 3)
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");

  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");

  conf.setMapperClass(GenerateDataMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0); // map-only job; the identity reducer never runs

  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.setInputPaths(conf, inputPath);
  Path outputPath = new Path(args[1]);

  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if (fs.exists(outputPath))
    fs.delete(outputPath, true);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInt("value.size", Integer.parseInt(args[2]));
  JobClient.runJob(conf);
  return 0;
}
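Since run() follows Hadoop's Tool contract, the usual launcher is ToolRunner; a minimal, hypothetical main(), assuming GenerateData extends Configured implements Tool:

// Hypothetical launcher: ToolRunner parses generic Hadoop options
// (-D, -conf, ...) before delegating to run() above.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new GenerateData(), args);
  System.exit(exitCode);
}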
public static void initTableReduceJob(String table, Class<? extends TableReduce> reducer,
    JobConf job, Class partitioner, boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
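A hedged usage sketch of this helper; "mytable" and MyTableReducer are illustrative stand-ins, and passing null for the partitioner keeps the default:

// Illustrative wiring: a hypothetical TableReduce implementation writes
// Puts into the "mytable" HBase table; no custom partitioner.
JobConf job = new JobConf(MyDriver.class);
TableMapReduceUtil.initTableReduceJob("mytable", MyTableReducer.class, job, null, true);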
public void testInputFormat() throws Exception {
  JobConf job = new JobConf();
  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");

  Schema subSchema = Schema.parse("{\"type\":\"record\","
      + "\"name\":\"PairValue\","
      + "\"fields\": [ "
      + "{\"name\":\"value\", \"type\":\"long\"}"
      + "]}");
  AvroJob.setInputSchema(job, subSchema);
  AvroJob.setMapperClass(job, Counter.class);
  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
  job.setInputFormat(AvroTrevniInputFormat.class);

  job.setNumReduceTasks(0); // map-only
  job.setOutputFormat(NullOutputFormat.class); // ignore output

  total = 0;
  JobClient.runJob(job);
  assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
  JobConf job = new JobConf();

  String inputPath = INPUT_DIR.getRoot().getPath();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);

  WordCountUtil.writeLinesBytesFile(inputPath);

  job.setInputFormat(AvroAsTextInputFormat.class);
  job.setOutputFormat(AvroTextOutputFormat.class);
  job.setOutputKeyClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, outputPath);

  JobClient.runJob(job);

  WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  Configuration conf = UTIL.getConfiguration();
  final JobConf job = new JobConf(conf);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
public void configure(JobConf job) {
  if (null != inf)
    job.setInputFormat(inf);
  if (null != of)
    job.setOutputFormat(of);
}
@Override
public void sinkConfInit(FlowProcess<JobConf> process,
    Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) {
  conf.setOutputFormat(TableOutputFormat.class);
  conf.setOutputKeyClass(ImmutableBytesWritable.class);
  conf.setOutputValueClass(Put.class);
}
/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param jobConf Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(JobConf jobConf, boolean setMapper, boolean setCombiner, boolean setReducer) {
  if (setMapper) {
    jobConf.setMapperClass(HadoopWordCount1Map.class);
    jobConf.setInputFormat(TextInputFormat.class);
  }

  if (setCombiner)
    jobConf.setCombinerClass(HadoopWordCount1Reduce.class);

  if (setReducer) {
    jobConf.setReducerClass(HadoopWordCount1Reduce.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
  }
}
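A brief usage sketch: enabling all three flags configures the full word-count pipeline in one call.

// Illustrative call: mapper + input format, combiner, and
// reducer + output format all set on one JobConf.
JobConf jobConf = new JobConf();
setTasksClasses(jobConf, true, true, true);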
List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());
JobConf jobConf = new JobConf(conf);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
jobConf.setOutputFormat(TextMultiOutputFormat.class);

Path remotePath = new Path(outputs.get(0).getDfsLocation()
    + "/_temporary/" + containerId.toString());
FileSystem dfs = remotePath.getFileSystem(jobConf);
jobConf.set(XLearningConstants.STREAM_OUTPUT_DIR, remotePath.makeQualified(dfs).toString());
// jobConf carries the old-API (mapred) format classes; job is presumably a
// new-API Job created from jobConf elsewhere in this snippet's source.
jobConf.setInputFormat(CustomV1InputFormat.class);
jobConf.setOutputFormat(CustomV1OutputFormat.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));
private static void setupTetherJob(JobConf job) throws IOException {
  job.setMapRunnerClass(TetherMapRunner.class);
  job.setPartitionerClass(TetherPartitioner.class);
  job.setReducerClass(TetherReducer.class);

  job.setInputFormat(TetherInputFormat.class);
  job.setOutputFormat(TetherOutputFormat.class);

  job.setOutputKeyClass(TetherData.class);
  job.setOutputKeyComparatorClass(TetherKeyComparator.class);
  job.setMapOutputValueClass(NullWritable.class);

  // set the map output key class to TetherData
  job.setMapOutputKeyClass(TetherData.class);

  // if protocol isn't set, default to SASL
  if (job.getStrings(TETHER_PROTOCOL) == null) {
    job.set(TETHER_PROTOCOL, "sasl");
  }

  // add TetherKeySerialization to io.serializations
  Collection<String> serializations = job.getStringCollection("io.serializations");
  if (!serializations.contains(TetherKeySerialization.class.getName())) {
    serializations.add(TetherKeySerialization.class.getName());
    job.setStrings("io.serializations", serializations.toArray(new String[0]));
  }

  // determine whether the executable should be added to the cache.
  if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
    DistributedCache.addCacheFile(getExecutable(job), job);
  }
}
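A hedged launch sketch, assuming submission goes through Avro's TetherJob (which applies setupTetherJob() internally); the paths and binary location are placeholders:

// Illustrative launch of a tethered job; all paths are placeholders.
JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/in"));
FileOutputFormat.setOutputPath(job, new Path("/out"));
TetherJob.setExecutable(job, new File("/path/to/tethered-executable"));
TetherJob.runJob(job);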
Path rankings = new Path(options.getResultPath(), RANKINGS);
Path fout = new Path(options.getResultPath(), USERVISITS);
Path uagentPath = new Path(options.getWorkPath(), uagentf);
DistributedCache.addCacheFile(uagentPath.toUri(), job);
DistributedCache.addCacheFile(searchkeyPath.toUri(), job);

job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setReducerClass(CreateUserVisitsReducer.class);

// condition of the enclosing if (choosing sequence-file output) is elided in this excerpt
  job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
  job.setOutputFormat(TextOutputFormat.class);
private JobConf createJobConfig() throws IOException {
  Path inputPath = new Path(INPUT_PATH);
  Path outputPath = new Path(OUTPUT_PATH);

  FileSystem.get(new Configuration()).delete(outputPath, true);

  JobConf jobConfig = new JobConf();
  jobConfig.setInputFormat(AvroInputFormat.class);
  jobConfig.setOutputFormat(AvroOutputFormat.class);
  AvroOutputFormat.setOutputPath(jobConfig, outputPath);
  AvroInputFormat.addInputPath(jobConfig, inputPath);
  jobConfig.set(AvroJob.OUTPUT_SCHEMA, User.SCHEMA.toString());
  jobConfig.set(AvroJob.INPUT_SCHEMA, User.SCHEMA.toString());

  return jobConfig;
}
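A brief usage note: the returned configuration is ready for submission, e.g. via JobClient.

// Submit the Avro identity job configured above.
JobClient.runJob(createJobConfig());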
if (!isAvro) {
  conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
  conf.setMapperClass(mapperClass);
  conf.setMapOutputKeyClass(BytesWritable.class);
  conf.setMapOutputValueClass(BytesWritable.class);
  conf.setReducerClass(HadoopStoreBuilderReducer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);
  conf.setJarByClass(getClass());
  conf.setReduceSpeculativeExecution(false);
} else {
  // Avro build path: ByteBuffer keys/values through AvroOutputFormat
  conf.setMapOutputValueClass(ByteBuffer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
  conf.setOutputKeyClass(ByteBuffer.class);
  conf.setOutputValueClass(ByteBuffer.class);
  conf.setReducerClass(AvroStoreBuilderReducer.class);
}

Path directoryPath = new Path(outputDir.toString(), directoryName);
JobConf job = new JobConf(conf);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setJarByClass(CompactorMR.class);
LOG.debug("User jar set to " + job.getJar());
job.setMapperClass(CompactorMap.class);
job.setNumReduceTasks(0);
job.setInputFormat(CompactorInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
job.setOutputCommitter(CompactorOutputCommitter.class);
@Test
public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  outputPath.getFileSystem(job).delete(outputPath, true);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");
  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
}