org.apache.hadoop.mapred.JobConf.setInputFormat java code examples

Refine search

/**
 * Applies Avro input settings to a job without overriding explicit choices.
 *
 * <p>{@code AvroInputFormat} is installed only when the
 * {@code mapred.input.format.class} property has no value, and
 * {@code HadoopMapper} replaces the mapper only when the job's mapper class
 * is {@code IdentityMapper}. Shuffle setup is then delegated to
 * {@code configureAvroShuffle}.
 *
 * @param job the job configuration, modified in place
 */
private static void configureAvroInput(JobConf job) {
 final boolean inputFormatUnset = job.get("mapred.input.format.class") == null;
 if (inputFormatUnset) {
  job.setInputFormat(AvroInputFormat.class);
 }

 final boolean mapperIsIdentity = job.getMapperClass() == IdentityMapper.class;
 if (mapperIsIdentity) {
  job.setMapperClass(HadoopMapper.class);
 }

 configureAvroShuffle(job);
}

/**
 * Configures and runs the map-only "generate-data" job.
 *
 * <p>Reads text input from {@code args[0]} and writes a sequence file of
 * {@code BytesWritable} key/value pairs to {@code args[1]}. Any existing
 * output directory is deleted recursively before the job is submitted.
 *
 * @param args exactly three values: input file, output directory, value size
 * @return 0 once {@code JobClient.runJob} has returned
 * @throws NumberFormatException if the value size is not a valid integer
 * @throws Exception if file-system access or the job itself fails
 */
public int run(String[] args) throws Exception {
  if (args.length != 3) {
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");
  }

  // Parse every argument up front so a malformed value-size fails before the
  // existing output directory is deleted below.
  Path inputPath = new Path(args[0]);
  Path outputPath = new Path(args[1]);
  int valueSize = Integer.parseInt(args[2]);

  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");
  conf.setMapperClass(GenerateDataMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // The original also set Text/IntWritable output classes first; those calls
  // were overwritten by the two below and have been removed.
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);
  conf.setInt("value.size", valueSize);

  FileInputFormat.setInputPaths(conf, inputPath);

  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if (fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(conf, outputPath);

  JobClient.runJob(conf);
  return 0;
}

  /**
   * Registers the word-count task classes on the given configuration, one
   * group per enabled flag.
   *
   * @param jobConf Configuration to update in place.
   * @param setMapper When {@code true}, sets the mapper class together with the text input format.
   * @param setCombiner When {@code true}, sets the combiner class.
   * @param setReducer When {@code true}, sets the reducer class together with the text output format.
   */
  public static void setTasksClasses(JobConf jobConf, boolean setMapper, boolean setCombiner, boolean setReducer) {
    // Map side: mapper plus the input format that feeds it.
    if (setMapper) {
      jobConf.setMapperClass(HadoopWordCount1Map.class);
      jobConf.setInputFormat(TextInputFormat.class);
    }

    // The combiner uses the same class as the reducer.
    if (setCombiner) {
      jobConf.setCombinerClass(HadoopWordCount1Reduce.class);
    }

    // Reduce side: reducer plus the output format it writes through.
    if (setReducer) {
      jobConf.setReducerClass(HadoopWordCount1Reduce.class);
      jobConf.setOutputFormat(TextOutputFormat.class);
    }
  }
}

/**
 * Runs a job that reads a non-Avro sequence file through a plain Hadoop
 * mapper, leaves the reducer at its default, and checks the Avro file
 * written to the output directory.
 */
@Test
public void testNonAvroMapper() throws Exception {
 JobConf job = new JobConf();
 Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
 // Clear any previous output; the explicit recursive overload replaces the
 // deprecated single-argument FileSystem.delete(Path).
 outputPath.getFileSystem(job).delete(outputPath, true);
 // configure input for non-Avro sequence file
 job.setInputFormat(SequenceFileInputFormat.class);
 FileInputFormat.setInputPaths(job, file().toURI().toString());
 // use a hadoop mapper that emits Avro output
 job.setMapperClass(NonAvroMapper.class);
 // reducer is default, identity
 // configure output for avro
 FileOutputFormat.setOutputPath(job, outputPath);
 AvroJob.setOutputSchema(job, SCHEMA);
 JobClient.runJob(job);
 checkFile(new DataFileReader<>
      (new File(outputPath.toString() + "/part-00000.avro"),
       new SpecificDatumReader<>()));
}

/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
 JobConf job = new JobConf();
 String inputPath = INPUT_DIR.getRoot().getPath();
 Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
 // Clear any previous output; the explicit recursive overload replaces the
 // deprecated single-argument FileSystem.delete(Path).
 outputPath.getFileSystem(job).delete(outputPath, true);
 WordCountUtil.writeLinesBytesFile(inputPath);
 job.setInputFormat(AvroAsTextInputFormat.class);
 job.setOutputFormat(AvroTextOutputFormat.class);
 job.setOutputKeyClass(Text.class);
 FileInputFormat.setInputPaths(job, new Path(inputPath));
 FileOutputFormat.setOutputPath(job, outputPath);
 JobClient.runJob(job);
 WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}

/**
 * Installs the tether runner, partitioner, reducer, formats and key handling
 * on a job, then fills in the protocol, serialization and cache settings
 * where they are missing or requested.
 *
 * @param job the job configuration, modified in place
 * @throws IOException declared for the calls made while configuring the job
 */
private static void setupTetherJob(JobConf job) throws IOException {
 // Task wiring.
 job.setMapRunnerClass(TetherMapRunner.class);
 job.setPartitionerClass(TetherPartitioner.class);
 job.setReducerClass(TetherReducer.class);

 // Input and output formats.
 job.setInputFormat(TetherInputFormat.class);
 job.setOutputFormat(TetherOutputFormat.class);

 // Keys are TetherData on both map output and job output; map values are null.
 job.setOutputKeyClass(TetherData.class);
 job.setOutputKeyComparatorClass(TetherKeyComparator.class);
 job.setMapOutputKeyClass(TetherData.class);
 job.setMapOutputValueClass(NullWritable.class);

 // Use "sasl" when no protocol has been configured.
 if (job.getStrings(TETHER_PROTOCOL) == null) {
  job.set(TETHER_PROTOCOL, "sasl");
 }

 // Append TetherKeySerialization to io.serializations unless already listed.
 final String keySerialization = TetherKeySerialization.class.getName();
 final Collection<String> registered = job.getStringCollection("io.serializations");
 if (!registered.contains(keySerialization)) {
  registered.add(keySerialization);
  job.setStrings("io.serializations", registered.toArray(new String[0]));
 }

 // Add the executable to the distributed cache only when the flag is set.
 if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
  DistributedCache.addCacheFile(getExecutable(job), job);
 }
}

/**
 * Runs a map-only job with the given input format and {@code ExampleVerifier}
 * as mapper, then checks the row, family and value counters the job reports.
 *
 * @param clazz the input format class under test
 * @throws IOException if submitting the job or reading its counters fails
 */
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
 final JobConf job = new JobConf(UTIL.getConfiguration());
 job.setInputFormat(clazz);
 job.setOutputFormat(NullOutputFormat.class);
 job.setMapperClass(ExampleVerifier.class);
 job.setNumReduceTasks(0);

 LOG.debug("submitting job.");
 final RunningJob run = JobClient.runJob(job);
 assertTrue("job failed!", run.isSuccessful());

 // Counter groups are named after the test class plus a category suffix.
 final String groupPrefix = TestTableInputFormat.class.getName();
 final String rowGroup = groupPrefix + ":row";
 final String familyGroup = groupPrefix + ":family";
 final String valueGroup = groupPrefix + ":value";

 assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
   run.getCounters().findCounter(rowGroup, "aaa").getCounter());
 assertEquals("Saw any instances of the filtered out row.", 0,
   run.getCounters().findCounter(rowGroup, "bbb").getCounter());
 assertEquals("Saw the wrong number of instances of columnA.", 1,
   run.getCounters().findCounter(familyGroup, "columnA").getCounter());
 assertEquals("Saw the wrong number of instances of columnB.", 1,
   run.getCounters().findCounter(familyGroup, "columnB").getCounter());
 assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
   run.getCounters().findCounter(valueGroup, "value aaa").getCounter());
 assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
   run.getCounters().findCounter(valueGroup, "value bbb").getCounter());
}

conf.setInputFormat(inputFormatClass);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(BytesWritable.class);
conf.setOutputValueClass(BytesWritable.class);
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);
  conf.setMapOutputValueClass(ByteBuffer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputFormat((Class<? extends OutputFormat>) AvroOutputFormat.class);
  conf.setOutputKeyClass(ByteBuffer.class);
  conf.setOutputValueClass(ByteBuffer.class);
  conf.setReducerClass(AvroStoreBuilderReducer.class);
  Path directoryPath = new Path(outputDir.toString(), directoryName);

/**
 * Runs a map-only job that reads a non-Avro sequence file through a plain
 * Hadoop mapper and checks the Avro file written to the output directory.
 */
@Test
public void testNonAvroMapOnly() throws Exception {
 JobConf job = new JobConf();
 Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
 // Clear any previous output; the explicit recursive overload replaces the
 // deprecated single-argument FileSystem.delete(Path).
 outputPath.getFileSystem(job).delete(outputPath, true);
 // configure input for non-Avro sequence file
 job.setInputFormat(SequenceFileInputFormat.class);
 FileInputFormat.setInputPaths(job, file().toURI().toString());
 // use a hadoop mapper that emits Avro output
 job.setMapperClass(NonAvroOnlyMapper.class);
 // configure output for avro
 job.setNumReduceTasks(0);                     // map-only
 FileOutputFormat.setOutputPath(job, outputPath);
 AvroJob.setOutputSchema(job, SCHEMA);
 JobClient.runJob(job);
 checkFile(new DataFileReader<>
      (new File(outputPath.toString() + "/part-00000.avro"),
       new SpecificDatumReader<>()));
}

Class<? extends InputFormat> inputFormat) {
job.setInputFormat(inputFormat);
job.setMapOutputValueClass(outputValueClass);
job.setMapOutputKeyClass(outputKeyClass);
job.setMapperClass(mapper);
job.setStrings("io.serializations", job.get("io.serializations"),
  MutationSerialization.class.getName(), ResultSerialization.class.getName());

JobConf job = new JobConf(conf);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setJarByClass(CompactorMR.class);
LOG.debug("User jar set to " + job.getJar());
job.setMapperClass(CompactorMap.class);
job.setNumReduceTasks(0);
job.setInputFormat(CompactorInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
job.setOutputCommitter(CompactorOutputCommitter.class);

 /**
  * Runs a map-only job that reads text input through {@code AvroTestConverter}
  * and writes Avro output whose schema is a pair of a LONG and the schema
  * returned by {@code createSchema()}.
  */
 @Test
  public void testJob() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(DIR.getRoot().getPath() + "/out");
  // Clear any previous output; the explicit recursive overload replaces the
  // deprecated single-argument FileSystem.delete(Path).
  outputPath.getFileSystem(job).delete(outputPath, true);

  job.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, DIR.getRoot().getPath() + "/in");

  job.setMapperClass(AvroTestConverter.class);
  job.setNumReduceTasks(0);

  FileOutputFormat.setOutputPath(job, outputPath);
  System.out.println(createSchema());
  AvroJob.setOutputSchema(job,
              Pair.getPairSchema(Schema.create(Schema.Type.LONG),
                        createSchema()));
  job.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(job);
 }
}

MultiTableSnapshotInputFormat.setInput(job, snapshotScans, tmpRestoreDir);
job.setInputFormat(MultiTableSnapshotInputFormat.class);
if (outputValueClass != null) {
 job.setMapOutputValueClass(outputValueClass);
 job.setMapOutputKeyClass(outputKeyClass);
job.setMapperClass(mapper);
if (addDependencyJars) {
 addDependencyJars(job);

JobConf job = new JobConf(conf);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setJarByClass(CompactorMR.class);
LOG.debug("User jar set to " + job.getJar());
job.setMapperClass(CompactorMap.class);
job.setNumReduceTasks(0);
job.setInputFormat(CompactorInputFormat.class);
job.setOutputFormat(NullOutputFormat.class);
job.setOutputCommitter(CompactorOutputCommitter.class);

success = true;
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

job.setInputFormat(work.getInputformatClass());
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(MergeFileMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);

success = true;
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

job.setInputFormat(work.getInputformatClass());
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(MergeFileMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);

job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(ExecMapper.class);
job.setReducerClass(ExecReducer.class);
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(ExecMapper.class);
job.setReducerClass(ExecReducer.class);
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

Javadoc

Set the InputFormat implementation for the map-reduce job.

Popular methods of JobConf

<init>
A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
set
get
setOutputFormat
Set the OutputFormat implementation for the map-reduce job.
getInt
setMapperClass
Set the Mapper class for the job.
setOutputKeyClass
Set the key class for the job output data.
setOutputValueClass
Set the value class for job outputs.
setReducerClass
Set the Reducer class for the job.
setNumReduceTasks
Set the requisite number of reduce tasks for this job. HOW MANY REDUCES? The right number of reduces
setBoolean
setJobName
Set the user-specified job name.

Popular in Java

Start an intent from android
getSupportFragmentManager (FragmentActivity)
setRequestProperty (URLConnection)
compareTo (BigDecimal)
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
Reference (javax.naming)
JTextField (javax.swing)
Get (org.apache.hadoop.hbase.client)
Used to perform Get operations on a single row. To get everything for a row, instantiate a Get objec
CodeWhisperer alternatives

How to use the setInputFormat method in org.apache.hadoop.mapred.JobConf

Best Java code snippets using org.apache.hadoop.mapred.JobConf.setInputFormat (Showing top 20 results out of 945)

Refine search

How to use
setInputFormat
method
in
org.apache.hadoop.mapred.JobConf