private void setupMapper() throws IOException {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(UHCDictionaryMapper.class);
    job.setMapOutputKeyClass(SelfDefineSortableKey.class);
    job.setMapOutputValueClass(NullWritable.class);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJobName("Convert Text");
    job.setJarByClass(Mapper.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    // increase if you need sorting or a special number of files
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);

    TextInputFormat.addInputPath(job, new Path("/lol"));
    SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));

    // submit and wait for completion
    job.waitForCompletion(true);
}
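After the job above completes, a quick way to sanity-check the conversion is to read one of the generated sequence files back. This is a minimal sketch and not part of the original program; the file name part-m-00000 is only the conventional name of a map-only job's first output and is assumed here.

// Assumed verification snippet: dump a few records from the first output file of the map-only job.
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path("/lolz/part-m-00000")))) {
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (reader.next(key, value)) {
        System.out.println(key.get() + "\t" + value);
    }
}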
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
}
Path inputPathPattern = new Path(_inputSegmentDir);
Path stagingDir = new Path(_stagingDir);
Path outputDir = new Path(_outputDir);

job.setJarByClass(SegmentCreationJob.class);
job.setJobName(_jobName);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

job.waitForCompletion(true);
if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed : " + job);
}
Job job = Job.getInstance(conf);
Path inputDir = new Path(generateOutDir);
Path outputDir = new Path(sortOutDir);

job.setOutputValueClass(Text.class);
job.setInputFormatClass(TeraInputFormat.class);
job.setOutputFormatClass(TeraOutputFormat.class);

long start = System.currentTimeMillis();
Path partFile = new Path(outputDir, PARTITION_FILENAME);
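The fragment stops right after the partition file path is computed. In a total-order sort the next steps usually sample the input to build partition boundaries and register them with the partitioner; the sketch below assumes the stock library classes (TotalOrderPartitioner and InputSampler from org.apache.hadoop.mapreduce.lib.partition) rather than TeraSort's own variants, so treat it as illustrative only.

// Assumed continuation: wire up the paths and a total-order partitioner keyed off partFile.
FileInputFormat.setInputPaths(job, inputDir);
FileOutputFormat.setOutputPath(job, outputDir);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partFile);
// Sample the input (1% frequency, at most 1000 samples from 10 splits) to compute the boundaries.
InputSampler.writePartitionFile(job, new InputSampler.RandomSampler<>(0.01, 1000, 10));
job.setPartitionerClass(TotalOrderPartitioner.class);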
/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();

    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);

    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);

    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);

    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);

    // The local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size option.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split the job into tasks using a 32MB split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }

    return jobCfg;
}
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
    Job job = Job.getInstance(c);

    job.setJarByClass(HadoopScanMapper.class);
    job.setJobName("testPartitionedVertexScan");
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(CassandraInputFormat.class);

    return job;
}
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);

    conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);

    String[] fields = new String[args.length - 2];
    System.arraycopy(args, 2, fields, 0, fields.length);
    conf.setStrings("index.fields", fields);

    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
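A configureJob helper like this is normally paired with a small driver. The main() below is an assumed sketch rather than code from the original class; it only shows how the helper would typically be invoked.

// Assumed driver sketch: build the HBase configuration, configure the job, run it.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = configureJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}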
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

job.setJarByClass(HadoopWordCount2.class);
protected void configureMapper(Job job) {
    job.setInputFormatClass(AvroKeyRecursiveCombineFileInputFormat.class);
    job.setMapperClass(AvroKeyMapper.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);
}
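Because the map output key and value classes are AvroKey and AvroValue, the backing schemas normally also have to be registered on the job so that Hadoop selects the Avro serialization for the shuffle. The helper below is a hypothetical companion, not part of the original class, and assumes org.apache.avro.mapreduce.AvroJob.

// Hypothetical companion: register the Avro schemas that back the AvroKey/AvroValue map outputs.
protected void configureSchemas(Job job, Schema keySchema, Schema valueSchema) {
    AvroJob.setMapOutputKeySchema(job, keySchema);
    AvroJob.setMapOutputValueSchema(job, valueSchema);
}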
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
        Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();
    job = Job.getInstance(getConf());

    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

    setMapperForGenerator(job);

    job.setOutputFormatClass(NullOutputFormat.class);

    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);

    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}
Job job = new Job(conf);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);

job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
fs.delete(out, true);
// TextOutputFormat requires an output path; point it at the freshly cleared directory.
TextOutputFormat.setOutputPath(job, out);

job.waitForCompletion(true);
private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
    if (putSortReducer) {
        job.setInputFormatClass(NMapInputFormat.class);
        job.setMapperClass(RandomPutGeneratingMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
    } else {
        job.setInputFormatClass(NMapInputFormat.class);
        job.setMapperClass(RandomKVGeneratingMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
    }
}
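The putSortReducer flag only switches the generated map output type; in HBase bulk-load style tests a helper like this is usually followed by choosing the matching sort reducer. The pairing below is an assumed sketch, not code from the original test.

// Assumed follow-up: pick the HBase sort reducer that matches the generated map output type.
setupRandomGeneratorMapper(job, putSortReducer);
if (putSortReducer) {
    job.setReducerClass(PutSortReducer.class);        // sorts Put map outputs before HFile writing
} else {
    job.setReducerClass(KeyValueSortReducer.class);   // sorts KeyValue map outputs
}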
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();
    if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
        configuration.set(RESOURCE_ID, resourceLimited);
        configuration.set(
            BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
                new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
                SharedRestClientFactory.SERVER_URI_KEY),
            cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
    }
    if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");
    job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
job.setJarByClass(mapperClass);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);

boolean success = job.waitForCompletion(true);
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);

if (!job.waitForCompletion(true)) {
    return 1;
}
conf.setBoolean(CONF_COMPACT_MAJOR, major);

Job job = new Job(conf);
job.setJobName("CompactionTool");
job.setJarByClass(CompactionTool.class);
job.setMapperClass(CompactionMapper.class);
job.setInputFormatClass(CompactionInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);

try {
    Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTime());
    CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);
    return job.waitForCompletion(true) ? 0 : 1;
} finally {
    fs.delete(stagingDir, true);
}