org.apache.hadoop.mapreduce.Job.setMapperClass java code examples

Refine search

/**
 * Runs a map-only job that reads text input from {@code /lol} and writes it
 * as a sequence file to {@code /lolz}.
 *
 * <p>The JVM exits with status 1 when the job does not complete successfully,
 * so that shells and schedulers can detect the failure.
 *
 * @param args command-line arguments; not used
 * @throws IOException if the job cannot be created, configured or submitted
 * @throws InterruptedException if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a class required by the job cannot be found
 */
public static void main(String[] args) throws IOException,
   InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  // Job.getInstance replaces the deprecated Job(Configuration) constructor.
  Job job = Job.getInstance(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);
  job.setMapperClass(Mapper.class);
  // Only takes effect once the number of reduce tasks below is raised above 0.
  job.setReducerClass(Reducer.class);
  // increase if you need sorting or a special number of files
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
  // Submit, wait for completion, and surface a failed job through the exit code
  // instead of silently dropping the result.
  if (!job.waitForCompletion(true)) {
    System.exit(1);
  }
}

// Build a job with default configuration; only the mapper is set explicitly.
Job job = new Job();
job.setMapperClass(MyMapper.class);
// First argument is the input path, second argument is the output path.
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// Blocks until the job finishes.
// NOTE(review): the boolean result is discarded, so 0 is returned even when the job fails.
job.waitForCompletion(true);
return 0;

  Job.getInstance(conf,
   conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
job.setJarByClass(MapReduceHFileSplitterJob.class);
job.setInputFormatClass(HFileInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
 LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
 TableName tableName = TableName.valueOf(tabName);
 job.setMapperClass(HFileCellMapper.class);
 job.setReducerClass(CellSortReducer.class);
 Path outputDir = new Path(hfileOutPath);
 FileOutputFormat.setOutputPath(job, outputDir);
 job.setMapOutputValueClass(MapReduceExtendedCell.class);

/**
 * Builds the Hadoop MapReduce job for the popular-words example.
 *
 * <p>The job reads text from {@code BOOKS_DFS_DIR}, runs {@code TokenizingMapper}
 * and {@code TopNWordsReducer}, and writes to {@code RESULT_DFS_DIR}. The job is
 * only configured here; it is not submitted.
 *
 * @return the configured Hadoop MapReduce job
 * @throws IOException if the job cannot be created or configured
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
  final Job job = new Job();
  final Configuration jobConf = job.getConfiguration();

  // Pick up the explicit distributed file system configuration, if provided.
  jobConf.addResource(U.resolveIgniteUrl(DFS_CFG));

  job.setJobName("HadoopPopularWordExample");
  job.setJarByClass(HadoopPopularWords.class);

  // Processing stages.
  job.setMapperClass(TokenizingMapper.class);
  job.setReducerClass(TopNWordsReducer.class);

  // Input side.
  job.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(job, BOOKS_DFS_DIR);

  // Output side.
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, RESULT_DFS_DIR);

  final boolean localTracker = "local".equals(jobConf.get("mapred.job.tracker", "local"));
  if (!localTracker) {
    return job;
  }

  // The local job tracker allows only one task per wave, but the text input
  // format replaces that with a value calculated from the split size options,
  // so split the job into tasks using a 32MB minimum split size.
  FileInputFormat.setMinInputSplitSize(job, 32L * 1024 * 1024);
  FileInputFormat.setMaxInputSplitSize(job, Long.MAX_VALUE);
  return job;
}

// Map stage: MergeCuboidMapper emits Text keys and Text values.
job.setMapperClass(MergeCuboidMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Reduce stage: CuboidReducer; the final job output is also Text/Text.
job.setReducerClass(CuboidReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

  boolean outputCompression) {
if (setMapper) {
  job.setMapperClass(HadoopWordCount2Mapper.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setReducerClass(HadoopWordCount2Reducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

/**
 * Runs a map-only job that reads through the given input format and verifies
 * the counters the job recorded under the {@code TestTableInputFormat} class name.
 *
 * @param clazz input format class the job reads with
 * @throws IOException if the job cannot be created or its counters cannot be read
 * @throws InterruptedException if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a class required by the job cannot be found
 */
void testInputFormat(Class<? extends InputFormat> clazz)
  throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setMapperClass(ExampleVerifier.class);
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final boolean succeeded = job.waitForCompletion(true);
  assertTrue("job failed!", succeeded);

  // All counter groups share the test class name as a prefix.
  final String owner = TestTableInputFormat.class.getName();
  final String rowGroup = owner + ":row";
  final String familyGroup = owner + ":family";
  final String valueGroup = owner + ":value";

  // Row counters: the filtered-for row is seen, the filtered-out row is not.
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
    job.getCounters().findCounter(rowGroup, "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0,
    job.getCounters().findCounter(rowGroup, "bbb").getValue());

  // Column family counters.
  assertEquals("Saw the wrong number of instances of columnA.", 1,
    job.getCounters().findCounter(familyGroup, "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1,
    job.getCounters().findCounter(familyGroup, "columnB").getValue());

  // Value counters.
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
    job.getCounters().findCounter(valueGroup, "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
    job.getCounters().findCounter(valueGroup, "value bbb").getValue());
}

/**
 * Configures the map-only index building job.
 *
 * <p>{@code args[0]} is the table name and {@code args[1]} the column family;
 * every remaining argument is stored under {@code index.fields}.
 *
 * @param conf configuration that receives the scan, table and index settings
 * @param args table name, column family, then the field names
 * @return the configured job, named after the table
 * @throws IOException if the scan cannot be serialized or the job cannot be created
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  final String table = args[0];
  final String family = args[1];
  System.out.println("****" + table);

  // Input: a default Scan over the named table.
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, table);

  // Settings read back by the mapper.
  conf.set("index.tablename", table);
  conf.set("index.familyname", family);

  // Everything after the first two arguments is a field name.
  final int fieldCount = args.length - 2;
  final String[] indexedFields = new String[fieldCount];
  for (int i = 0; i < fieldCount; i++) {
    indexedFields[i] = args[i + 2];
  }
  conf.setStrings("index.fields", indexedFields);

  final Job indexJob = new Job(conf, table);
  indexJob.setJarByClass(IndexBuilder.class);
  indexJob.setMapperClass(Map.class);
  indexJob.setInputFormatClass(TableInputFormat.class);
  indexJob.setOutputFormatClass(MultiTableOutputFormat.class);
  // No reduce phase.
  indexJob.setNumReduceTasks(0);
  return indexJob;
}

Job job =
  Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
job.setJarByClass(WALPlayer.class);
job.setInputFormatClass(WALInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
 job.setMapperClass(WALKeyValueMapper.class);
 job.setReducerClass(CellSortReducer.class);
 Path outputDir = new Path(hfileOutPath);
 FileOutputFormat.setOutputPath(job, outputDir);
 job.setMapOutputValueClass(MapReduceExtendedCell.class);
} else {
 job.setMapperClass(WALMapper.class);
 job.setOutputFormatClass(MultiTableOutputFormat.class);
 TableMapReduceUtil.addDependencyJars(job);
 TableMapReduceUtil.initCredentials(job);

// Map stage: the mapper class is supplied by the enclosing object; it emits Text/Text.
job.setMapperClass(this.mapperClass);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Reduce stage: CuboidReducer; the final job output is also Text/Text.
job.setReducerClass(CuboidReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

/**
 * Copies the segment's per-partition offsets into the job configuration and
 * sets up the map-only stage that reads through {@code KafkaInputFormat}.
 *
 * @param cubeSeg segment whose source partition start and end offsets are published
 * @throws IOException declared by the signature; not thrown by the visible code
 */
private void setupMapper(CubeSegment cubeSeg) throws IOException {
  final Map<Integer, Long> startOffsets = cubeSeg.getSourcePartitionOffsetStart();
  final Map<Integer, Long> endOffsets = cubeSeg.getSourcePartitionOffsetEnd();

  // Publish the lowest and highest partition ids found in the start offsets.
  final Integer lowest = Collections.min(startOffsets.keySet());
  final Integer highest = Collections.max(startOffsets.keySet());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, lowest.toString());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, highest.toString());

  // One start key and one end key per partition, suffixed with the partition id.
  for (Map.Entry<Integer, Long> start : startOffsets.entrySet()) {
    final Integer partition = start.getKey();
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, start.getValue().toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, endOffsets.get(partition).toString());
  }

  // Map-only stage writing BytesWritable/Text pairs to a sequence file.
  job.setInputFormatClass(KafkaInputFormat.class);
  job.setMapperClass(KafkaFlatTableMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(Text.class);
}

  RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
 HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
 job.setMapperClass(CellSortImporter.class);
 job.setReducerClass(CellReducer.class);
 Path outputDir = new Path(hfileOutPath);
 FileOutputFormat.setOutputPath(job, outputDir);
 job.setMapOutputKeyClass(CellWritableComparable.class);
   RawComparator.class);
 Path partitionsPath =
   new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
 FileSystem fs = FileSystem.get(job.getConfiguration());
 fs.deleteOnExit(partitionsPath);
job.setMapperClass(CellImporter.class);
try (Connection conn = ConnectionFactory.createConnection(conf);
  Table table = conn.getTable(tableName);
  RegionLocator regionLocator = conn.getRegionLocator(tableName)){
 job.setReducerClass(CellSortReducer.class);
 Path outputDir = new Path(hfileOutPath);
 FileOutputFormat.setOutputPath(job, outputDir);
job.setMapperClass(Importer.class);
TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
job.setNumReduceTasks(0);

// Map stage: InMemCuboidMapper emits ByteArrayWritable keys and values.
job.setMapperClass(InMemCuboidMapper.class);
job.setMapOutputKeyClass(ByteArrayWritable.class);
job.setMapOutputValueClass(ByteArrayWritable.class);
// Reduce stage: InMemCuboidReducer; the final job output is Text/Text.
job.setReducerClass(InMemCuboidReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

// Job that reads sequence files and writes text, using the Mapper and Reducer classes as given.
Job job = new Job(conf);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
// Recursively remove any previous output before the job runs.
// NOTE(review): `fs` is defined outside this excerpt, and `out` is never registered
// as the job's output path in the visible lines — confirm against the full source.
fs.delete(out, true);
// Blocks until the job finishes; the boolean result is not checked here.
job.waitForCompletion(true);

Path inputDir = new Path(args[1]);
String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString());
job = Job.getInstance(conf, jobName);
job.setJarByClass(mapperClass);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(mapperClass);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
   job.setReducerClass(TextSortReducer.class);
  } else {
   job.setMapOutputValueClass(Put.class);
   job.setCombinerClass(PutCombiner.class);
   job.setReducerClass(PutSortReducer.class);
   Path outputDir = new Path(hfileOutPath);
   FileOutputFormat.setOutputPath(job, outputDir);
   HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(),
 job.setOutputFormatClass(NullOutputFormat.class);
 job.getConfiguration().setStrings("io.serializations",
   job.getConfiguration().get("io.serializations"),

/**
 * Configures the map-only upload job.
 *
 * <p>{@code args[0]} is the input path and {@code args[1]} the name of the
 * table the job writes to.
 *
 * @param conf configuration the job is created from
 * @param args input path followed by the target table name
 * @return the configured job, named {@code NAME + "_" + tableName}
 * @throws IOException if the job cannot be created or configured
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  final Path source = new Path(args[0]);
  final String targetTable = args[1];

  final Job uploadJob = new Job(conf, NAME + "_" + targetTable);
  uploadJob.setJarByClass(Uploader.class);
  uploadJob.setMapperClass(Uploader.class);
  uploadJob.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.setInputPaths(uploadJob, source);

  // The job has no reducers and writes straight to the table. initTableReducerJob
  // is still called because it sets up the TableOutputFormat; the reducer count
  // is reset to zero afterwards.
  TableMapReduceUtil.initTableReducerJob(targetTable, null, uploadJob);
  uploadJob.setNumReduceTasks(0);
  return uploadJob;
}

/**
 * Entry point of the throttling stress test: builds the job configuration from
 * the command line, runs the job and exits with 0 on success or 1 on failure.
 *
 * @param args command-line arguments parsed against {@code OPTIONS}
 * @throws Exception if parsing, job setup or job execution fails
 */
public static void main(String[] args) throws Exception {
  final CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);
  final Configuration configuration = new Configuration();

  // Optional throttling server: enable it and record the limited resource and server URI.
  if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
    configuration.setBoolean(USE_THROTTLING_SERVER, true);
    configuration.set(RESOURCE_ID, cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest"));

    final String serverUriKey = BrokerConfigurationKeyGenerator.generateKey(
        new SharedRestClientFactory(),
        new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME),
        null,
        SharedRestClientFactory.SERVER_URI_KEY);
    configuration.set(serverUriKey, cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
  }

  // Optional locally enforced QPS limit.
  if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
    configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
  }

  final Job job = Job.getInstance(configuration, "ThrottlingStressTest");
  final Configuration jobConf = job.getConfiguration();
  jobConf.setBoolean("mapreduce.job.user.classpath.first", true);
  jobConf.setBoolean("mapreduce.map.speculative", false);
  jobConf.set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
  StressTestUtils.populateConfigFromCli(jobConf, cli);

  job.setJarByClass(MRStressTest.class);
  job.setInputFormatClass(MyInputFormat.class);
  job.setMapperClass(StresserMapper.class);
  job.setReducerClass(AggregatorReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  // The timestamp suffix gives every run its own output directory.
  FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

  final boolean succeeded = job.waitForCompletion(true);
  System.exit(succeeded ? 0 : 1);
}

// The job jar is located through the mapper class supplied by the surrounding code.
job.setJarByClass(mapperClass);
// Both map output and job output are declared as NullWritable/NullWritable.
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
// Map-only job.
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);
// Blocks until the job finishes; `success` holds the completion result.
boolean success = job.waitForCompletion(true);

job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);
 if (!job.waitForCompletion(true)) {
  return 1;

conf.setBoolean(CONF_COMPACT_MAJOR, major);
Job job = new Job(conf);
job.setJobName("CompactionTool");
job.setJarByClass(CompactionTool.class);
job.setMapperClass(CompactionMapper.class);
job.setInputFormatClass(CompactionInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);
try {
 Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime());
 CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
 CompactionInputFormat.addInputPath(job, inputPath);
 return job.waitForCompletion(true) ? 0 : 1;
} finally {
 fs.delete(stagingDir, true);

Javadoc

Set the Mapper for the job.

Popular methods of Job

getConfiguration
waitForCompletion
Submit the job to the cluster and wait for it to finish.
setInputFormatClass
Set the InputFormat for the job.
setJarByClass
Set the Jar by finding where a given class came from.
setOutputFormatClass
Set the OutputFormat for the job.
setOutputKeyClass
Set the key class for the job output data.
setOutputValueClass
Set the value class for job outputs.
setNumReduceTasks
Set the number of reduce tasks for the job.
setReducerClass
Set the Reducer for the job.
setMapOutputKeyClass
Set the key class for the map output data. This allows the user to specify the map output key class
setMapOutputValueClass
Set the value class for the map output data. This allows the user to specify the map output value cl
<init>

Popular in Java

Updating database using SQL prepared statement
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
getSharedPreferences (Context)
addToBackStack (FragmentTransaction)
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
RandomAccessFile (java.io)
Allows reading from and writing to a file in a random-access manner. This is different from the uni-
HttpURLConnection (java.net)
A URLConnection for HTTP (RFC 2616 [http://tools.ietf.org/html/rfc2616]) used to send and receive d
Random (java.util)
This class provides methods that return pseudo-random values. It is dangerous to seed Random with the
Font (java.awt)
The Font class represents fonts, which are used to render text in a visible way. A font provides the
Response (javax.ws.rs.core)
Defines the contract between a returned instance and the runtime when an application needs to provid
Best plugins for Eclipse

How to use the setMapperClass method in org.apache.hadoop.mapreduce.Job

Best Java code snippets using org.apache.hadoop.mapreduce.Job.setMapperClass (Showing top 20 results out of 2,259)

Refine search

How to use
setMapperClass
method
in
org.apache.hadoop.mapreduce.Job