public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  // increase if you need sorting or a special number of files
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
  // submit and wait for completion
  job.waitForCompletion(true);
}
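To sanity-check the conversion, the resulting SequenceFile can be read back with the same key/value types the job declares. A minimal sketch, assuming one of the map output files under /lolz (the part-m-00000 name is an assumption, not part of the original):

// Hypothetical check of the converted output; the part file name is assumed.
Configuration conf = new Configuration();
Path seqFile = new Path("/lolz/part-m-00000");
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(seqFile));
LongWritable key = new LongWritable();
Text value = new Text();
while (reader.next(key, value)) {
  System.out.println(key + "\t" + value);
}
reader.close();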
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  System.out.println("****" + tableName);
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  conf.set("index.tablename", tableName);
  conf.set("index.familyname", columnFamily);
  String[] fields = new String[args.length - 2];
  System.arraycopy(args, 2, fields, 0, fields.length);
  conf.setStrings("index.fields", fields);
  Job job = new Job(conf, tableName);
  job.setJarByClass(IndexBuilder.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  return job;
}
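The snippet shows only the static configuration helper. A hypothetical driver that invokes it might look like the following; the usage string and exit codes are assumptions, not part of the original:

// Hypothetical driver around the configureJob(...) helper above.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  if (args.length < 3) {
    System.err.println("Usage: IndexBuilder <tableName> <columnFamily> <field> [<field>...]");
    System.exit(2);
  }
  Job job = configureJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}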
job.setJarByClass(RowCounter.class);
Scan scan = new Scan();
scan.setCacheBlocks(false);
job.setOutputFormatClass(NullOutputFormat.class);
TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class,
    ImmutableBytesWritable.class, Result.class, job);
job.setNumReduceTasks(0);
return job;
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);
  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
private void setupMapper(CubeSegment cubeSeg) throws IOException {
  // set the segment's offset info to job conf
  Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
  Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();
  Integer minPartition = Collections.min(offsetStart.keySet());
  Integer maxPartition = Collections.max(offsetStart.keySet());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
  job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());
  for (Integer partition : offsetStart.keySet()) {
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition,
        offsetStart.get(partition).toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition,
        offsetEnd.get(partition).toString());
  }
  job.setMapperClass(KafkaFlatTableMapper.class);
  job.setInputFormatClass(KafkaInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setNumReduceTasks(0);
}
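On the task side, a mapper or input format would typically read these per-partition offsets back out of the configuration. A rough sketch under that assumption; the readOffsets helper and its return shape are hypothetical, only the configuration key constants come from the snippet above:

// Hypothetical read-back of the offsets written by setupMapper above;
// returns partition -> {startOffset, endOffset}.
static Map<Integer, long[]> readOffsets(Configuration conf) {
  int minPartition = Integer.parseInt(conf.get(CONFIG_KAFKA_PARITION_MIN));
  int maxPartition = Integer.parseInt(conf.get(CONFIG_KAFKA_PARITION_MAX));
  Map<Integer, long[]> ranges = new HashMap<>();
  for (int p = minPartition; p <= maxPartition; p++) {
    long start = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_START + p));
    long end = Long.parseLong(conf.get(CONFIG_KAFKA_PARITION_END + p));
    ranges.put(p, new long[] { start, end });
  }
  return ranges;
}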
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers. Just write straight to table. Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
job.getConfiguration().setInt("INDEX", labelIndex);
job.getConfiguration().set("LABELS", labels);
job.setJarByClass(getClass());
scan = new Scan();
scan.setCacheBlocks(false);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
TableMapReduceUtil.initCredentials(job);
job.setNumReduceTasks(0);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;
"syncTable_" + sourceTableName + "-" + targetTableName)); Configuration jobConf = job.getConfiguration(); job.setJarByClass(HashTable.class); jobConf.set(SOURCE_HASH_DIR_CONF_KEY, sourceHashDir.toString()); jobConf.set(SOURCE_TABLE_CONF_KEY, sourceTableName); SyncMapper.class, null, null, job); job.setNumReduceTasks(0); job.setOutputFormatClass(NullOutputFormat.class); } else {
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
  int hllShardBase = MapReduceUtil.getCuboidHLLCounterReducerNum(cubeSeg.getCubeInstance());
  job.setReducerClass(CalculateStatsFromBaseCuboidReducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(hllShardBase);
  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
  deletePath(job.getConfiguration(), output);
}
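deletePath(...) is a helper that is not part of the snippet. A minimal sketch of what it plausibly does, assuming it simply clears a pre-existing output directory so that FileOutputFormat does not reject it; the method body here is an assumption:

// Hypothetical helper: recursively remove the output path if it already exists.
private void deletePath(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
}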
Job job = new Job(conf);
job.setJobName(jobname);
job.setJarByClass(ExportSnapshot.class);
TableMapReduceUtil.addDependencyJars(job);
job.setMapperClass(ExportMapper.class);
job.setInputFormatClass(ExportSnapshotInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);
if (!job.waitForCompletion(true)) {
  throw new ExportSnapshotException(job.getStatus().getFailureInfo());
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
job.setJarByClass(VerifyReplication.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setNumReduceTasks(0);
return job;
job.setJarByClass(mapperClass);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);
boolean success = job.waitForCompletion(true);
job.setJarByClass(CompactionTool.class);
job.setMapperClass(CompactionMapper.class);
job.setInputFormatClass(CompactionInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);
return job.waitForCompletion(true) ? 0 : 1;
} finally {
  fs.delete(stagingDir, true);
job.setJarByClass(ParseJson2.class);
TableMapReduceUtil.initTableMapperJob(input, scan, ParseMapper.class,
    ImmutableBytesWritable.class, Put.class, job);
TableMapReduceUtil.initTableReducerJob(output, IdentityTableReducer.class, job);
/*[*/job.setNumReduceTasks(0);/*]*/
System.exit(job.waitForCompletion(true) ? 0 : 1);
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}
this.job.setJarByClass(MRJobLauncher.class);
this.job.setMapperClass(TaskRunner.class);
this.job.setNumReduceTasks(0);
this.job.setInputFormatClass(GobblinWorkUnitsInputFormat.class);
this.job.setOutputFormatClass(GobblinOutputFormat.class);
this.job.setMapOutputKeyClass(NullWritable.class);
this.job.setMapOutputValueClass(NullWritable.class);
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
  Job job = Job.getInstance(c);
  job.setJarByClass(HadoopScanMapper.class);
  job.setJobName("testPartitionedVertexScan");
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(CassandraInputFormat.class);
  return job;
}
job.setJarByClass(SegmentCreationJob.class);
job.setJobName(_jobName);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setNumReduceTasks(0);
// copy the job properties into the Hadoop configuration
for (Object key : _properties.keySet()) {
  job.getConfiguration().set(key.toString(), _properties.getProperty(key.toString()));
}
job.waitForCompletion(true);
if (!job.isSuccessful()) {
  throw new RuntimeException("Job failed : " + job);
public Job createSubmittableJob(String[] args) throws IOException {
  Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionsPath);
  Job job = Job.getInstance(getConf(),
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
  Configuration jobConf = job.getConfiguration();
  jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);
  TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
      HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);
  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
  job.setReducerClass(Reducer.class); // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));
  return job;
}
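generatePartitions(...) is not shown in the snippet. A rough, hypothetical sketch of the partition-file side, assuming the boundary keys are already known and only need to be written for the TotalOrderPartitioner configured above; the method name, parameters, and the source of the split points are all assumptions:

// Hypothetical: write one boundary key per hash file to the partitions file.
// Where the split points come from (e.g. sampled region boundaries) is assumed.
private void writePartitionFile(Configuration conf, Path path,
    List<ImmutableBytesWritable> splitPoints) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(fs.makeQualified(path)),
      SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
      SequenceFile.Writer.valueClass(NullWritable.class));
  try {
    for (ImmutableBytesWritable split : splitPoints) {
      writer.append(split, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}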