private void setupMapper(CubeSegment cubeSeg) throws IOException {
    IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
    flatTableInputFormat.configureJob(job);

    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(SelfDefineSortableKey.class);
    job.setMapOutputValueClass(Text.class);
}
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setMapperClass(mapper);
if (Put.class.equals(outputValueClass)) {
    job.setCombinerClass(PutCombiner.class);
} else {
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);
job.setCombinerClass(HadoopWordCount2Combiner.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// for base cuboid shuffle skew: some row keys aggregate far more records than others
job.setCombinerClass(CuboidReducer.class);
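The comment above captures why a combiner is set here: under shuffle skew a few hot keys dominate the map output, and map-side pre-aggregation shrinks what those keys ship to their reducer. The following is a minimal sketch of that pattern, not Kylin's CuboidReducer; the class name PartialSumCombiner and the Text/LongWritable types are assumptions for illustration.

// Illustrative map-side pre-aggregation combiner (hypothetical names and types).
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class PartialSumCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    private final LongWritable sum = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable value : values) {
            total += value.get();
        }
        sum.set(total);
        // One partial sum per key per map-side spill, so hot keys shuffle far fewer records.
        context.write(key, sum);
    }
}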
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
    String outputBase = state.getProp(OUTPUT_LOCATION);

    List<WorkUnit> workUnits = Lists.newArrayList();
    for (String dir : dirs) {
        try {
            Path input = new Path(dir);
            Path output = new Path(outputBase, input.getName());

            WorkUnit workUnit = new WorkUnit();
            TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "WordCount_" + input.getName());
            job.setJarByClass(MRTaskFactoryTest.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setNumReduceTasks(1);
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);

            MRTask.serializeJobToState(workUnit, job);
            workUnits.add(workUnit);
        } catch (IOException ioe) {
            log.error("Failed to create MR job for " + dir, ioe);
        }
    }
    return workUnits;
}
job.setCombinerClass(IndexGeneratorCombiner.class); job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();
    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));

    fut.get();
}
groupByJob.setMapOutputKeyClass(BytesWritable.class);
groupByJob.setMapOutputValueClass(NullWritable.class);
groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setOutputKeyClass(BytesWritable.class);

dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
dimSelectionJob.setOutputKeyClass(BytesWritable.class);
/**
 * @param combiner Whether to run the aggregation as a combiner instead of a reducer.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2),
        createJobInfo(job.getConfiguration(), null)).get(30000);

    assertTrue(HadoopGroupingTestState.values().isEmpty());
}
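This test and the IndexGeneratorCombiner snippet above both set a combiner key grouping comparator, which controls how map-output keys are grouped into a single combine() call (setGroupingComparatorClass does the same for reduce()) independently of the sort order. Below is an illustrative sketch, not the test's YearComparator: it assumes composite Text keys of the form "year:station" and groups them by the year prefix alone.

// Illustrative grouping comparator over hypothetical "year:station" Text keys.
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class YearGroupingComparator extends WritableComparator {
    public YearGroupingComparator() {
        super(Text.class, true); // true: instantiate keys so the object-level compare below is used
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Compare only the year prefix, so every record for a year lands in one group.
        String yearA = a.toString().split(":", 2)[0];
        String yearB = b.toString().split(":", 2)[0];
        return yearA.compareTo(yearB);
    }
}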
job.setCombinerClass(TestCombiner.class); job.setReducerClass(TestReducer.class);
job.setCombinerClass(TestReducer.class); job.setReducerClass(TestReducer.class);
job.setCombinerClass(TestCombiner.class); job.setInputFormatClass(InFormat.class);
job.setCombinerClass(TestCountingCombiner.class);
private void setupMapper() throws IOException {
    String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

    log.info("setting hcat input format, db name {}, table name {}", dbTableNames[0], dbTableNames[1]);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(IIDistinctColumnsMapper.class);
    job.setCombinerClass(IIDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
    // FileInputFormat.setInputPaths(job, input);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
job.setCombinerClass(TestCombiner.class);
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    HadoopUtil.addJarsToDistributedCache(job, "/lib/");
    job.setJobName("AggregateByKeyDriver");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(AggregateByKeyMapper.class);
    job.setReducerClass(AggregateByKeyReducer.class);
    job.setCombinerClass(AggregateByKeyReducer.class);

    // args[0] = input directory
    // args[1] = output directory
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean status = job.waitForCompletion(true);
    THE_LOGGER.info("run(): status=" + status);
    return status ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("MarkovStateTransitionModelDriver");

    // add jars to distributed cache
    HadoopUtil.addJarsToDistributedCache(job, "/lib/");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MarkovStateTransitionModelMapper.class);
    job.setReducerClass(MarkovStateTransitionModelReducer.class);
    job.setCombinerClass(MarkovStateTransitionModelCombiner.class);

    // PairOfStrings = (fromState, toState)
    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
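Several of the jobs above (CheckerReducer, TestReducer, IntSumReducer, AggregateByKeyReducer) register the same class as both combiner and reducer. That is safe only when the aggregation is commutative and associative and the reducer's output key/value types match its input types, so combined output can be fed through the reducer again. As a hedged counter-example with illustrative names, an averaging reducer like the sketch below must not be reused as a combiner: averaging partial averages does not give the overall average, and the type mismatch (IntWritable in, DoubleWritable out) would also break the job at runtime, since combiner output must match the map output types.

// Illustrative only: correct as a reducer, incorrect if registered as a combiner.
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AverageReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0L;
        long count = 0L;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        // Valid only when every value for the key is seen exactly once, i.e. in the reduce
        // phase; run as a combiner it would later average already-averaged partial results.
        context.write(key, new DoubleWritable((double) sum / count));
    }
}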