// Old-API (mapred) setting: the grouping comparator decides which map-output keys
// are treated as equal and therefore fed to a single reduce() call together.
jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);
// Old-API (mapred) setting: the grouping comparator decides which map-output keys
// are treated as equal and therefore fed to a single reduce() call together.
jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);
/**
 * Define the comparator that controls which keys are grouped together for a
 * single call to
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator to use
 * @throws IllegalStateException if the job is submitted
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Grouping may only be configured while the job is still being defined.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Define the comparator that controls which keys are grouped together for a
 * single call to
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator to use
 * @throws IllegalStateException if the job is submitted
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Grouping may only be configured while the job is still being defined.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
@Override public void setup(Job job) throws IOException { job.getConfiguration().setStrings(HadoopTwitterTokenToolOptions.ARGS_KEY, nonHadoopArgs); job.getConfiguration().setLong(TIMEDELTA, timedelta); final Path tpcOutRoot = new Path(this.actualOutputLocation, TIMEPERIOD_OUTPUT_NAME); job.getConfiguration().set(TIMEPERIOD_COUNT_OUTPUT_ROOT, tpcOutRoot.toString()); if (timedelta != -1) { // if there are multiple times, split a file per day job.setNumReduceTasks(365); } ((JobConf) job.getConfiguration()).setOutputValueGroupingComparator(TokenPairValueGroupingComparator.class); ((JobConf) job.getConfiguration()).setOutputKeyComparatorClass(TokenPairKeyComparator.class); job.setPartitionerClass(TokenPairPartitioner.class); }
@Override
public void setup(Job job) {
  // Thresholds and output location consumed by the PMI-sort mapper/reducer.
  job.getConfiguration().setFloat(MINP_KEY, (float) this.minp);
  job.getConfiguration().setInt(MINPAIRCOUNT_KEY, this.minPairCount);
  job.getConfiguration().set(PAIRMI_LOC, this.outpath.toString());
  // Comparators are old-API (mapred) settings, hence the cast to JobConf.
  final JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.setOutputValueGroupingComparator(PMISortValueGroupingComparator.class);
  jobConf.setOutputKeyComparatorClass(PMISortKeyComparator.class);
  job.setPartitionerClass(PMISortPartitioner.class);
}
@Override public void setup(Job job) throws IOException { job.getConfiguration().setStrings(HadoopTwitterTokenToolOptions.ARGS_KEY, nonHadoopArgs); job.getConfiguration().setLong(TIMEDELTA, timedelta); final Path tpcOutRoot = new Path(this.actualOutputLocation, TIMEPERIOD_OUTPUT_NAME); job.getConfiguration().set(TIMEPERIOD_COUNT_OUTPUT_ROOT, tpcOutRoot.toString()); if (timedelta != -1) { // if there are multiple times, split a file per day job.setNumReduceTasks(365); } ((JobConf) job.getConfiguration()).setOutputValueGroupingComparator(TokenPairValueGroupingComparator.class); ((JobConf) job.getConfiguration()).setOutputKeyComparatorClass(TokenPairKeyComparator.class); job.setPartitionerClass(TokenPairPartitioner.class); }
@Override
public void setup(Job job) {
  // Thresholds and output location consumed by the PMI-sort mapper/reducer.
  job.getConfiguration().setFloat(MINP_KEY, (float) this.minp);
  job.getConfiguration().setInt(MINPAIRCOUNT_KEY, this.minPairCount);
  job.getConfiguration().set(PAIRMI_LOC, this.outpath.toString());
  // Comparators are old-API (mapred) settings, hence the cast to JobConf.
  final JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.setOutputValueGroupingComparator(PMISortValueGroupingComparator.class);
  jobConf.setOutputKeyComparatorClass(PMISortKeyComparator.class);
  job.setPartitionerClass(PMISortPartitioner.class);
}
/**
 * Verifies a user-supplied grouping comparator is honoured when reduce input
 * is grouped. Composite keys carry a random number acting as a timestamp; the
 * reducer checks values for a key arrive sorted by that timestamp.
 *
 * @throws Exception if submission fails or the polling sleep is interrupted
 */
public void testUserValueGroupingComparator() throws Exception {
  configure();
  conf.setMapperClass(RandomGenMapper.class);
  conf.setReducerClass(AscendingGroupReducer.class);
  conf.setOutputValueGroupingComparator(CompositeIntGroupFn.class);
  final RunningJob runningJob = jc.submitJob(conf);
  // Poll once a second until the job reaches a terminal state.
  while (!runningJob.isComplete()) {
    Thread.sleep(1000);
  }
  if (!runningJob.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
}
// Group reduce input with the composite-key-only comparator.
// NOTE(review): the name suggests it compares only the key portion of a
// composite (key, value) pair — confirm against the comparator's source.
job.setOutputValueGroupingComparator( CompositeKeyOnlyComparator.class);
// Group reduce input with the composite-key-only comparator.
// NOTE(review): the name suggests it compares only the key portion of a
// composite (key, value) pair — confirm against the comparator's source.
job.setOutputValueGroupingComparator( CompositeKeyOnlyComparator.class);
/** * Test all user comparators. Super-test of all tests here. * We generate composite keys that contain a random number, which acts * as a timestamp associated with the record. In our Reduce function, * values for a key should be sorted by the 'timestamp'. * We also provide our own comparators that reverse the default sorting * order. This lets us make sure that the right comparators are used. * @throws Exception */ public void testAllUserComparators() throws Exception { configure(); conf.setMapperClass(RandomGenMapper.class); // use a decreasing comparator so keys are sorted in reverse order conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); conf.setReducerClass(DescendingGroupReducer.class); conf.setOutputValueGroupingComparator(CompositeIntReverseGroupFn.class); RunningJob r_job = jc.submitJob(conf); while (!r_job.isComplete()) { Thread.sleep(1000); } if (!r_job.isSuccessful()) { fail("Oops! The job broke due to an unexpected error"); } }
// Map emits LongWritable values, written out as text.
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormat(TextOutputFormat.class);
// Group keys for reduce() with the custom comparator rather than raw key equality.
job.setOutputValueGroupingComparator(GroupComparator.class);
/**
 * Verifies a user-supplied grouping comparator is honoured when reduce input
 * is grouped. Composite keys carry a random number acting as a timestamp; the
 * reducer checks values for a key arrive sorted by that timestamp.
 *
 * @throws Exception if submission fails or the polling sleep is interrupted
 */
@Test
public void testUserValueGroupingComparator() throws Exception {
  conf.setMapperClass(RandomGenMapper.class);
  conf.setReducerClass(AscendingGroupReducer.class);
  conf.setOutputValueGroupingComparator(CompositeIntGroupFn.class);
  final RunningJob runningJob = jc.submitJob(conf);
  // Poll once a second until the job reaches a terminal state.
  while (!runningJob.isComplete()) {
    Thread.sleep(1000);
  }
  if (!runningJob.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
}
/** * Test all user comparators. Super-test of all tests here. * We generate composite keys that contain a random number, which acts * as a timestamp associated with the record. In our Reduce function, * values for a key should be sorted by the 'timestamp'. * We also provide our own comparators that reverse the default sorting * order. This lets us make sure that the right comparators are used. * @throws Exception */ @Test public void testAllUserComparators() throws Exception { conf.setMapperClass(RandomGenMapper.class); // use a decreasing comparator so keys are sorted in reverse order conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); conf.setReducerClass(DescendingGroupReducer.class); conf.setOutputValueGroupingComparator(CompositeIntReverseGroupFn.class); RunningJob r_job = jc.submitJob(conf); while (!r_job.isComplete()) { Thread.sleep(1000); } if (!r_job.isSuccessful()) { fail("Oops! The job broke due to an unexpected error"); } }
conf.setMapOutputValueClass( ValueIndexTuple.class );
conf.setOutputKeyComparatorClass( IndexTupleCoGroupingComparator.class ); // sorts by group, then by index
conf.setOutputValueGroupingComparator( CoGroupingComparator.class );
// NOTE(review): the call below overwrites the grouping comparator set on the
// previous line — only GroupingComparator takes effect and the
// CoGroupingComparator call is dead. Confirm which comparator is intended and
// delete the other call.
conf.setOutputValueGroupingComparator( GroupingComparator.class );