// Old-API (mapred) setting: the grouping comparator decides which map-output keys
// are treated as equal and therefore fed to a single reduce() call together.
jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);
// Old-API (mapred) setting: the grouping comparator decides which map-output keys
// are treated as equal and therefore fed to a single reduce() call together.
jobconf.setOutputValueGroupingComparator(NaturalKeyGroupingComparator.class);
/**
 * Define the comparator that controls which keys are grouped together for a
 * single call to
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator to use
 * @throws IllegalStateException if the job is submitted
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Grouping may only be configured while the job is still being defined.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Define the comparator that controls which keys are grouped together for a
 * single call to
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator to use
 * @throws IllegalStateException if the job is submitted
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Grouping may only be configured while the job is still being defined.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
/**
 * Sets the {@link RawComparator} that decides which keys belong to the same
 * group, i.e. are delivered to one invocation of
 * {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}.
 *
 * @param cls the raw comparator class used for grouping
 * @throws IllegalStateException if the job is submitted
 * @see #setCombinerKeyGroupingComparatorClass(Class)
 */
public void setGroupingComparatorClass(Class<? extends RawComparator> cls
                                       ) throws IllegalStateException {
  // Only legal while the job is still in the DEFINE state.
  ensureState(JobState.DEFINE);
  conf.setOutputValueGroupingComparator(cls);
}
@Override public void setup(Job job) throws IOException { job.getConfiguration().setStrings(HadoopTwitterTokenToolOptions.ARGS_KEY, nonHadoopArgs); job.getConfiguration().setLong(TIMEDELTA, timedelta); final Path tpcOutRoot = new Path(this.actualOutputLocation, TIMEPERIOD_OUTPUT_NAME); job.getConfiguration().set(TIMEPERIOD_COUNT_OUTPUT_ROOT, tpcOutRoot.toString()); if (timedelta != -1) { // if there are multiple times, split a file per day job.setNumReduceTasks(365); } ((JobConf) job.getConfiguration()).setOutputValueGroupingComparator(TokenPairValueGroupingComparator.class); ((JobConf) job.getConfiguration()).setOutputKeyComparatorClass(TokenPairKeyComparator.class); job.setPartitionerClass(TokenPairPartitioner.class); }
@Override
public void setup(Job job) {
  // Thresholds and output location consumed by the PMI-sort mapper/reducer.
  job.getConfiguration().setFloat(MINP_KEY, (float) this.minp);
  job.getConfiguration().setInt(MINPAIRCOUNT_KEY, this.minPairCount);
  job.getConfiguration().set(PAIRMI_LOC, this.outpath.toString());
  // Comparators are old-API (mapred) settings, hence the cast to JobConf.
  final JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.setOutputValueGroupingComparator(PMISortValueGroupingComparator.class);
  jobConf.setOutputKeyComparatorClass(PMISortKeyComparator.class);
  job.setPartitionerClass(PMISortPartitioner.class);
}
@Override public void setup(Job job) throws IOException { job.getConfiguration().setStrings(HadoopTwitterTokenToolOptions.ARGS_KEY, nonHadoopArgs); job.getConfiguration().setLong(TIMEDELTA, timedelta); final Path tpcOutRoot = new Path(this.actualOutputLocation, TIMEPERIOD_OUTPUT_NAME); job.getConfiguration().set(TIMEPERIOD_COUNT_OUTPUT_ROOT, tpcOutRoot.toString()); if (timedelta != -1) { // if there are multiple times, split a file per day job.setNumReduceTasks(365); } ((JobConf) job.getConfiguration()).setOutputValueGroupingComparator(TokenPairValueGroupingComparator.class); ((JobConf) job.getConfiguration()).setOutputKeyComparatorClass(TokenPairKeyComparator.class); job.setPartitionerClass(TokenPairPartitioner.class); }
@Override
public void setup(Job job) {
  // Thresholds and output location consumed by the PMI-sort mapper/reducer.
  job.getConfiguration().setFloat(MINP_KEY, (float) this.minp);
  job.getConfiguration().setInt(MINPAIRCOUNT_KEY, this.minPairCount);
  job.getConfiguration().set(PAIRMI_LOC, this.outpath.toString());
  // Comparators are old-API (mapred) settings, hence the cast to JobConf.
  final JobConf jobConf = (JobConf) job.getConfiguration();
  jobConf.setOutputValueGroupingComparator(PMISortValueGroupingComparator.class);
  jobConf.setOutputKeyComparatorClass(PMISortKeyComparator.class);
  job.setPartitionerClass(PMISortPartitioner.class);
}
/**
 * Verifies a user-supplied grouping comparator is honoured when reduce input
 * is grouped. Composite keys carry a random number acting as a timestamp; the
 * reducer checks values for a key arrive sorted by that timestamp.
 *
 * @throws Exception if submission fails or the polling sleep is interrupted
 */
public void testUserValueGroupingComparator() throws Exception {
  configure();
  conf.setMapperClass(RandomGenMapper.class);
  conf.setReducerClass(AscendingGroupReducer.class);
  conf.setOutputValueGroupingComparator(CompositeIntGroupFn.class);
  final RunningJob runningJob = jc.submitJob(conf);
  // Poll once a second until the job reaches a terminal state.
  while (!runningJob.isComplete()) {
    Thread.sleep(1000);
  }
  if (!runningJob.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
}
// Group reduce input with the composite-key-only comparator.
// NOTE(review): the name suggests it compares only the key portion of a
// composite (key, value) pair — confirm against the comparator's source.
job.setOutputValueGroupingComparator( CompositeKeyOnlyComparator.class);
// Group reduce input with the composite-key-only comparator.
// NOTE(review): the name suggests it compares only the key portion of a
// composite (key, value) pair — confirm against the comparator's source.
job.setOutputValueGroupingComparator( CompositeKeyOnlyComparator.class);
/** * Test all user comparators. Super-test of all tests here. * We generate composite keys that contain a random number, which acts * as a timestamp associated with the record. In our Reduce function, * values for a key should be sorted by the 'timestamp'. * We also provide our own comparators that reverse the default sorting * order. This lets us make sure that the right comparators are used. * @throws Exception */ public void testAllUserComparators() throws Exception { configure(); conf.setMapperClass(RandomGenMapper.class); // use a decreasing comparator so keys are sorted in reverse order conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); conf.setReducerClass(DescendingGroupReducer.class); conf.setOutputValueGroupingComparator(CompositeIntReverseGroupFn.class); RunningJob r_job = jc.submitJob(conf); while (!r_job.isComplete()) { Thread.sleep(1000); } if (!r_job.isSuccessful()) { fail("Oops! The job broke due to an unexpected error"); } }
// Map emits LongWritable values, written out as text.
job.setMapOutputValueClass(LongWritable.class);
job.setOutputFormat(TextOutputFormat.class);
// Group keys for reduce() with the custom comparator rather than raw key equality.
job.setOutputValueGroupingComparator(GroupComparator.class);
/**
 * Verifies a user-supplied grouping comparator is honoured when reduce input
 * is grouped. Composite keys carry a random number acting as a timestamp; the
 * reducer checks values for a key arrive sorted by that timestamp.
 *
 * @throws Exception if submission fails or the polling sleep is interrupted
 */
@Test
public void testUserValueGroupingComparator() throws Exception {
  conf.setMapperClass(RandomGenMapper.class);
  conf.setReducerClass(AscendingGroupReducer.class);
  conf.setOutputValueGroupingComparator(CompositeIntGroupFn.class);
  final RunningJob runningJob = jc.submitJob(conf);
  // Poll once a second until the job reaches a terminal state.
  while (!runningJob.isComplete()) {
    Thread.sleep(1000);
  }
  if (!runningJob.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
}
/** * Test all user comparators. Super-test of all tests here. * We generate composite keys that contain a random number, which acts * as a timestamp associated with the record. In our Reduce function, * values for a key should be sorted by the 'timestamp'. * We also provide our own comparators that reverse the default sorting * order. This lets us make sure that the right comparators are used. * @throws Exception */ @Test public void testAllUserComparators() throws Exception { conf.setMapperClass(RandomGenMapper.class); // use a decreasing comparator so keys are sorted in reverse order conf.setOutputKeyComparatorClass(DecreasingIntComparator.class); conf.setReducerClass(DescendingGroupReducer.class); conf.setOutputValueGroupingComparator(CompositeIntReverseGroupFn.class); RunningJob r_job = jc.submitJob(conf); while (!r_job.isComplete()) { Thread.sleep(1000); } if (!r_job.isSuccessful()) { fail("Oops! The job broke due to an unexpected error"); } }
conf.setMapOutputValueClass( ValueIndexTuple.class );
conf.setOutputKeyComparatorClass( IndexTupleCoGroupingComparator.class ); // sorts by group, then by index
conf.setOutputValueGroupingComparator( CoGroupingComparator.class );
// NOTE(review): the call below overwrites the grouping comparator set on the
// previous line — only GroupingComparator takes effect and the
// CoGroupingComparator call is dead. Confirm which comparator is intended and
// delete the other call.
conf.setOutputValueGroupingComparator( GroupingComparator.class );