private void setupMapper(CubeSegment cubeSeg) throws IOException {
    IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
    flatTableInputFormat.configureJob(job);

    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(SelfDefineSortableKey.class);
    job.setMapOutputValueClass(Text.class);
}
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setMapperClass(mapper);
if (Put.class.equals(outputValueClass)) {
    job.setCombinerClass(PutCombiner.class);
} else {
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);
job.setCombinerClass(HadoopWordCount2Combiner.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// for base cuboid shuffle skew: some row keys aggregate far more records than others
job.setCombinerClass(CuboidReducer.class);
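The comment above captures why a combiner is set here: under shuffle skew a few hot keys dominate the map output, and map-side pre-aggregation shrinks what those keys ship to their reducer. The following is a minimal sketch of that pattern, not Kylin's CuboidReducer; the class name PartialSumCombiner and the Text/LongWritable types are assumptions for illustration.

// Illustrative map-side pre-aggregation combiner (hypothetical names and types).
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class PartialSumCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    private final LongWritable sum = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long total = 0L;
        for (LongWritable value : values) {
            total += value.get();
        }
        sum.set(total);
        // One partial sum per key per map-side spill, so hot keys shuffle far fewer records.
        context.write(key, sum);
    }
}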
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
    List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
    String outputBase = state.getProp(OUTPUT_LOCATION);

    List<WorkUnit> workUnits = Lists.newArrayList();
    for (String dir : dirs) {
        try {
            Path input = new Path(dir);
            Path output = new Path(outputBase, input.getName());

            WorkUnit workUnit = new WorkUnit();
            TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "WordCount_" + input.getName());
            job.setJarByClass(MRTaskFactoryTest.class);
            job.setMapperClass(TokenizerMapper.class);
            job.setCombinerClass(IntSumReducer.class);
            job.setReducerClass(IntSumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            job.setNumReduceTasks(1);
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);

            MRTask.serializeJobToState(workUnit, job);
            workUnits.add(workUnit);
        } catch (IOException ioe) {
            log.error("Failed to create MR job for " + dir, ioe);
        }
    }
    return workUnits;
}
job.setCombinerClass(IndexGeneratorCombiner.class); job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";

    prepareTestFile(testInputFile);

    Configuration cfg = new Configuration();
    setupFileSystems(cfg);

    Job job = Job.getInstance(cfg);

    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));

    job.setJarByClass(getClass());

    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));

    fut.get();
}
groupByJob.setMapOutputKeyClass(BytesWritable.class);
groupByJob.setMapOutputValueClass(NullWritable.class);
groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
groupByJob.setOutputKeyClass(BytesWritable.class);

dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
dimSelectionJob.setOutputKeyClass(BytesWritable.class);
/**
 * @param combiner Whether to run the aggregation as a combiner instead of a reducer.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2),
        createJobInfo(job.getConfiguration(), null)).get(30000);

    assertTrue(HadoopGroupingTestState.values().isEmpty());
}
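This test and the IndexGeneratorCombiner snippet above both set a combiner key grouping comparator, which controls how map-output keys are grouped into a single combine() call (setGroupingComparatorClass does the same for reduce()) independently of the sort order. Below is an illustrative sketch, not the test's YearComparator: it assumes composite Text keys of the form "year:station" and groups them by the year prefix alone.

// Illustrative grouping comparator over hypothetical "year:station" Text keys.
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class YearGroupingComparator extends WritableComparator {
    public YearGroupingComparator() {
        super(Text.class, true); // true: instantiate keys so the object-level compare below is used
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Compare only the year prefix, so every record for a year lands in one group.
        String yearA = a.toString().split(":", 2)[0];
        String yearB = b.toString().split(":", 2)[0];
        return yearA.compareTo(yearB);
    }
}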
job.setCombinerClass(TestCombiner.class); job.setReducerClass(TestReducer.class);
job.setCombinerClass(TestReducer.class); job.setReducerClass(TestReducer.class);
job.setCombinerClass(TestCombiner.class); job.setInputFormatClass(InFormat.class);
job.setCombinerClass(TestCountingCombiner.class);
private void setupMapper() throws IOException {
    String tableName = job.getConfiguration().get(BatchConstants.TABLE_NAME);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(tableName);

    log.info("setting hcat input format, db name {}, table name {}", dbTableNames[0], dbTableNames[1]);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(IIDistinctColumnsMapper.class);
    job.setCombinerClass(IIDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
private void setupMapper(String intermediateTable) throws IOException {
    // FileInputFormat.setInputPaths(job, input);
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);

    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);
    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapperClass(FactDistinctColumnsMapper.class);
    job.setCombinerClass(FactDistinctColumnsCombiner.class);
    job.setMapOutputKeyClass(ShortWritable.class);
    job.setMapOutputValueClass(Text.class);
}
job.setCombinerClass(TestCombiner.class);
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    HadoopUtil.addJarsToDistributedCache(job, "/lib/");
    job.setJobName("AggregateByKeyDriver");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(AggregateByKeyMapper.class);
    job.setReducerClass(AggregateByKeyReducer.class);
    job.setCombinerClass(AggregateByKeyReducer.class);

    // args[0] = input directory
    // args[1] = output directory
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean status = job.waitForCompletion(true);
    THE_LOGGER.info("run(): status=" + status);
    return status ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("MarkovStateTransitionModelDriver");

    // add jars to distributed cache
    HadoopUtil.addJarsToDistributedCache(job, "/lib/");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MarkovStateTransitionModelMapper.class);
    job.setReducerClass(MarkovStateTransitionModelReducer.class);
    job.setCombinerClass(MarkovStateTransitionModelCombiner.class);

    // PairOfStrings = (fromState, toState)
    job.setMapOutputKeyClass(PairOfStrings.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
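Several of the jobs above (CheckerReducer, TestReducer, IntSumReducer, AggregateByKeyReducer) register the same class as both combiner and reducer. That is safe only when the aggregation is commutative and associative and the reducer's output key/value types match its input types, so combined output can be fed through the reducer again. As a hedged counter-example with illustrative names, an averaging reducer like the sketch below must not be reused as a combiner: averaging partial averages does not give the overall average, and the type mismatch (IntWritable in, DoubleWritable out) would also break the job at runtime, since combiner output must match the map output types.

// Illustrative only: correct as a reducer, incorrect if registered as a combiner.
import java.io.IOException;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AverageReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0L;
        long count = 0L;
        for (IntWritable value : values) {
            sum += value.get();
            count++;
        }
        // Valid only when every value for the key is seen exactly once, i.e. in the reduce
        // phase; run as a combiner it would later average already-averaged partial results.
        context.write(key, new DoubleWritable((double) sum / count));
    }
}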