/**
 * Applies {@code AllCommentCntReducer}, wrapped in a {@code HadoopReduceFunction}, to the
 * key/value test data set without any preceding {@code groupBy}, writes the result as text
 * and compares it against the single expected line {@code (42,15)}.
 */
@Test
public void testUngroupedHadoopReducer() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> input = HadoopTestData.getKVPairDataSet(env);

    // No groupBy here: the wrapped reducer is handed the data set as one group.
    HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> allCommentCounter =
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new AllCommentCntReducer());
    DataSet<Tuple2<IntWritable, IntWritable>> totalCommentCnt = input.reduceGroup(allCommentCounter);

    String outputUri = tempFolder.newFile().toURI().toString();
    totalCommentCnt.writeAsText(outputUri);
    env.execute();

    String expectedLines = "(42,15)\n";
    compareResultsByLinesInMemory(expectedLines, outputUri);
}
/**
 * Passes a {@code JobConf} carrying {@code my.cntPrefix=Hello} to a
 * {@code HadoopReduceFunction} that wraps {@code ConfigurableCntReducer}, runs it on the
 * test data (mapped through {@code Mapper2} and grouped on field 0), and compares the
 * written per-key output with the expected lines.
 */
@Test
public void testConfigurationViaJobConf() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Configuration entry handed to the wrapped reducer through the JobConf.
    JobConf jobConf = new JobConf();
    jobConf.set("my.cntPrefix", "Hello");

    DataSet<Tuple2<IntWritable, Text>> mappedInput =
            HadoopTestData.getKVPairDataSet(env).map(new Mapper2());

    HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> configuredCounter =
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new ConfigurableCntReducer(), jobConf);
    DataSet<Tuple2<IntWritable, IntWritable>> countsPerKey =
            mappedInput.groupBy(0).reduceGroup(configuredCounter);

    String outputUri = tempFolder.newFile().toURI().toString();
    countsPerKey.writeAsText(outputUri);
    env.execute();

    String expectedLines = "(0,0)\n"
            + "(1,0)\n"
            + "(2,1)\n"
            + "(3,1)\n"
            + "(4,1)\n";
    compareResultsByLinesInMemory(expectedLines, outputUri);
}
@Test public void testConfigurationViaJobConf() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); JobConf conf = new JobConf(); conf.set("my.cntPrefix", "Hello"); DataSet<Tuple2<IntWritable, Text>> ds = HadoopTestData.getKVPairDataSet(env). map(new Mapper4()); DataSet<Tuple2<IntWritable, IntWritable>> hellos = ds. groupBy(0). reduceGroup(new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>( new ConfigurableCntReducer(), conf)); String resultPath = tempFolder.newFile().toURI().toString(); hellos.writeAsText(resultPath); env.execute(); // return expected result String expected = "(0,0)\n" + "(1,0)\n" + "(2,1)\n" + "(3,1)\n" + "(4,1)\n"; compareResultsByLinesInMemory(expected, resultPath); }
/**
 * Runs {@code CommentCntReducer}, wrapped in a {@code HadoopReduceFunction}, on the test
 * data (mapped through {@code Mapper1} and grouped on field 0), writes the result as text
 * and compares it with the expected per-key lines.
 */
@Test
public void testStandardGrouping() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, Text>> mappedInput =
            HadoopTestData.getKVPairDataSet(env).map(new Mapper1());

    HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable> commentCounter =
            new HadoopReduceFunction<IntWritable, Text, IntWritable, IntWritable>(
                    new CommentCntReducer());
    DataSet<Tuple2<IntWritable, IntWritable>> countsPerKey =
            mappedInput.groupBy(0).reduceGroup(commentCounter);

    String outputUri = tempFolder.newFile().toURI().toString();
    countsPerKey.writeAsText(outputUri);
    env.execute();

    String expectedLines = "(0,0)\n"
            + "(1,3)\n"
            + "(2,5)\n"
            + "(3,5)\n"
            + "(4,2)\n";
    compareResultsByLinesInMemory(expectedLines, outputUri);
}