/**
 * Checks the correctness of an all-groupreduce (a {@code reduceGroup} applied
 * without a preceding grouping) for tuples with combine.
 *
 * <p>The test is skipped via a JUnit assumption when {@code mode} is
 * {@code TestExecutionMode.COLLECTION}.
 *
 * @throws Exception if building or executing the program fails
 */
@Test
public void testCorrectnessOfAllGroupReduceForTuplesWithCombine() throws Exception {
    org.junit.Assume.assumeTrue(mode != TestExecutionMode.COLLECTION);

    // Parameters handed to the reduce: a ship-strategy hint requesting repartitioning.
    final Configuration shipStrategyHint = new Configuration();
    shipStrategyHint.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION);

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // Route the 3-tuple data set through an identity mapper running with parallelism 4.
    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets
            .get3TupleDataSet(env)
            .map(new IdentityMapper<Tuple3<Integer, Long, String>>())
            .setParallelism(4);

    // Reduce the whole data set as a single group and fetch the result.
    final List<Tuple2<Integer, String>> actual = tuples
            .reduceGroup(new Tuple3AllGroupReduceWithCombine())
            .withParameters(shipStrategyHint)
            .collect();

    final String expected = "322,"
            + "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest\n";

    compareResultAsTuples(actual, expected);
}
.reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID()); .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID());
private DataSet<Tuple> translateGlobalGroupBy(DataSet<Tuple> input, FlowNode node, int dop, String[] sortKeys, Order sortOrder, Fields outFields) { DataSet<Tuple> result = input; // sort on sorting keys if necessary if(sortKeys != null && sortKeys.length > 0) { result = result .sortPartition(sortKeys[0], sortOrder) .setParallelism(1) .name("reduce-"+ node.getID()); for(int i=1; i<sortKeys.length; i++) { result = result .sortPartition(sortKeys[i], sortOrder) .setParallelism(1); } } // group all data return result .reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-"+ node.getID()); }
.groupBy(groupingKeys) .reduceGroup(new CoGroupReducer(node)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .returns(new TupleTypeInfo(outFields)) DataSet<Tuple> joinResult = ((DataSet<Tuple2<Tuple, Tuple[]>>) input) .reduceGroup(new CoGroupReducer(node)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(1) .returns(new TupleTypeInfo(outFields)) .sortGroup(1, Order.DESCENDING) .reduceGroup(new CoGroupBufferReducer(node)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .returns(new TupleTypeInfo(outFields)) .setParallelism(1) .reduceGroup(new CoGroupBufferReducer(node)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(1) .returns(new TupleTypeInfo(outFields))
.reduceGroup(new GroupByReducer(node)) .returns(new TupleTypeInfo(outFields)) .withParameters(this.getFlinkNodeConfig(node)) .setParallelism(dop) .name("reduce-" + node.getID());