/**
 * Returns a new set containing the first n elements in this grouped {@link DataSet}.
 *
 * @param n The desired number of elements for each group.
 * @return A GroupReduceOperator that represents the DataSet containing the elements.
 */
public GroupReduceOperator<T, T> first(int n) {
    if (n < 1) {
        throw new InvalidProgramException("Parameter n of first(n) must be at least 1.");
    }

    return reduceGroup(new FirstReducer<T>(n));
}
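// A minimal, hypothetical usage sketch of first(n) on a grouped DataSet. The element
// values, grouping key, and helper method name below are made up for illustration and
// are not part of this module.
private static void firstNUsageSketch(ExecutionEnvironment env) throws Exception {
    DataSet<Tuple2<String, Integer>> data = env.fromElements(
        Tuple2.of("a", 1), Tuple2.of("a", 2), Tuple2.of("a", 3), Tuple2.of("b", 4));

    // keeps at most two (arbitrary) elements per key group
    DataSet<Tuple2<String, Integer>> firstTwoPerGroup = data
        .groupBy(0)
        .first(2);

    firstTwoPerGroup.print();
}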
private <IN, OUT> DataSet<OUT> applyReduceOperation(UnsortedGrouping<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
    return op1
        .reduceGroup(new IdentityGroupReduce<IN>()).setCombinable(false).setParallelism(info.parallelism).name("PythonReducePreStep")
        .mapPartition(new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type))
        .setParallelism(info.parallelism).name(info.name);
}
private <IN, OUT> DataSet<OUT> applyGroupReduceOperation(UnsortedGrouping<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
    return op1
        .reduceGroup(new IdentityGroupReduce<IN>()).setCombinable(false).setParallelism(info.parallelism).name("PythonGroupReducePreStep")
        .mapPartition(new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type))
        .setParallelism(info.parallelism).name(info.name);
}
@Test
public void testForkingReduceOnKeyedDataset() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);

    UnsortedGrouping<Tuple3<String, Integer, Boolean>> counts = input.groupBy(0);

    DataSet<Tuple3<String, Integer, Boolean>> r1 = counts.reduceGroup(new KeyedCombReducer());
    DataSet<Tuple3<String, Integer, Boolean>> r2 = counts.reduceGroup(new KeyedGroupCombReducer());

    List<Tuple3<String, Integer, Boolean>> actual = r1.union(r2).collect();
    String expected = "k1,6,true\n" +
        "k2,4,true\n" +
        "k1,6,true\n" +
        "k2,4,true\n";

    compareResultAsTuples(actual, expected);
}
@Test
public void testForkingReduceOnKeyedDatasetWithSelection() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);

    UnsortedGrouping<Tuple3<String, Integer, Boolean>> counts = input.groupBy(new KeySelectorX());

    DataSet<Tuple3<String, Integer, Boolean>> r1 = counts.reduceGroup(new KeyedCombReducer());
    DataSet<Tuple3<String, Integer, Boolean>> r2 = counts.reduceGroup(new KeyedGroupCombReducer());

    List<Tuple3<String, Integer, Boolean>> actual = r1.union(r2).collect();
    String expected = "k1,6,true\n" +
        "k2,4,true\n" +
        "k1,6,true\n" +
        "k2,4,true\n";

    compareResultAsTuples(actual, expected);
}
@Test
public void testTupleContainingPojosAndRegularFields() throws Exception {
    /*
     * Test Tuple containing pojos and regular fields
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple3<Integer, CrazyNested, POJO>> ds = CollectionDataSets.getTupleContainingPojos(env);

    DataSet<Integer> reduceDs = ds.groupBy("f0", "f1.*") // nested full tuple selection
        .reduceGroup(new GroupReducer4());
    List<Integer> result = reduceDs.collect();

    String expected = "3\n1\n";

    compareResultAsText(result, expected);
}
@Test
public void testGroupReduceWithAtomicValue() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Integer> ds = env.fromElements(1, 1, 2, 3, 4);
    DataSet<Integer> reduceDs = ds.groupBy("*").reduceGroup(new GroupReduceFunction<Integer, Integer>() {
        @Override
        public void reduce(Iterable<Integer> values, Collector<Integer> out) throws Exception {
            out.collect(values.iterator().next());
        }
    });

    List<Integer> result = reduceDs.collect();

    String expected = "1\n" +
        "2\n" +
        "3\n" +
        "4";

    compareResultAsText(result, expected);
}
@Test
public void testPojoContainigWritableAndTuples() throws Exception {
    /*
     * Test Pojo containing a Writable and Tuples
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<PojoContainingTupleAndWritable> ds = CollectionDataSets.getPojoContainingTupleAndWritable(env);
    DataSet<Integer> reduceDs = ds.groupBy("hadoopFan", "theTuple.*") // full tuple selection
        .reduceGroup(new GroupReducer3());

    List<Integer> result = reduceDs.collect();
    String expected = "1\n5\n";

    compareResultAsText(result, expected);
}
@Test
public void testPojoExtendingFromTupleWithCustomFields() throws Exception {
    /*
     * Test Pojo extending from tuple WITH custom fields
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<FromTupleWithCTor> ds = CollectionDataSets.getPojoExtendingFromTuple(env);
    DataSet<Integer> reduceDs = ds.groupBy("special", "f2")
        .reduceGroup(new GroupReducer2());

    List<Integer> result = reduceDs.collect();
    String expected = "3\n2\n";

    compareResultAsText(result, expected);
}
@Test
public void testCorrectnessofGroupReduceOnTupleContainingPrimitiveByteArrayWithKeyFieldSelectors() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple2<byte[], Integer>> ds = CollectionDataSets.getTuple2WithByteArrayDataSet(env);
    DataSet<Integer> reduceDs = ds.groupBy(0).reduceGroup(new ByteArrayGroupReduce());

    List<Integer> result = reduceDs.collect();

    String expected = "0\n" +
        "1\n" +
        "2\n" +
        "3\n" +
        "4\n";

    compareResultAsText(result, expected);
}
@Test
public void testDeepNesting() throws Exception {
    /*
     * Deep nesting test
     * + null value in pojo
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<CrazyNested> ds = CollectionDataSets.getCrazyNestedDataSet(env);
    DataSet<Tuple2<String, Integer>> reduceDs = ds.groupBy("nestLvl1.nestLvl2.nestLvl3.nestLvl4.f1nal")
        .reduceGroup(new GroupReducer1());

    List<Tuple2<String, Integer>> result = reduceDs.collect();

    String expected = "aa,1\nbb,2\ncc,3\n";

    compareResultAsTuples(result, expected);
}
@Test
public void testReduceOnKeyedDataset() throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);

    List<Tuple3<String, Integer, Boolean>> actual = input.groupBy(0).reduceGroup(new KeyedCombReducer()).collect();
    String expected = "k1,6,true\nk2,4,true\n";

    compareResultAsTuples(actual, expected);
}
@Override
protected void testProgram() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.readTextFile(dataPath);

    input.flatMap(new TokenizeLine())
        .groupBy(0)
        .reduceGroup(new CountWords())
        .writeAsCsv(resultPath, "\n", " ");

    this.result = env.execute();
}
@Test
public void testJavaCollectionsWithinPojos() throws Exception {
    /*
     * Test Java collections within pojos ( == test kryo)
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<CollectionDataSets.PojoWithCollection> ds = CollectionDataSets.getPojoWithCollection(env);
    // f0.f0 is first integer
    DataSet<String> reduceDs = ds.groupBy("key")
        .reduceGroup(new GroupReducer7());
    List<String> result = reduceDs.collect();

    String expected = "callFor key 0 we got: pojo.a=apojo.a=bFor key 0 we got: pojo.a=a2pojo.a=b2\n";

    compareResultAsText(result, expected);
}
@Test
public void testGroupingWithPojoContainingMultiplePojos() throws Exception {
    /*
     * Test grouping with pojo containing multiple pojos (was a bug)
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<CollectionDataSets.PojoWithMultiplePojos> ds = CollectionDataSets.getPojoWithMultiplePojos(env);
    // f0.f0 is first integer
    DataSet<String> reduceDs = ds.groupBy("p2.a2")
        .reduceGroup(new GroupReducer6());
    List<String> result = reduceDs.collect();

    String expected = "b\nccc\nee\n";

    compareResultAsText(result, expected);
}
@Test
public void testStandardCountingWithCombiner() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<IntWritable, IntWritable>> ds = HadoopTestData.getKVPairDataSet(env)
        .map(new Mapper1());

    DataSet<Tuple2<IntWritable, IntWritable>> counts = ds
        .groupBy(0)
        .reduceGroup(new HadoopReduceCombineFunction<IntWritable, IntWritable, IntWritable, IntWritable>(
            new SumReducer(), new SumReducer()));

    String resultPath = tempFolder.newFile().toURI().toString();

    counts.writeAsText(resultPath);
    env.execute();

    String expected = "(0,5)\n" +
        "(1,6)\n" +
        "(2,6)\n" +
        "(3,4)\n";

    compareResultsByLinesInMemory(expected, resultPath);
}
@Test
public void testReduceOnKeyedDatasetWithSelector() throws Exception {
    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // creates the input data and distributes them evenly among the available downstream tasks
    DataSet<Tuple3<String, Integer, Boolean>> input = createKeyedInput(env);

    List<Tuple3<String, Integer, Boolean>> actual = input
        .groupBy(new KeySelectorX())
        .reduceGroup(new KeyedCombReducer())
        .collect();
    String expected = "k1,6,true\nk2,4,true\n";

    compareResultAsTuples(actual, expected);
}
@Test
public void testUnsortedGroupReduceWithTypeInformationTypeHint() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();

    DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.getSmall3TupleDataSet(env);
    DataSet<Integer> resultDs = ds
        .groupBy(0)
        .reduceGroup(new GroupReducer<Tuple3<Integer, Long, String>, Integer>())
        .returns(BasicTypeInfo.INT_TYPE_INFO);
    List<Integer> result = resultDs.collect();

    String expectedResult = "2\n" +
        "3\n" +
        "1\n";

    compareResultAsText(result, expectedResult);
}
@Test
public void testSupportForDataAndEnumSerialization() throws Exception {
    /*
     * Test support for Date and enum serialization
     */
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<PojoWithDateAndEnum> ds = env.generateSequence(0, 2).map(new Mapper1());
    ds = ds.union(CollectionDataSets.getPojoWithDateAndEnum(env));

    DataSet<String> res = ds.groupBy("group").reduceGroup(new GroupReducer1());

    List<String> result = res.collect();
    String expected = "ok\nok";

    compareResultAsText(result, expected);
}
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) {

    DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0);

    DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0)
        .projectSecond(1);

    DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101());

    DataSet<Tuple2<Long, Long>> candidatesDependencies = grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1);

    DataSet<Tuple2<Long, Long>> verticesWithNewComponents = candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0)
        .with(new Join222())
        .groupBy(0).aggregate(Aggregations.MIN, 1);

    DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0)
        .flatMap(new FlatMapJoin());

    DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId);

    return depResult;
}