/**
 * Checks {@code DataSetUtils.countElementsPerPartition} on a generated sequence:
 * the result must contain as many records as the environment's parallelism, and
 * summing field 1 of those records must give the number of generated elements.
 */
@Test
public void testCountElementsPerPartition() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	long expectedSize = 100L;
	DataSet<Long> sequence = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> perPartition = DataSetUtils.countElementsPerPartition(sequence);

	// Read the parallelism before triggering the count job, as the assertion arguments were ordered.
	int parallelism = env.getParallelism();
	long recordCount = perPartition.count();
	Assert.assertEquals(parallelism, recordCount);

	// Aggregate field 1 across all records and compare with the generated element count.
	Tuple2<Integer, Long> aggregated = perPartition.sum(1).collect().get(0);
	Assert.assertEquals(expectedSize, aggregated.f1.longValue());
}
/**
 * Hash-partitions the mapped 3-tuple data set on fields 1 and 2, groups by the
 * same two fields, sums field 0, and compares the collected result as text.
 */
@Test
public void testHashPartitionByKeyField2() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Build the pipeline step by step: map -> hash partition -> group -> sum.
	DataSet<Tuple3<Integer, Long, String>> prefixed = input.map(new PrefixMapper());
	DataSet<Tuple3<Integer, Long, String>> partitioned = prefixed.partitionByHash(1, 2);
	AggregateOperator<Tuple3<Integer, Long, String>> summed = partitioned.groupBy(1, 2).sum(0);

	List<Tuple3<Integer, Long, String>> actual = summed.collect();

	String expected = "(1,1,Hi)\n" +
			"(5,2,Hello)\n" +
			"(4,3,Hello)\n" +
			"(5,3,I am )\n" +
			"(6,3,Luke )\n" +
			"(34,4,Comme)\n" +
			"(65,5,Comme)\n" +
			"(111,6,Comme)";

	compareResultAsText(actual, expected);
}
/**
 * Range-partitions the mapped 3-tuple data set on fields 1 and 2, groups by the
 * same two fields, sums field 0, and compares the collected result as text.
 */
@Test
public void testRangePartitionByKeyField2() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	// Build the pipeline step by step: map -> range partition -> group -> sum.
	DataSet<Tuple3<Integer, Long, String>> prefixed = input.map(new PrefixMapper());
	DataSet<Tuple3<Integer, Long, String>> partitioned = prefixed.partitionByRange(1, 2);
	AggregateOperator<Tuple3<Integer, Long, String>> summed = partitioned.groupBy(1, 2).sum(0);

	List<Tuple3<Integer, Long, String>> actual = summed.collect();

	String expected = "(1,1,Hi)\n" +
			"(5,2,Hello)\n" +
			"(4,3,Hello)\n" +
			"(5,3,I am )\n" +
			"(6,3,Luke )\n" +
			"(34,4,Comme)\n" +
			"(65,5,Comme)\n" +
			"(111,6,Comme)";

	compareResultAsText(actual, expected);
}