/**
 * Executes a program whose parallelism is {@code ExecutionConfig.PARALLELISM_AUTO_MAX}.
 *
 * <p>Two outcomes are accepted: the job runs and the output collection holds
 * {@code PARALLELISM} elements, or execution throws an exception whose cause chain
 * carries {@code ExecutionGraphBuilder.PARALLELISM_AUTO_MAX_ERROR_MESSAGE}.
 */
@Test
public void testProgramWithAutoParallelism() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
    environment.getConfig().disableSysoutLogging();

    // Source -> rebalance -> per-partition mapper; the sink below gathers the records locally.
    final DataSet<Integer> mapped = environment
        .createInput(new ParallelismDependentInputFormat())
        .rebalance()
        .mapPartition(new ParallelismDependentMapPartition());

    final List<Integer> collected = new ArrayList<>();
    mapped.output(new LocalCollectionOutputFormat<>(collected));

    try {
        environment.execute();
        assertEquals(PARALLELISM, collected.size());
    } catch (Exception failure) {
        // A failure is only tolerated when it is the dedicated auto-max-parallelism error.
        final boolean isAutoMaxError = ExceptionUtils
            .findThrowableWithMessage(failure, ExecutionGraphBuilder.PARALLELISM_AUTO_MAX_ERROR_MESSAGE)
            .isPresent();
        assertTrue(isAutoMaxError);
    }
}
/**
 * Range-partitions the 3-tuple data set on field position 1, applies
 * {@code UniqueTupleLongMapper} to each partition and compares the collected
 * output, as text, against the values 1 through 6.
 */
@Test
public void testRangePartitionByKeyField() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);
    final DataSet<Long> distinctKeys = tuples
        .partitionByRange(1)
        .mapPartition(new UniqueTupleLongMapper());

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    final List<Long> actual = distinctKeys.collect();

    compareResultAsText(actual, expected);
}
/**
 * Hash-partitions the 3-tuple data set on field position 1, applies
 * {@code UniqueTupleLongMapper} to each partition and compares the collected
 * output, as text, against the values 1 through 6.
 */
@Test
public void testHashPartitionByKeyField() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);
    final DataSet<Long> distinctKeys = tuples
        .partitionByHash(1)
        .mapPartition(new UniqueTupleLongMapper());

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    final List<Long> actual = distinctKeys.collect();

    compareResultAsText(actual, expected);
}
/**
 * Verifies that a user-defined parallelism is honoured on a remote environment that was
 * created with an explicit {@link Configuration}.
 *
 * <p>Every parallel mapper instance emits its own subtask index, so the number of collected
 * records is expected to equal {@code USER_DOP}.
 */
@Test
public void testUserSpecificParallelism() throws Exception {
    final Configuration clientConfig = new Configuration();
    clientConfig.setString(AkkaOptions.STARTUP_TIMEOUT, VALID_STARTUP_TIMEOUT);

    // Point the remote environment at the REST endpoint of the shared mini cluster.
    final URI restEndpoint = MINI_CLUSTER_RESOURCE.getMiniCluster().getRestAddress();
    final ExecutionEnvironment environment = ExecutionEnvironment.createRemoteEnvironment(
        restEndpoint.getHost(),
        restEndpoint.getPort(),
        clientConfig);
    environment.setParallelism(USER_DOP);
    environment.getConfig().disableSysoutLogging();

    // Emits exactly one record per invocation: the index of the executing subtask.
    final RichMapPartitionFunction<Integer, Integer> subtaskIndexEmitter =
        new RichMapPartitionFunction<Integer, Integer>() {
            @Override
            public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
                out.collect(getRuntimeContext().getIndexOfThisSubtask());
            }
        };

    final DataSet<Integer> subtaskIndices = environment
        .createInput(new ParallelismDependentInputFormat())
        .rebalance()
        .mapPartition(subtaskIndexEmitter);

    final List<Integer> collected = subtaskIndices.collect();
    assertEquals(USER_DOP, collected.size());
}
/**
 * Hash-partitions the 3-tuple data set with a {@code KeySelector1} key selector, applies
 * {@code UniqueTupleLongMapper} to each partition and compares the collected output,
 * as text, against the values 1 through 6.
 */
@Test
public void testHashPartitionByKeySelector() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);

    final List<Long> actual = tuples
        .partitionByHash(new KeySelector1())
        .mapPartition(new UniqueTupleLongMapper())
        .collect();

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(actual, expected);
}
/**
 * Range-partitions the 3-tuple data set with a {@code KeySelector1} key selector, applies
 * {@code UniqueTupleLongMapper} to each partition and compares the collected output,
 * as text, against the values 1 through 6.
 */
@Test
public void testRangePartitionByKeySelector() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);

    final List<Long> actual = tuples
        .partitionByRange(new KeySelector1())
        .mapPartition(new UniqueTupleLongMapper())
        .collect();

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(actual, expected);
}
/**
 * Range-partitions the duplicate-POJO data set on the key expression
 * {@code "nestedPojo.longNumber"}.
 *
 * <p>The environment runs with parallelism 3 while the partition operator is set to 4.
 * The output of {@code UniqueNestedPojoLongMapper} is compared, as text, against
 * 10000, 20000 and 30000.
 */
@Test
public void testRangePartitionWithKeyExpression() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(3);

    final DataSet<POJO> pojos = CollectionDataSets.getDuplicatePojoDataSet(environment);
    final DataSet<Long> distinctKeys = pojos
        .partitionByRange("nestedPojo.longNumber")
        .setParallelism(4)
        .mapPartition(new UniqueNestedPojoLongMapper());

    final String expected = "10000\n" + "20000\n" + "30000\n";
    final List<Long> actual = distinctKeys.collect();

    compareResultAsText(actual, expected);
}
/**
 * Verifies that a custom {@link Configuration} handed to the local environment takes effect.
 *
 * <p>The configuration sets the number of task slots to {@code PARALLELISM} and the job runs
 * with {@code ExecutionConfig.PARALLELISM_AUTO_MAX}. Every parallel mapper instance emits its
 * subtask index, so {@code PARALLELISM} collected records are expected.
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
    final Configuration localConfig = new Configuration();
    localConfig.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

    final ExecutionEnvironment environment = ExecutionEnvironment.createLocalEnvironment(localConfig);
    environment.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
    environment.getConfig().disableSysoutLogging();

    // Emits exactly one record per invocation: the index of the executing subtask.
    final RichMapPartitionFunction<Integer, Integer> subtaskIndexEmitter =
        new RichMapPartitionFunction<Integer, Integer>() {
            @Override
            public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
                out.collect(getRuntimeContext().getIndexOfThisSubtask());
            }
        };

    final DataSet<Integer> subtaskIndices = environment
        .createInput(new ParallelismDependentInputFormat())
        .rebalance()
        .mapPartition(subtaskIndexEmitter);

    final List<Integer> collected = subtaskIndices.collect();
    assertEquals(PARALLELISM, collected.size());
}
/**
 * Hash-partitions the duplicate-POJO data set on the key expression
 * {@code "nestedPojo.longNumber"}.
 *
 * <p>The environment runs with parallelism 3 while the partition operator is set to 4.
 * The output of {@code UniqueNestedPojoLongMapper} is compared, as text, against
 * 10000, 20000 and 30000.
 */
@Test
public void testHashPartitionWithKeyExpression() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(3);

    final DataSet<POJO> pojos = CollectionDataSets.getDuplicatePojoDataSet(environment);
    final DataSet<Long> distinctKeys = pojos
        .partitionByHash("nestedPojo.longNumber")
        .setParallelism(4)
        .mapPartition(new UniqueNestedPojoLongMapper());

    final String expected = "10000\n" + "20000\n" + "30000\n";
    final List<Long> actual = distinctKeys.collect();

    compareResultAsText(actual, expected);
}
/**
 * Hash-partitions the 3-tuple data set on field position 1 while the partition operator
 * (parallelism 4) differs from the environment default (parallelism 3).
 *
 * <p>The output of {@code UniqueTupleLongMapper} is compared, as text, against the
 * values 1 through 6.
 */
@Test
public void testHashPartitionByKeyFieldAndDifferentParallelism() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(3);

    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);
    final List<Long> actual = tuples
        .partitionByHash(1)
        .setParallelism(4)
        .mapPartition(new UniqueTupleLongMapper())
        .collect();

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(actual, expected);
}
/**
 * Range-partitions the 3-tuple data set on field position 1 while the partition operator
 * (parallelism 4) differs from the environment default (parallelism 3).
 *
 * <p>The output of {@code UniqueTupleLongMapper} is compared, as text, against the
 * values 1 through 6.
 */
@Test
public void testRangePartitionByKeyFieldAndDifferentParallelism() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(3);

    final DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(environment);
    final List<Long> actual = tuples
        .partitionByRange(1)
        .setParallelism(4)
        .mapPartition(new UniqueTupleLongMapper())
        .collect();

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    compareResultAsText(actual, expected);
}
/**
 * Range-partitions the sequence 0..10000 and checks that the per-partition
 * {@code (f0, f1)} pairs produced by {@code MinMaxSelector} line up without gaps.
 *
 * <p>After sorting the pairs with {@code Tuple2Comparator}, every pair except the first
 * must satisfy {@code f0 < f1}, and its {@code f0} must be exactly one greater than the
 * {@code f1} of the preceding pair.
 */
@Test
public void testRangePartitionerOnSequenceData() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<Long> sequence = environment.generateSequence(0, 10000);

    KeySelector<Long, Long> keySelector = new ObjectSelfKeySelector();
    MapPartitionFunction<Long, Tuple2<Long, Long>> rangePerPartition = new MinMaxSelector<>(new LongComparator(true));
    Comparator<Tuple2<Long, Long>> rangeOrder = new Tuple2Comparator(new LongComparator(true));

    List<Tuple2<Long, Long>> ranges = sequence
        .partitionByRange(keySelector)
        .mapPartition(rangePerPartition)
        .collect();
    Collections.sort(ranges, rangeOrder);

    // -1 marks "no range seen yet"; the first range only seeds the running upper bound.
    long upperBoundSoFar = -1;
    for (Tuple2<Long, Long> range : ranges) {
        if (upperBoundSoFar != -1) {
            final long lowerBound = range.f0;
            assertTrue(range.f0 < range.f1);
            // Consecutive ranges must be adjacent: no gap and no overlap.
            assertEquals(upperBoundSoFar + 1, lowerBound);
        }
        upperBoundSoFar = range.f1;
    }
}
/**
 * Hash-partitions a data set of atomic {@code Long} values using the wildcard key
 * expression {@code "*"}.
 *
 * <p>The input is the sequence 1..6 unioned with itself and rebalanced. The output of
 * {@code UniqueLongMapper} is compared, as text, against the values 1 through 6.
 */
@Test
public void testHashPartitionOfAtomicType() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Every value of 1..6 appears twice in the input.
    final DataSet<Long> duplicatedSequence = environment.generateSequence(1, 6)
        .union(environment.generateSequence(1, 6));

    final DataSet<Long> distinctValues = duplicatedSequence
        .rebalance()
        .partitionByHash("*")
        .mapPartition(new UniqueLongMapper());

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    final List<Long> actual = distinctValues.collect();

    compareResultAsText(actual, expected);
}
.mapPartition(minMaxSelector) .collect();
/**
 * Range-partitions a data set of atomic {@code Long} values using the wildcard key
 * expression {@code "*"}.
 *
 * <p>The input is the sequence 1..6 unioned with itself and rebalanced. The output of
 * {@code UniqueLongMapper} is compared, as text, against the values 1 through 6.
 */
@Test
public void testRangePartitionOfAtomicType() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // Every value of 1..6 appears twice in the input.
    final DataSet<Long> duplicatedSequence = environment.generateSequence(1, 6)
        .union(environment.generateSequence(1, 6));

    final DataSet<Long> distinctValues = duplicatedSequence
        .rebalance()
        .partitionByRange("*")
        .mapPartition(new UniqueLongMapper());

    final String expected = "1\n" + "2\n" + "3\n" + "4\n" + "5\n" + "6\n";
    final List<Long> actual = distinctValues.collect();

    compareResultAsText(actual, expected);
}
.mapPartition(new MapPartitionFunction<Tuple2<Tuple2<Long, Long>, Long>, Tuple2<Long, Long>>() { @Override public void mapPartition(Iterable<Tuple2<Tuple2<Long, Long>, Long>> values,
.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() {
.mapPartition(new IdentityPartitionerMapper<Tuple2<Integer,Integer>>()) .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
.mapPartition(new IdentityPartitionerMapper<Pojo>()) .output(new DiscardingOutputFormat<Pojo>());
.mapPartition(new IdentityPartitionerMapper<Pojo>()) .output(new DiscardingOutputFormat<Pojo>());