/**
 * Builds the hash-partition step described by {@code info} and registers its output.
 *
 * <p>The parent data set (looked up via {@code info.parentID}) is hash-partitioned on the
 * fields listed in {@code info.keys}. A subsequent map step using {@code KeyDiscarder}
 * converts each {@code Tuple2<K, byte[]>} record into a {@code byte[]}. Both steps are
 * configured with {@code info.parallelism}; the result is stored under {@code info.setID}.
 *
 * @param info descriptor of the operation to create
 * @param <K> tuple type carried in the first field of the parent records
 */
private <K extends Tuple> void createHashPartitionOperation(PythonOperationInfo info) {
    DataSet<Tuple2<K, byte[]>> keyedInput = sets.getDataSet(info.parentID);
    String[] keyFields = info.keys.toArray(new String[info.keys.size()]);

    DataSet<Tuple2<K, byte[]>> partitioned = keyedInput
        .partitionByHash(keyFields)
        .setParallelism(info.parallelism);

    // Post step: map the keyed records to their byte[] form.
    DataSet<byte[]> withoutKeys = partitioned
        .map(new KeyDiscarder<K>())
        .setParallelism(info.parallelism)
        .name("HashPartitionPostStep");

    sets.add(info.setID, withoutKeys);
}
/**
 * Builds a rebalance step on the parent data set and registers its output.
 *
 * <p>The parent is looked up via {@code info.parentID}; the rebalanced set runs with
 * {@code info.parallelism}, is named "Rebalance", and is stored under {@code info.setID}.
 *
 * @param info descriptor of the operation to create
 */
private void createRebalanceOperation(PythonOperationInfo info) {
    DataSet<?> parent = sets.getDataSet(info.parentID);
    DataSet<?> rebalanced = parent
        .rebalance()
        .setParallelism(info.parallelism)
        .name("Rebalance");
    sets.add(info.setID, rebalanced);
}
@Override public Graph<LongValue, NullValue, NullValue> generate() { int scale = Long.SIZE - Long.numberOfLeadingZeros(vertexCount - 1); // Edges int cyclesPerEdge = noiseEnabled ? 5 * scale : scale; List<BlockInfo<T>> generatorBlocks = randomGenerableFactory .getRandomGenerables(edgeCount, cyclesPerEdge); DataSet<Edge<LongValue, NullValue>> edges = env .fromCollection(generatorBlocks) .name("Random generators") .rebalance() .setParallelism(parallelism) .name("Rebalance") .flatMap(new GenerateEdges<>(vertexCount, scale, a, b, c, noiseEnabled, noise)) .setParallelism(parallelism) .name("RMat graph edges"); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSet(edges, parallelism); // Graph return Graph.fromDataSet(vertices, edges, env); }
/**
 * Expects an {@code InvalidProgramException} when a {@code TestPartitionerLong} is attached
 * to a grouping on field 0 of a {@code Tuple2<Integer, Integer>} data set.
 */
@Test
public void testCustomPartitioningTupleInvalidType() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple2<Integer, Integer> element = new Tuple2<Integer, Integer>(0, 0);
        DataSet<Tuple2<Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple2<Integer, Integer>> input = source.rebalance().setParallelism(4);

        try {
            input.groupBy(0).withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a {@code TestPartitionerLong} is attached
 * to a grouping on field {@code "a"} of a {@code Pojo2} data set.
 */
@Test
public void testCustomPartitioningTupleInvalidType() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo2 element = new Pojo2();
        DataSet<Pojo2> source = environment.fromElements(element);
        DataSet<Pojo2> input = source.rebalance().setParallelism(4);

        try {
            input.groupBy("a").withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a custom partitioner is attached to a
 * grouping on the two fields {@code "a"} and {@code "b"} of a {@code Pojo2} data set.
 */
@Test
public void testCustomPartitioningTupleRejectCompositeKey() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo2 element = new Pojo2();
        DataSet<Pojo2> source = environment.fromElements(element);
        DataSet<Pojo2> input = source.rebalance().setParallelism(4);

        try {
            input.groupBy("a", "b").withPartitioner(new TestPartitionerInt());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a custom partitioner is attached to a
 * grouping on the two tuple fields 0 and 1 of a {@code Tuple3} data set.
 */
@Test
public void testCustomPartitioningTupleRejectCompositeKey() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple3<Integer, Integer, Integer> element =
            new Tuple3<Integer, Integer, Integer>(0, 0, 0);
        DataSet<Tuple3<Integer, Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple3<Integer, Integer, Integer>> input =
            source.rebalance().setParallelism(4);

        try {
            input.groupBy(0, 1).withPartitioner(new TestPartitionerInt());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a {@code TestPartitionerLong} is attached
 * to a grouping defined by a {@code TestKeySelector} over a {@code Tuple2} data set.
 */
@Test
public void testCustomPartitioningKeySelectorInvalidType() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple2<Integer, Integer> element = new Tuple2<Integer, Integer>(0, 0);
        DataSet<Tuple2<Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple2<Integer, Integer>> input = source.rebalance().setParallelism(4);

        try {
            input
                .groupBy(new TestKeySelector<Tuple2<Integer,Integer>>())
                .withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a custom partitioner is attached to a
 * grouping defined by a {@code TestBinaryKeySelector} over a {@code Tuple3} data set.
 */
@Test
public void testCustomPartitioningTupleRejectCompositeKey() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple3<Integer, Integer, Integer> element =
            new Tuple3<Integer, Integer, Integer>(0, 0, 0);
        DataSet<Tuple3<Integer, Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple3<Integer, Integer, Integer>> input =
            source.rebalance().setParallelism(4);

        try {
            input
                .groupBy(new TestBinaryKeySelector<Tuple3<Integer,Integer,Integer>>())
                .withPartitioner(new TestPartitionerInt());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a {@code TestPartitionerLong} is attached
 * to a {@code Tuple3} grouping on field 0 that is group-sorted ascending on field 1.
 */
@Test
public void testCustomPartitioningTupleInvalidTypeSorted() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple3<Integer, Integer, Integer> element =
            new Tuple3<Integer, Integer, Integer>(0, 0, 0);
        DataSet<Tuple3<Integer, Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple3<Integer, Integer, Integer>> input =
            source.rebalance().setParallelism(4);

        try {
            input
                .groupBy(0)
                .sortGroup(1, Order.ASCENDING)
                .withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} when a {@code TestPartitionerLong} is attached
 * to a {@code Pojo3} grouping on {@code "a"} that is group-sorted ascending on {@code "b"}.
 */
@Test
public void testCustomPartitioningTupleInvalidTypeSorted() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo3 element = new Pojo3();
        DataSet<Pojo3> source = environment.fromElements(element);
        DataSet<Pojo3> input = source.rebalance().setParallelism(4);

        try {
            input
                .groupBy("a")
                .sortGroup("b", Order.ASCENDING)
                .withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Expects an {@code InvalidProgramException} from the chain that groups a {@code Tuple3}
 * data set with a {@code TestKeySelector}, group-sorts ascending on field 1, and attaches
 * a {@code TestPartitionerLong}.
 */
@Test
public void testCustomPartitioningKeySelectorInvalidTypeSorted() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple3<Integer, Integer, Integer> element =
            new Tuple3<Integer, Integer, Integer>(0, 0, 0);
        DataSet<Tuple3<Integer, Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple3<Integer, Integer, Integer>> input =
            source.rebalance().setParallelism(4);

        try {
            input
                .groupBy(new TestKeySelector<Tuple3<Integer,Integer,Integer>>())
                .sortGroup(1, Order.ASCENDING)
                .withPartitioner(new TestPartitionerLong());
            fail("Should throw an exception");
        } catch (InvalidProgramException expected) {
            // This is the outcome the test is looking for.
        }
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Range-partitions the duplicate-POJO data set on the key expression
 * {@code "nestedPojo.longNumber"} and checks the values collected after the
 * {@code UniqueNestedPojoLongMapper} partition mapper.
 *
 * @throws Exception if building or executing the program fails
 */
@Test
public void testRangePartitionWithKeyExpression() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<POJO> pojos = CollectionDataSets.getDuplicatePojoDataSet(env);

    // The partition step uses parallelism 4, unlike the environment default of 3.
    DataSet<POJO> partitioned = pojos
        .partitionByRange("nestedPojo.longNumber")
        .setParallelism(4);
    DataSet<Long> distinctLongs = partitioned.mapPartition(new UniqueNestedPojoLongMapper());

    List<Long> collected = distinctLongs.collect();

    compareResultAsText(collected, "10000\n20000\n30000\n");
}
/**
 * Hash-partitions the duplicate-POJO data set on the key expression
 * {@code "nestedPojo.longNumber"} and checks the values collected after the
 * {@code UniqueNestedPojoLongMapper} partition mapper.
 *
 * @throws Exception if building or executing the program fails
 */
@Test
public void testHashPartitionWithKeyExpression() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<POJO> pojos = CollectionDataSets.getDuplicatePojoDataSet(env);

    // The partition step uses parallelism 4, unlike the environment default of 3.
    DataSet<POJO> partitioned = pojos
        .partitionByHash("nestedPojo.longNumber")
        .setParallelism(4);
    DataSet<Long> distinctLongs = partitioned.mapPartition(new UniqueNestedPojoLongMapper());

    List<Long> collected = distinctLongs.collect();

    compareResultAsText(collected, "10000\n20000\n30000\n");
}
/**
 * Hash-partitions the 3-tuple data set on field 1 with a parallelism different from the
 * environment default and checks the values collected after the
 * {@code UniqueTupleLongMapper} partition mapper.
 *
 * @throws Exception if building or executing the program fails
 */
@Test
public void testHashPartitionByKeyFieldAndDifferentParallelism() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

    // The partition step uses parallelism 4, unlike the environment default of 3.
    DataSet<Tuple3<Integer, Long, String>> partitioned = tuples
        .partitionByHash(1)
        .setParallelism(4);
    DataSet<Long> distinctLongs = partitioned.mapPartition(new UniqueTupleLongMapper());

    List<Long> collected = distinctLongs.collect();

    compareResultAsText(collected, "1\n2\n3\n4\n5\n6\n");
}
/**
 * Range-partitions the 3-tuple data set on field 1 with a parallelism different from the
 * environment default and checks the values collected after the
 * {@code UniqueTupleLongMapper} partition mapper.
 *
 * @throws Exception if building or executing the program fails
 */
@Test
public void testRangePartitionByKeyFieldAndDifferentParallelism() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(3);

    DataSet<Tuple3<Integer, Long, String>> tuples = CollectionDataSets.get3TupleDataSet(env);

    // The partition step uses parallelism 4, unlike the environment default of 3.
    DataSet<Tuple3<Integer, Long, String>> partitioned = tuples
        .partitionByRange(1)
        .setParallelism(4);
    DataSet<Long> distinctLongs = partitioned.mapPartition(new UniqueTupleLongMapper());

    List<Long> collected = distinctLongs.collect();

    compareResultAsText(collected, "1\n2\n3\n4\n5\n6\n");
}
/**
 * Compiles a plan that groups a {@code Tuple2} data set on field 0 with a custom
 * partitioner and applies a combinable group reduce, then checks the ship strategies:
 * FORWARD into the sink, PARTITION_CUSTOM into the reducer, FORWARD into the node feeding
 * the reducer.
 */
@Test
public void testCustomPartitioningTupleGroupReduce() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Tuple2<Integer, Integer> element = new Tuple2<Integer, Integer>(0, 0);
        DataSet<Tuple2<Integer, Integer>> source = environment.fromElements(element);
        DataSet<Tuple2<Integer, Integer>> input = source.rebalance().setParallelism(4);

        input
            .groupBy(0)
            .withPartitioner(new TestPartitionerInt())
            .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Integer,Integer>>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

        // Walk backwards from the sink: sink <- reducer <- combiner.
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combineNode =
            (SingleInputPlanNode) reduceNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Compiles a plan that groups a {@code Pojo2} data set on {@code "a"} with a custom
 * partitioner and applies a combinable group reduce, then checks the ship strategies:
 * FORWARD into the sink, PARTITION_CUSTOM into the reducer, FORWARD into the node feeding
 * the reducer.
 */
@Test
public void testCustomPartitioningTupleGroupReduce() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo2 element = new Pojo2();
        DataSet<Pojo2> source = environment.fromElements(element);
        DataSet<Pojo2> input = source.rebalance().setParallelism(4);

        input
            .groupBy("a")
            .withPartitioner(new TestPartitionerInt())
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

        // Walk backwards from the sink: sink <- reducer <- combiner.
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combineNode =
            (SingleInputPlanNode) reduceNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Compiles a plan that groups a {@code Pojo2} data set on {@code "a"} with a custom
 * partitioner and applies a reduce, then checks the ship strategies: FORWARD into the
 * sink, PARTITION_CUSTOM into the reducer, FORWARD into the node feeding the reducer.
 */
@Test
public void testCustomPartitioningTupleReduce() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo2 element = new Pojo2();
        DataSet<Pojo2> source = environment.fromElements(element);
        DataSet<Pojo2> input = source.rebalance().setParallelism(4);

        input
            .groupBy("a")
            .withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

        OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

        // Walk backwards from the sink: sink <- reducer <- combiner.
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combineNode =
            (SingleInputPlanNode) reduceNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}
/**
 * Compiles a plan that groups a {@code Pojo3} data set on {@code "a"} with a custom
 * partitioner, group-sorts ascending on {@code "b"}, and applies a combinable group
 * reduce, then checks the ship strategies: FORWARD into the sink, PARTITION_CUSTOM into
 * the reducer, FORWARD into the node feeding the reducer.
 */
@Test
public void testCustomPartitioningTupleGroupReduceSorted() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        Pojo3 element = new Pojo3();
        DataSet<Pojo3> source = environment.fromElements(element);
        DataSet<Pojo3> input = source.rebalance().setParallelism(4);

        input
            .groupBy("a")
            .withPartitioner(new TestPartitionerInt())
            .sortGroup("b", Order.ASCENDING)
            .reduceGroup(new IdentityGroupReducerCombinable<Pojo3>())
            .output(new DiscardingOutputFormat<Pojo3>());

        OptimizedPlan optimized = compileNoStats(environment.createProgramPlan());

        // Walk backwards from the sink: sink <- reducer <- combiner.
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combineNode =
            (SingleInputPlanNode) reduceNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
    } catch (Exception unexpected) {
        unexpected.printStackTrace();
        fail(unexpected.getMessage());
    }
}