@Test(expected = SemanticProperties.InvalidSemanticAnnotationException.class) public void testUnaryForwardedOverwritingInLine1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new WildcardForwardedMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2"); }
@Test(expected = SemanticProperties.InvalidSemanticAnnotationException.class) public void testUnaryForwardedOverwritingInLine2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2"); }
@Test public void reuseBothPartitioningCoGroup6() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(2) .map(new MockMapper()).withForwardedFields("2") .coGroup(set2.partitionByHash(2) .map(new MockMapper()) .withForwardedFields("2")) .where(0, 2).equalTo(1, 2).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseBothPartitioningCoGroup1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .coGroup(set2.partitionByHash(0, 1) .map(new MockMapper()) .withForwardedFields("0;1")) .where(0, 1).equalTo(0, 1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseBothPartitioningCoGroup2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .coGroup(set2.partitionByHash(1, 2) .map(new MockMapper()) .withForwardedFields("1;2")) .where(0, 1).equalTo(2, 1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseBothPartitioningCoGroup5() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(2) .map(new MockMapper()).withForwardedFields("2") .coGroup(set2.partitionByHash(1) .map(new MockMapper()) .withForwardedFields("1")) .where(0, 2).equalTo(2, 1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseBothPartitioningJoin5() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .partitionByHash(2) .map(new MockMapper()).withForwardedFields("2") .join(set2.partitionByHash(1) .map(new MockMapper()) .withForwardedFields("1"), JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,2).equalTo(2,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void reuseBothPartitioningJoin6() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .partitionByHash(0) .map(new MockMapper()).withForwardedFields("0") .join(set2.partitionByHash(1) .map(new MockMapper()) .withForwardedFields("1"), JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,2).equalTo(1,2).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void testUnaryFunctionForwardedInLine2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(1)); assertTrue(fw2.contains(2)); }
@Test public void testUnaryFunctionForwardedInLine1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new NoAnnotationMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(1)); assertTrue(fw2.contains(2)); }
@Test public void reuseSinglePartitioningJoin1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .join(set2, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,1).equalTo(0,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void reuseSinglePartitioningCoGroup3() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .coGroup(set2.partitionByHash(2, 1) .map(new MockMapper()) .withForwardedFields("2;1")) .where(0,1).equalTo(2, 1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseSinglePartitioningJoin2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .join(set2, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,1).equalTo(2,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void reuseSinglePartitioningJoin4() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .partitionByHash(0) .map(new MockMapper()).withForwardedFields("0") .join(set2, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,1).equalTo(2,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void reuseSinglePartitioningCoGroup4() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(0) .map(new MockMapper()).withForwardedFields("0") .coGroup(set2) .where(0, 1).equalTo(2, 1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseSinglePartitioningCoGroup1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .coGroup(set2) .where(0,1).equalTo(0,1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseSinglePartitioningCoGroup2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .partitionByHash(0,1) .map(new MockMapper()).withForwardedFields("0;1") .coGroup(set2) .where(0,1).equalTo(2,1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseSinglePartitioningCoGroup5() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1 .coGroup(set2.partitionByHash(2) .map(new MockMapper()) .withForwardedFields("2")) .where(0,1).equalTo(2,1).with(new MockCoGroup()); coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode coGroup= (DualInputPlanNode)sink.getInput().getSource(); checkValidCoGroupInputProperties(coGroup); }
@Test public void reuseSinglePartitioningJoin3() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .join(set2.partitionByHash(2, 1) .map(new MockMapper()) .withForwardedFields("2;1"), JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,1).equalTo(2,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }
@Test public void reuseSinglePartitioningJoin5() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple3<Integer, Integer, Integer>> set1 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> set2 = env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class); DataSet<Tuple3<Integer, Integer, Integer>> joined = set1 .join(set2.partitionByHash(2) .map(new MockMapper()) .withForwardedFields("2"), JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST) .where(0,1).equalTo(2,1).with(new MockJoin()); joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileWithStats(plan); SinkPlanNode sink = oPlan.getDataSinks().iterator().next(); DualInputPlanNode join = (DualInputPlanNode)sink.getInput().getSource(); checkValidJoinInputProperties(join); }