@Override public void setInput(Map<Operator<?>, OptimizerNode> contractToNode, ExecutionMode defaultExchangeMode) { Operator<?> children = getOperator().getInput(); final OptimizerNode pred; final DagConnection conn; pred = contractToNode.get(children); conn = new DagConnection(pred, this, defaultExchangeMode); // create the connection and add it this.input = conn; pred.addOutgoingConnection(conn); }
private <IN> void executeDataSink(GenericDataSinkBase<?> sink, int superStep) throws Exception { Operator<?> inputOp = sink.getInput(); if (inputOp == null) { throw new InvalidProgramException("The data sink " + sink.getName() + " has no input."); } @SuppressWarnings("unchecked") List<IN> input = (List<IN>) execute(inputOp); @SuppressWarnings("unchecked") GenericDataSinkBase<IN> typedSink = (GenericDataSinkBase<IN>) sink; // build the runtime context and compute broadcast variables, if necessary TaskInfo taskInfo = new TaskInfo(typedSink.getName(), 1, 0, 1, 0); RuntimeUDFContext ctx; MetricGroup metrics = new UnregisteredMetricsGroup(); if (RichOutputFormat.class.isAssignableFrom(typedSink.getUserCodeWrapper().getUserCodeClass())) { ctx = superStep == 0 ? new RuntimeUDFContext(taskInfo, userCodeClassLoader, executionConfig, cachedFiles, accumulators, metrics) : new IterationRuntimeUDFContext(taskInfo, userCodeClassLoader, executionConfig, cachedFiles, accumulators, metrics); } else { ctx = null; } typedSink.executeOnCollections(input, ctx, executionConfig); }
@Test public void testUnaryFunctionReadFieldsAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet read = semantics.getReadFields(0); assertNotNull(read); assertEquals(2, read.size()); assertTrue(read.contains(0)); assertTrue(read.contains(2)); }
@Test public void testUnaryFunctionInPlaceForwardedAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42)); input.map(new IndividualForwardedMapper<Long, String, Integer>()).output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(0)); assertTrue(fw2.contains(2)); }
@Test public void testUnaryFunctionAllForwardedExceptAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new AllForwardedExceptMapper<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(0)); assertTrue(fw2.contains(2)); }
@Test public void testUnaryFunctionForwardedInLine2() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(1)); assertTrue(fw2.contains(2)); }
@Test public void testUnaryFunctionForwardedInLine1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new NoAnnotationMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(1)); assertTrue(fw2.contains(2)); }
@Test public void testUnaryFunctionForwardedInLine3() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0->1; 2") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertTrue(fw1.contains(1)); assertTrue(fw2.contains(2)); }
@Test public void testCorrectTranslation() { final String jobName = "Test JobName"; final int numIterations = 13; final int defaultParallelism = 133; final int iterationParallelism = 77; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // ------------ construct the test program ------------------ { env.setParallelism(defaultParallelism); @SuppressWarnings("unchecked") DataSet<Tuple3<Double, Long, String>> initialDataSet = env.fromElements(new Tuple3<>(3.44, 5L, "abc")); IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations); bulkIteration.setParallelism(iterationParallelism); // test that multiple iteration consumers are supported DataSet<Tuple3<Double, Long, String>> identity = bulkIteration .map(new IdentityMapper<Tuple3<Double, Long, String>>()); DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity); result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>()); result.writeAsText("/dev/null"); } Plan p = env.createProgramPlan(jobName); // ------------- validate the plan ---------------- BulkIterationBase<?> iteration = (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput(); assertEquals(jobName, p.getJobName()); assertEquals(defaultParallelism, p.getDefaultParallelism()); assertEquals(iterationParallelism, iteration.getParallelism()); }
@Test public void testUnaryFunctionMovingForwardedAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L)); input.map(new ShufflingMapper<Long>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 1); FieldSet fw3 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertNotNull(fw3); assertTrue(fw1.contains(2)); assertTrue(fw2.contains(0)); assertTrue(fw3.contains(1)); }
@Test public void testUnaryFunctionWildcardForwardedAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42)); input.map(new WildcardForwardedMapper<Tuple3<Long, String, Integer>>()).output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 1); FieldSet fw3 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertNotNull(fw3); assertTrue(fw1.contains(0)); assertTrue(fw2.contains(1)); assertTrue(fw3.contains(2)); }
@Test public void testFunctionAnalyzerPrecedence() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setCodeAnalysisMode(CodeAnalysisMode.OPTIMIZE); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(Tuple3.of(3L, "test", 42)); input .map(new WildcardForwardedMapper<Tuple3<Long, String, Integer>>()) .output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 1); FieldSet fw3 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertNotNull(fw3); assertTrue(fw1.contains(0)); assertTrue(fw2.contains(1)); assertTrue(fw3.contains(2)); }
@Test public void testFunctionForwardedAnnotationPrecedence() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setCodeAnalysisMode(CodeAnalysisMode.OPTIMIZE); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(Tuple3.of(3L, "test", 42)); input .map(new WildcardForwardedMapperWithForwardAnnotation<Tuple3<Long, String, Integer>>()) .output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 1); FieldSet fw3 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertNotNull(fw3); assertTrue(fw1.contains(0)); assertFalse(fw2.contains(1)); assertFalse(fw3.contains(2)); }
@Test public void testFunctionSkipCodeAnalysisAnnotationPrecedence() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setCodeAnalysisMode(CodeAnalysisMode.OPTIMIZE); @SuppressWarnings("unchecked") DataSet<Tuple3<Long, String, Integer>> input = env.fromElements(Tuple3.of(3L, "test", 42)); input .map(new WildcardForwardedMapperWithSkipAnnotation<Tuple3<Long, String, Integer>>()) .output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput(); SingleInputSemanticProperties semantics = mapper.getSemanticProperties(); FieldSet fw1 = semantics.getForwardingTargetFields(0, 0); FieldSet fw2 = semantics.getForwardingTargetFields(0, 1); FieldSet fw3 = semantics.getForwardingTargetFields(0, 2); assertNotNull(fw1); assertNotNull(fw2); assertNotNull(fw3); assertFalse(fw1.contains(0)); assertFalse(fw2.contains(1)); assertFalse(fw3.contains(2)); }
@Test public void testBinaryReadFieldsAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(3L, 4L)); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> input2 = env.fromElements(new Tuple2<Long, Long>(3L, 2L)); input1.join(input2).where(0).equalTo(0).with(new ReadSetJoin<Long>()) .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); InnerJoinOperatorBase<?, ?, ?, ?> join = (InnerJoinOperatorBase<?, ?, ?, ?>) sink.getInput(); DualInputSemanticProperties semantics = join.getSemanticProperties(); assertNotNull(semantics.getReadFields(0)); assertNotNull(semantics.getReadFields(1)); assertEquals(1, semantics.getReadFields(0).size()); assertEquals(1, semantics.getReadFields(1).size()); assertTrue(semantics.getReadFields(0).contains(1)); assertTrue(semantics.getReadFields(1).contains(0)); }
@Test public void testProjectionSemProps1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); tupleDs.project(1, 3, 2, 0, 3).output(new DiscardingOutputFormat<Tuple>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); PlanProjectOperator<?, ?> projectOperator = ((PlanProjectOperator<?, ?>) sink.getInput()); SingleInputSemanticProperties props = projectOperator.getSemanticProperties(); assertEquals(1, props.getForwardingTargetFields(0, 0).size()); assertEquals(1, props.getForwardingTargetFields(0, 1).size()); assertEquals(1, props.getForwardingTargetFields(0, 2).size()); assertEquals(2, props.getForwardingTargetFields(0, 3).size()); assertTrue(props.getForwardingTargetFields(0, 1).contains(0)); assertTrue(props.getForwardingTargetFields(0, 3).contains(1)); assertTrue(props.getForwardingTargetFields(0, 2).contains(2)); assertTrue(props.getForwardingTargetFields(0, 0).contains(3)); assertTrue(props.getForwardingTargetFields(0, 3).contains(4)); }
@Test public void testCrossProjectionSemProps1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); tupleDs.cross(tupleDs) .projectFirst(2, 3) .projectSecond(1, 4) .output(new DiscardingOutputFormat<Tuple>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); CrossOperatorBase<?, ?, ?, ?> projectCrossOperator = ((CrossOperatorBase<?, ?, ?, ?>) sink.getInput()); DualInputSemanticProperties props = projectCrossOperator.getSemanticProperties(); assertEquals(1, props.getForwardingTargetFields(0, 2).size()); assertEquals(1, props.getForwardingTargetFields(0, 3).size()); assertEquals(1, props.getForwardingTargetFields(1, 1).size()); assertEquals(1, props.getForwardingTargetFields(1, 4).size()); assertTrue(props.getForwardingTargetFields(0, 2).contains(0)); assertTrue(props.getForwardingTargetFields(0, 3).contains(1)); assertTrue(props.getForwardingTargetFields(1, 1).contains(2)); assertTrue(props.getForwardingTargetFields(1, 4).contains(3)); }
@Test public void testJoinProjectionSemProps1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); tupleDs.join(tupleDs).where(0).equalTo(0) .projectFirst(2, 3) .projectSecond(1, 4) .output(new DiscardingOutputFormat<Tuple>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); InnerJoinOperatorBase<?, ?, ?, ?> projectJoinOperator = ((InnerJoinOperatorBase<?, ?, ?, ?>) sink.getInput()); DualInputSemanticProperties props = projectJoinOperator.getSemanticProperties(); assertEquals(1, props.getForwardingTargetFields(0, 2).size()); assertEquals(1, props.getForwardingTargetFields(0, 3).size()); assertEquals(1, props.getForwardingTargetFields(1, 1).size()); assertEquals(1, props.getForwardingTargetFields(1, 4).size()); assertTrue(props.getForwardingTargetFields(0, 2).contains(0)); assertTrue(props.getForwardingTargetFields(0, 3).contains(1)); assertTrue(props.getForwardingTargetFields(1, 1).contains(2)); assertTrue(props.getForwardingTargetFields(1, 4).contains(3)); }
@Test public void testBinaryForwardedAnnotation() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, String>> input1 = env.fromElements(new Tuple2<Long, String>(3L, "test")); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Double>> input2 = env.fromElements(new Tuple2<Long, Double>(3L, 3.1415)); input1.join(input2).where(0).equalTo(0).with(new ForwardedBothAnnotationJoin<Long, String, Long, Double>()) .output(new DiscardingOutputFormat<Tuple2<String, Double>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); InnerJoinOperatorBase<?, ?, ?, ?> join = (InnerJoinOperatorBase<?, ?, ?, ?>) sink.getInput(); DualInputSemanticProperties semantics = join.getSemanticProperties(); assertNotNull(semantics.getForwardingTargetFields(0, 0)); assertNotNull(semantics.getForwardingTargetFields(1, 0)); assertEquals(1, semantics.getForwardingTargetFields(0, 1).size()); assertEquals(1, semantics.getForwardingTargetFields(1, 1).size()); assertTrue(semantics.getForwardingTargetFields(0, 1).contains(0)); assertTrue(semantics.getForwardingTargetFields(1, 1).contains(1)); assertEquals(0, semantics.getForwardingTargetFields(0, 0).size()); assertEquals(0, semantics.getForwardingTargetFields(1, 0).size()); }
@Test public void testBinaryForwardedInLine1() { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> input1 = env.fromElements(new Tuple2<Long, Long>(3L, 4L)); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> input2 = env.fromElements(new Tuple2<Long, Long>(3L, 2L)); input1.join(input2).where(0).equalTo(0).with(new NoAnnotationJoin<Long>()) .withForwardedFieldsFirst("0->1; 1->2").withForwardedFieldsSecond("1->0") .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>()); Plan plan = env.createProgramPlan(); GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next(); InnerJoinOperatorBase<?, ?, ?, ?> join = (InnerJoinOperatorBase<?, ?, ?, ?>) sink.getInput(); DualInputSemanticProperties semantics = join.getSemanticProperties(); assertNotNull(semantics.getForwardingTargetFields(1, 0)); assertEquals(1, semantics.getForwardingTargetFields(0, 0).size()); assertEquals(1, semantics.getForwardingTargetFields(0, 1).size()); assertEquals(1, semantics.getForwardingTargetFields(1, 1).size()); assertTrue(semantics.getForwardingTargetFields(0, 0).contains(1)); assertTrue(semantics.getForwardingTargetFields(0, 1).contains(2)); assertTrue(semantics.getForwardingTargetFields(1, 1).contains(0)); assertEquals(0, semantics.getForwardingTargetFields(1, 0).size()); }