@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); function.mapPartition(source, resultCollector); result.trimToSize(); FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); for (IN element : inputData) { IN inCopy = inSerializer.copy(element); OUT out = function.map(inCopy); result.add(outSerializer.copy(out)); } FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, parameters); ArrayList<OUT> result = new ArrayList<OUT>(input.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); for (IN element : input) { IN inCopy = inSerializer.copy(element); function.flatMap(inCopy, resultCollector); } FunctionUtils.closeFunction(function); return result; } }
final TypeSerializer<IN> inputSerializer = inputType.createSerializer(executionConfig); if (keyColumns.length == 0) { TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); List<IN> inputDataCopy = new ArrayList<IN>(inputData.size()); for (IN in : inputData) { TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingListCollector<OUT> collector = new CopyingListCollector<OUT>(result, outSerializer);
TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); List<IN> inputDataCopy = new ArrayList<IN>(inputData.size()); for (IN in: inputData) { TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingListCollector<OUT> collector = new CopyingListCollector<OUT>(result, outSerializer);
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctPosition() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctExpressionKey() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType()); assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType()); assertEquals(initialData.getType(), keyRemover.getOperatorInfo().getOutputType());
assertEquals(keyValueInfo, keyExtractor.getOperatorInfo().getOutputType()); assertEquals(keyValueInfo, reducer.getOperatorInfo().getOutputType()); assertEquals(initialData.getType(), keyProjector.getOperatorInfo().getOutputType());
iterationNode.setSerializerForIterationChannel(createSerializer(operator.getOperatorInfo().getOutputType()));
@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); function.mapPartition(source, resultCollector); result.trimToSize(); FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); function.mapPartition(source, resultCollector); result.trimToSize(); FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); for (IN element : inputData) { IN inCopy = inSerializer.copy(element); OUT out = function.map(inCopy); result.add(outSerializer.copy(out)); } FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { MapFunction<IN, OUT> function = this.userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, this.parameters); ArrayList<OUT> result = new ArrayList<OUT>(inputData.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); for (IN element : inputData) { IN inCopy = inSerializer.copy(element); OUT out = function.map(inCopy); result.add(outSerializer.copy(out)); } FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, parameters); ArrayList<OUT> result = new ArrayList<OUT>(input.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); for (IN element : input) { IN inCopy = inSerializer.copy(element); function.flatMap(inCopy, resultCollector); } FunctionUtils.closeFunction(function); return result; } }
@Override protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception { FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject(); FunctionUtils.setFunctionRuntimeContext(function, ctx); FunctionUtils.openFunction(function, parameters); ArrayList<OUT> result = new ArrayList<OUT>(input.size()); TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig); TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig); CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer); for (IN element : input) { IN inCopy = inSerializer.copy(element); function.flatMap(inCopy, resultCollector); } FunctionUtils.closeFunction(function); return result; } }