} else { DriverStrategy combinerStrategy; switch(operator.getCombineHint()) { case OPTIMIZER_CHOOSES: combinerStrategy = DriverStrategy.SORTED_PARTIAL_REDUCE; throw new RuntimeException("Unknown CombineHint"); props = new ReduceProperties(this.keys, operator.getCustomPartitioner(), combinerStrategy);
UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCombineHint(hint); po.setInput(input); po.setParallelism(getParallelism()); po.setSemanticProperties(sProps);
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
new ReduceOperatorBase<>(function, operatorInfo, new int[0], name); po.setInput(input); po.setParallelism(1); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getInputType()); ReduceOperatorBase<IN, ReduceFunction<IN>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCustomPartitioner(grouper.getCustomPartitioner()); po.setInput(input); po.setParallelism(getParallelism()); po.setCombineHint(hint);
UnaryOperatorInformation<T, T> operatorInfo = getOperatorInfo(); TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0); FunctionUtils.openFunction(function, this.parameters); TypeSerializer<T> serializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
new ReduceOperatorBase<>( reducer, new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE), List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, null, executionConfig); executionConfig.enableObjectReuse(); List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, null, executionConfig);
/**
 * Stores the custom partitioner for this operator.
 *
 * <p>A {@code null} argument is stored without any validation. A non-null partitioner is
 * only accepted when the key columns of input 0 consist of exactly one field.
 *
 * @param customPartitioner the partitioner to use, or {@code null} for none
 * @throws IllegalArgumentException if a non-null partitioner is given and the operator
 *     has no key columns or more than one key column
 */
public void setCustomPartitioner(Partitioner<?> customPartitioner) {
    // nothing to validate when the partitioner is being cleared
    if (customPartitioner == null) {
        this.customPartitioner = null;
        return;
    }

    final int[] keyColumns = getKeyColumns(0);
    final int keyCount = (keyColumns == null) ? 0 : keyColumns.length;

    if (keyCount == 0) {
        throw new IllegalArgumentException(
                "Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)");
    }
    if (keyCount > 1) {
        throw new IllegalArgumentException(
                "Cannot use the key partitioner for composite keys (more than one key field)");
    }

    this.customPartitioner = customPartitioner;
}
new ReduceOperatorBase<>(function, operatorInfo, new int[0], name); po.setInput(input); po.setParallelism(1); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getInputType()); ReduceOperatorBase<IN, ReduceFunction<IN>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCustomPartitioner(grouper.getCustomPartitioner()); po.setInput(input); po.setParallelism(getParallelism()); po.setCombineHint(hint);
UnaryOperatorInformation<T, T> operatorInfo = getOperatorInfo(); TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0); FunctionUtils.openFunction(function, this.parameters); TypeSerializer<T> serializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
new ReduceOperatorBase<>( reducer, new UnaryOperatorInformation<>(STRING_INT_TUPLE, STRING_INT_TUPLE), List<Tuple2<String, Integer>> resultMutableSafe = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(), List<Tuple2<String, Integer>> resultRegular = op.executeOnCollections(input, new RuntimeUDFContext(taskInfo, null, executionConfig, new HashMap<>(),
/**
 * Sets the partitioner used for this operator.
 *
 * <p>When the given partitioner is non-null, the key columns of input 0 are inspected:
 * zero key columns and more than one key column are both rejected. A {@code null}
 * partitioner skips the check and is stored as-is.
 *
 * @param customPartitioner the partitioner to store; may be {@code null}
 * @throws IllegalArgumentException if the partitioner is non-null and the number of key
 *     columns is not exactly one
 */
public void setCustomPartitioner(Partitioner<?> customPartitioner) {
    if (customPartitioner != null) {
        final int[] keyFields = getKeyColumns(0);
        final int numKeyFields = keyFields != null ? keyFields.length : 0;

        switch (numKeyFields) {
            case 0:
                throw new IllegalArgumentException(
                        "Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)");
            case 1:
                // exactly one key field: the only accepted configuration
                break;
            default:
                throw new IllegalArgumentException(
                        "Cannot use the key partitioner for composite keys (more than one key field)");
        }
    }

    this.customPartitioner = customPartitioner;
}
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCombineHint(hint); po.setInput(input); po.setParallelism(getParallelism()); po.setSemanticProperties(sProps);
} else { DriverStrategy combinerStrategy; switch(operator.getCombineHint()) { case OPTIMIZER_CHOOSES: combinerStrategy = DriverStrategy.SORTED_PARTIAL_REDUCE; throw new RuntimeException("Unknown CombineHint"); props = new ReduceProperties(this.keys, operator.getCustomPartitioner(), combinerStrategy);
new ReduceOperatorBase<>(function, operatorInfo, new int[0], name); po.setInput(input); po.setParallelism(1); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getInputType()); ReduceOperatorBase<IN, ReduceFunction<IN>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCustomPartitioner(grouper.getCustomPartitioner()); po.setInput(input); po.setParallelism(getParallelism()); po.setCombineHint(hint);
UnaryOperatorInformation<T, T> operatorInfo = getOperatorInfo(); TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0); FunctionUtils.openFunction(function, this.parameters); TypeSerializer<T> serializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
/**
 * Assigns the custom partitioner of this operator.
 *
 * <p>Validation only happens for a non-null partitioner: the key columns of input 0
 * must then contain exactly one field. Passing {@code null} stores {@code null}
 * without looking at the keys.
 *
 * @param customPartitioner the partitioner to assign, possibly {@code null}
 * @throws IllegalArgumentException if the partitioner is non-null and there are no key
 *     columns, or if there is more than one key column
 */
public void setCustomPartitioner(Partitioner<?> customPartitioner) {
    final boolean partitionerGiven = customPartitioner != null;

    if (partitionerGiven) {
        final int[] groupingKeys = getKeyColumns(0);
        final boolean ungrouped = groupingKeys == null || groupingKeys.length == 0;

        if (ungrouped) {
            throw new IllegalArgumentException(
                    "Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)");
        } else if (groupingKeys.length > 1) {
            throw new IllegalArgumentException(
                    "Cannot use the key partitioner for composite keys (more than one key field)");
        }
    }

    this.customPartitioner = customPartitioner;
}
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
UnaryOperatorInformation<T, T> operatorInfo = new UnaryOperatorInformation<>(getInputType(), getResultType()); ReduceOperatorBase<T, ReduceFunction<T>> po = new ReduceOperatorBase<>(function, operatorInfo, logicalKeyPositions, name); po.setCombineHint(hint); po.setInput(input); po.setParallelism(getParallelism()); po.setSemanticProperties(sProps);
} else { DriverStrategy combinerStrategy; switch(operator.getCombineHint()) { case OPTIMIZER_CHOOSES: combinerStrategy = DriverStrategy.SORTED_PARTIAL_REDUCE; throw new RuntimeException("Unknown CombineHint"); props = new ReduceProperties(this.keys, operator.getCustomPartitioner(), combinerStrategy);