public void setCustomPartitioner(Partitioner<?> customPartitioner) { if (customPartitioner != null) { int[] keys = getKeyColumns(0); if (keys == null || keys.length == 0) { throw new IllegalArgumentException("Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)"); } if (keys.length > 1) { throw new IllegalArgumentException("Cannot use the key partitioner for composite keys (more than one key field)"); } } this.customPartitioner = customPartitioner; }
TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0);
@Test public void translateNonGroupedReduce() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.reduce(new RichReduceFunction<Tuple3<Double, StringValue, LongValue>>() { public Tuple3<Double, StringValue, LongValue> reduce(Tuple3<Double, StringValue, LongValue> value1, Tuple3<Double, StringValue, LongValue> value2) { return value1; } }).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertTrue(reducer.getKeyColumns(0) == null || reducer.getKeyColumns(0).length == 0); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == ExecutionConfig.PARALLELISM_DEFAULT); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctPlain2() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctPlain() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctPosition() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env); initialData.distinct(1, 2).output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {1, 2}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
@Test public void translateDistinctExpressionKey() { try { final int parallelism = 8; ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism); DataSet<CustomType> initialData = getSourcePojoDataSet(env); initialData.distinct("myInt").output(new DiscardingOutputFormat<CustomType>()); Plan p = env.createProgramPlan(); GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next(); // currently distinct is translated to a Reduce ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput(); // check types assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType()); assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType()); // check keys assertArrayEquals(new int[] {0}, reducer.getKeyColumns(0)); // parallelism was not configured on the operator assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1); assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test caused an error: " + e.getMessage()); } }
assertArrayEquals(new int[] {2}, reducer.getKeyColumns(0));
public void setCustomPartitioner(Partitioner<?> customPartitioner) { if (customPartitioner != null) { int[] keys = getKeyColumns(0); if (keys == null || keys.length == 0) { throw new IllegalArgumentException("Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)"); } if (keys.length > 1) { throw new IllegalArgumentException("Cannot use the key partitioner for composite keys (more than one key field)"); } } this.customPartitioner = customPartitioner; }
public void setCustomPartitioner(Partitioner<?> customPartitioner) { if (customPartitioner != null) { int[] keys = getKeyColumns(0); if (keys == null || keys.length == 0) { throw new IllegalArgumentException("Cannot use custom partitioner for a non-grouped GroupReduce (AllGroupReduce)"); } if (keys.length > 1) { throw new IllegalArgumentException("Cannot use the key partitioner for composite keys (more than one key field)"); } } this.customPartitioner = customPartitioner; }
TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0);
TypeInformation<T> inputType = operatorInfo.getInputType(); int[] inputColumns = getKeyColumns(0);