/**
 * Registers a Python "map" operation: applies a {@link PythonMapPartition} to the
 * parent data set (looked up via {@code info.parentID}) and stores the result
 * under {@code info.setID}.
 *
 * @param info operation metadata (parent/result set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 */
private <IN, OUT> void createMapOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> parent = sets.getDataSet(info.parentID);
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	sets.add(info.setID, parent
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name));
}
/**
 * Registers a Python "filter" operation. Note that, exactly like the map case,
 * the filter is executed as a {@link PythonMapPartition} over the parent set;
 * the filtering itself happens on the Python side.
 *
 * @param info operation metadata (parent/result set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 */
private <IN, OUT> void createFilterOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> parent = sets.getDataSet(info.parentID);
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	sets.add(info.setID, parent
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name));
}
/**
 * Registers a Python "mapPartition" operation: wraps the Python function in a
 * {@link PythonMapPartition}, applies it to the parent data set and stores the
 * result under {@code info.setID}.
 *
 * @param info operation metadata (parent/result set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 */
private <IN, OUT> void createMapPartitionOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> parent = sets.getDataSet(info.parentID);
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	sets.add(info.setID, parent
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name));
}
/**
 * Registers a Python "flatMap" operation. Like map and filter, this is executed
 * as a {@link PythonMapPartition} over the parent set; the flat-mapping happens
 * on the Python side.
 *
 * @param info operation metadata (parent/result set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 */
private <IN, OUT> void createFlatMapOperation(PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> parent = sets.getDataSet(info.parentID);
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	sets.add(info.setID, parent
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name));
}
/**
 * Builds a Python group-reduce over an unsorted grouping. A non-combinable
 * {@link IdentityGroupReduce} pre-step materializes each group before the
 * {@link PythonMapPartition} runs the Python reduce function.
 *
 * @param op1  grouped input
 * @param info operation metadata (set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 * @return the resulting data set
 */
private <IN, OUT> DataSet<OUT> applyGroupReduceOperation(UnsortedGrouping<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> grouped = op1
		.reduceGroup(new IdentityGroupReduce<IN>())
		.setCombinable(false)
		.setParallelism(info.parallelism)
		.name("PythonGroupReducePreStep");
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	return grouped
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name);
}
/**
 * Builds a Python group-reduce over a plain (ungrouped) data set. A
 * non-combinable {@link IdentityGroupReduce} pre-step materializes the input
 * before the {@link PythonMapPartition} runs the Python reduce function.
 *
 * <p>Consistency fix: the pre-step now chains
 * {@code .setParallelism(...).name(...)} in the same order as the other
 * {@code applyGroupReduceOperation}/{@code applyReduceOperation} overloads
 * (behavior is unchanged; both calls return the operator itself).
 *
 * @param op1  input data set
 * @param info operation metadata (set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 * @return the resulting data set
 */
private <IN, OUT> DataSet<OUT> applyGroupReduceOperation(DataSet<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
	return op1
		.reduceGroup(new IdentityGroupReduce<IN>()).setCombinable(false).setParallelism(info.parallelism).name("PythonGroupReducePreStep")
		.mapPartition(new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type))
		.setParallelism(info.parallelism).name(info.name);
}
/**
 * Builds a Python group-reduce over a sorted grouping. A non-combinable
 * {@link IdentityGroupReduce} pre-step materializes each (sorted) group before
 * the {@link PythonMapPartition} runs the Python reduce function.
 *
 * @param op1  sorted, grouped input
 * @param info operation metadata (set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 * @return the resulting data set
 */
private <IN, OUT> DataSet<OUT> applyGroupReduceOperation(SortedGrouping<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> grouped = op1
		.reduceGroup(new IdentityGroupReduce<IN>())
		.setCombinable(false)
		.setParallelism(info.parallelism)
		.name("PythonGroupReducePreStep");
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	return grouped
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name);
}
/**
 * Builds a Python reduce over an unsorted grouping. A non-combinable
 * {@link IdentityGroupReduce} pre-step materializes each group before the
 * {@link PythonMapPartition} runs the Python reduce function.
 *
 * @param op1  grouped input
 * @param info operation metadata (set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 * @return the resulting data set
 */
private <IN, OUT> DataSet<OUT> applyReduceOperation(UnsortedGrouping<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> grouped = op1
		.reduceGroup(new IdentityGroupReduce<IN>())
		.setCombinable(false)
		.setParallelism(info.parallelism)
		.name("PythonReducePreStep");
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	return grouped
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name);
}
}
/**
 * Builds a Python reduce over a plain (ungrouped) data set. A non-combinable
 * {@link IdentityGroupReduce} pre-step materializes the input before the
 * {@link PythonMapPartition} runs the Python reduce function.
 *
 * @param op1  input data set
 * @param info operation metadata (set ids, environment id, parallelism, name)
 * @param type result type information of the operation
 * @return the resulting data set
 */
private <IN, OUT> DataSet<OUT> applyReduceOperation(DataSet<IN> op1, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN> preStep = op1
		.reduceGroup(new IdentityGroupReduce<IN>())
		.setCombinable(false)
		.setParallelism(info.parallelism)
		.name("PythonReducePreStep");
	PythonMapPartition<IN, OUT> udf =
		new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type);
	return preStep
		.mapPartition(udf)
		.setParallelism(info.parallelism)
		.name(info.name);
}
/**
 * Registers a Python join operation between the sets identified by
 * {@code info.parentID} and {@code info.otherID}. The default join is built by
 * {@code createDefaultJoin}; when the operation carries a Python UDF
 * ({@code info.usesUDF}), a {@link PythonMapPartition} is additionally applied
 * to the joined pairs before the result is registered.
 *
 * @param mode size hint controlling the join strategy
 * @param info operation metadata (input/result set ids, keys, parallelism, name)
 * @param type result type information of the operation
 */
private <IN1, IN2, OUT> void createJoinOperation(DatasizeHint mode, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN1> first = sets.getDataSet(info.parentID);
	DataSet<IN2> second = sets.getDataSet(info.otherID);
	if (!info.usesUDF) {
		// No Python function attached: register the plain default join.
		sets.add(info.setID, createDefaultJoin(first, second, info.keys1, info.keys2, mode, info.parallelism));
		return;
	}
	sets.add(info.setID, createDefaultJoin(first, second, info.keys1, info.keys2, mode, info.parallelism)
		.mapPartition(new PythonMapPartition<Tuple2<byte[], byte[]>, OUT>(operatorConfig, info.envID, info.setID, type))
		.setParallelism(info.parallelism)
		.name(info.name));
}
/**
 * Registers a Python cross operation between the sets identified by
 * {@code info.parentID} and {@code info.otherID}. The size hint selects the
 * cross variant ({@code cross}/{@code crossWithHuge}/{@code crossWithTiny});
 * when the operation carries a Python UDF ({@code info.usesUDF}), a
 * {@link PythonMapPartition} is applied to the crossed pairs, otherwise the
 * default cross is registered directly under the name "DefaultCross".
 *
 * @param mode size hint controlling the cross strategy
 * @param info operation metadata (input/result set ids, parallelism, name)
 * @param type result type information of the operation
 * @throws IllegalArgumentException if {@code mode} is not a known hint
 */
private <IN1, IN2, OUT> void createCrossOperation(DatasizeHint mode, PythonOperationInfo info, TypeInformation<OUT> type) {
	DataSet<IN1> first = sets.getDataSet(info.parentID);
	DataSet<IN2> second = sets.getDataSet(info.otherID);

	final DefaultCross<IN1, IN2> crossed;
	switch (mode) {
		case NONE:
			crossed = first.cross(second);
			break;
		case HUGE:
			crossed = first.crossWithHuge(second);
			break;
		case TINY:
			crossed = first.crossWithTiny(second);
			break;
		default:
			throw new IllegalArgumentException("Invalid Cross mode specified: " + mode);
	}
	crossed.setParallelism(info.parallelism);

	if (info.usesUDF) {
		sets.add(info.setID, crossed
			.mapPartition(new PythonMapPartition<Tuple2<IN1, IN2>, OUT>(operatorConfig, info.envID, info.setID, type))
			.setParallelism(info.parallelism)
			.name(info.name));
	} else {
		sets.add(info.setID, crossed.name("DefaultCross"));
	}
}
/**
 * Translates a map-style {@link FlowNode} into a Flink mapPartition over the
 * input tuples, preserving the input's parallelism.
 *
 * @param input the incoming tuple data set
 * @param node  the flow node to translate
 * @return the mapped data set, named "map-" + the node's id
 */
private DataSet<Tuple> translateMap(DataSet<Tuple> input, FlowNode node) {
	Fields outFields = getOutScope(node).getOutValuesFields();
	// Make the output fields' types known to Kryo before building the operator.
	registerKryoTypes(outFields);

	// Keep the same degree of parallelism as the upstream operator.
	int parallelism = ((Operator) input).getParallelism();

	// NOTE: .returns(...) must directly follow .mapPartition(...) so the result
	// type of the non-generic EachMapper is declared explicitly.
	return input
		.mapPartition(new EachMapper(node))
		.returns(new TupleTypeInfo(outFields))
		.withParameters(this.getFlinkNodeConfig(node))
		.setParallelism(parallelism)
		.name("map-" + node.getID());
}
.setParallelism(probeSideDOP) .returns(new TupleTypeInfo(outFields)) .name("hashjoin-" + node.getID());