/**
 * Runs a Map-style function over whole partitions of this DataSet.
 *
 * <p>The supplied function is invoked once for every parallel partition, and receives the
 * complete partition through an Iterator. How many elements a single invocation sees is
 * not deterministic; it depends on the parallelism of the operation.
 *
 * <p>Use this for operations that cannot be expressed element by element and that need no
 * grouping of elements. For per-element transformations, {@code map()} and
 * {@code flatMap()} are the preferable choice.
 *
 * @param mapPartition The MapPartitionFunction that is called for the full DataSet.
 * @return A MapPartitionOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code mapPartition} is {@code null}.
 *
 * @see MapPartitionFunction
 * @see MapPartitionOperator
 */
public <R> MapPartitionOperator<T, R> mapPartition(MapPartitionFunction<T, R> mapPartition) {
    if (null == mapPartition) {
        throw new NullPointerException("MapPartition function must not be null.");
    }

    // NOTE(review): kept inline in this method on purpose; the helper presumably derives
    // the name from the current call stack, so do not move it into another method.
    final String location = Utils.getCallLocationName();

    final TypeInformation<R> producedType =
        TypeExtractor.getMapPartitionReturnTypes(mapPartition, getType(), location, true);

    final MapPartitionOperator<T, R> operator =
        new MapPartitionOperator<>(this, producedType, clean(mapPartition), location);
    return operator;
}
/**
 * Applies a partition-wise, Map-style operation to this DataSet.
 *
 * <p>Each parallel partition triggers exactly one call of the function, which can read
 * the entire partition from the Iterator it is given. The number of elements seen by one
 * instance of the function is non-deterministic and depends on the parallelism of the
 * operation.
 *
 * <p>This is meant for operations that cannot transform individual elements and that
 * require no grouping of elements. When individual elements are to be transformed,
 * prefer {@code map()} and {@code flatMap()}.
 *
 * @param mapPartition The MapPartitionFunction that is called for the full DataSet.
 * @return A MapPartitionOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code mapPartition} is {@code null}.
 *
 * @see MapPartitionFunction
 * @see MapPartitionOperator
 */
public <R> MapPartitionOperator<T, R> mapPartition(MapPartitionFunction<T, R> mapPartition) {
    final boolean functionMissing = (mapPartition == null);
    if (functionMissing) {
        throw new NullPointerException("MapPartition function must not be null.");
    }

    // NOTE(review): this lookup stays directly in this method body; it presumably
    // inspects the call stack, so wrapping it in a helper could change the result.
    final String callSite = Utils.getCallLocationName();
    final TypeInformation<R> outputType =
        TypeExtractor.getMapPartitionReturnTypes(mapPartition, getType(), callSite, true);

    return new MapPartitionOperator<>(
        this,
        outputType,
        clean(mapPartition),
        callSite);
}
/**
 * Transforms this DataSet one full partition at a time using a Map-style function.
 *
 * <p>The function is called once per parallel partition of the data and can access the
 * whole partition through the Iterator passed to it. The element count observed by each
 * instance of the function is non-deterministic, as it depends on the parallelism of the
 * operation.
 *
 * <p>The intended use is for operations that cannot work on individual elements and that
 * need no grouping of elements. To transform individual elements, {@code map()} and
 * {@code flatMap()} are preferable.
 *
 * @param mapPartition The MapPartitionFunction that is called for the full DataSet.
 * @return A MapPartitionOperator that represents the transformed DataSet.
 * @throws NullPointerException if {@code mapPartition} is {@code null}.
 *
 * @see MapPartitionFunction
 * @see MapPartitionOperator
 */
public <R> MapPartitionOperator<T, R> mapPartition(MapPartitionFunction<T, R> mapPartition) {
    // Guard clause: a missing function is rejected before any other work is done.
    if (!(mapPartition != null)) {
        throw new NullPointerException("MapPartition function must not be null.");
    }

    // NOTE(review): deliberately evaluated here rather than in a helper; the name is
    // presumably computed from the call stack — confirm against Utils before moving it.
    final String operatorLocation = Utils.getCallLocationName();

    final TypeInformation<R> returnType = TypeExtractor.getMapPartitionReturnTypes(
        mapPartition, getType(), operatorLocation, true);

    final MapPartitionOperator<T, R> result = new MapPartitionOperator<>(
        this, returnType, clean(mapPartition), operatorLocation);
    return result;
}
MapPartitionOperator<IN, RawUnionValue> outputDataSet = new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, transform.getName());
new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, fullName);
new MapPartitionOperator<>(inputDataSet, typeInformation, doFnWrapper, fullName);
transform.getMainOutputTag()); outputDataSet = new MapPartitionOperator<>( inputDataSet, typeInformation, doFnWrapper, transform.getName());
/**
 * Translates a {@code ParDo.Bound} transform into a Flink {@code MapPartitionOperator}.
 *
 * <p>The transform's {@code DoFn} is wrapped in a {@code FlinkDoFnFunction} and applied
 * to the input DataSet looked up from the translation context. The resulting operator is
 * handed to {@code transformSideInputs} together with the transform's side inputs, and is
 * finally registered in the context as the DataSet for the transform's output.
 *
 * @param transform the ParDo transform being translated.
 * @param context the batch translation context used to look up the input DataSet, the
 *     output type information and the pipeline options, and to register the result.
 */
@Override
public void translateNode(ParDo.Bound<IN, OUT> transform, FlinkBatchTranslationContext context) {
    final DataSet<IN> source = context.getInputDataSet(context.getInput(transform));

    final DoFn<IN, OUT> userFn = transform.getFn();
    final TypeInformation<OUT> outputTypeInfo =
        context.getTypeInfo(context.getOutput(transform));

    final FlinkDoFnFunction<IN, OUT> wrappedFn =
        new FlinkDoFnFunction<>(userFn, context.getPipelineOptions());

    // The operator is named after the transform.
    final String operatorName = transform.getName();
    final MapPartitionOperator<IN, OUT> result =
        new MapPartitionOperator<>(source, outputTypeInfo, wrappedFn, operatorName);

    // Side inputs are attached before the operator is published as the output.
    transformSideInputs(transform.getSideInputs(), result, context);

    context.setOutputDataSet(context.getOutput(transform), result);
}
}
} else { taggedDataset = new MapPartitionOperator<>(inputDataSet, typeInformation, function, operatorName);
} else { taggedDataset = new MapPartitionOperator<>( inputDataSet, typeInformation, function, transform.getTransform().getUniqueName());