/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>The transformation calls a {@link org.apache.flink.api.common.functions.RichFlatMapFunction}
 * for each element of the DataSet. Each FlatMapFunction call can return any number of elements
 * including none.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
  if (flatMapper == null) {
    throw new NullPointerException("FlatMap function must not be null.");
  }
  // Capture the call site once so both type extraction and the operator report it.
  final String location = Utils.getCallLocationName();
  final TypeInformation<R> returnType =
      TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), location, true);
  return new FlatMapOperator<>(this, returnType, clean(flatMapper), location);
}
/**
 * Extracts the elements tagged with {@code unionTag} from a union-coded DataSet and registers
 * the pruned result under {@code collectionId}.
 *
 * @param taggedDataset the union-tagged input produced by the executable stage
 * @param context       translation context the pruned DataSet is registered with
 * @param unionTag      tag identifying which union outputs to keep
 * @param outputCoder   coder for the windowed values of the selected output
 * @param transformName name of the originating transform, used to label the Flink operator
 * @param collectionId  id under which the pruned DataSet is registered
 */
private static void pruneOutput(
    DataSet<RawUnionValue> taggedDataset,
    BatchTranslationContext context,
    int unionTag,
    Coder<WindowedValue<?>> outputCoder,
    String transformName,
    String collectionId) {
  TypeInformation<WindowedValue<?>> outputType = new CoderTypeInformation<>(outputCoder);
  FlinkExecutableStagePruningFunction pruningFunction =
      new FlinkExecutableStagePruningFunction(unionTag);
  FlatMapOperator<RawUnionValue, WindowedValue<?>> pruningOperator =
      new FlatMapOperator<>(
          taggedDataset,
          outputType,
          pruningFunction,
          // Name the operator after the transform so it is identifiable in the Flink UI.
          // Previously the transformName parameter was ignored and only the numeric
          // union tag appeared ("ExtractOutput[%s]"), which also passed an int to %s.
          String.format("%s/out.%d", transformName, unionTag));
  context.addDataSet(collectionId, pruningOperator);
}
}
/**
 * Extracts the elements carrying {@code unionTag} from a union-coded DataSet and registers the
 * pruned result with the translation context under {@code collectionId}.
 *
 * @param taggedDataset the union-tagged input produced by the executable stage
 * @param context       translation context the pruned DataSet is registered with
 * @param unionTag      tag identifying which union outputs to keep
 * @param outputCoder   coder for the windowed values of the selected output
 * @param transformName name of the originating transform, used to label the Flink operator
 * @param collectionId  id under which the pruned DataSet is registered
 */
private static void pruneOutput(
    DataSet<RawUnionValue> taggedDataset,
    BatchTranslationContext context,
    int unionTag,
    Coder<WindowedValue<?>> outputCoder,
    String transformName,
    String collectionId) {
  // Selector keeps only the elements that were tagged with this output's union tag.
  FlinkExecutableStagePruningFunction selector =
      new FlinkExecutableStagePruningFunction(unionTag);
  TypeInformation<WindowedValue<?>> prunedType = new CoderTypeInformation<>(outputCoder);
  String operatorName = String.format("%s/out.%d", transformName, unionTag);
  FlatMapOperator<RawUnionValue, WindowedValue<?>> pruned =
      new FlatMapOperator<>(taggedDataset, prunedType, selector, operatorName);
  context.addDataSet(collectionId, pruned);
}
}
/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>The transformation calls a {@link org.apache.flink.api.common.functions.RichFlatMapFunction}
 * for each element of the DataSet. Each FlatMapFunction call can return any number of elements
 * including none.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
  if (flatMapper == null) {
    throw new NullPointerException("FlatMap function must not be null.");
  }
  // Record where the user invoked flatMap; reused for error reporting and the operator name.
  final String invocationSite = Utils.getCallLocationName();
  final TypeInformation<R> outType =
      TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), invocationSite, true);
  return new FlatMapOperator<>(this, outType, clean(flatMapper), invocationSite);
}
/**
 * Applies a FlatMap transformation on a {@link DataSet}.
 *
 * <p>The transformation calls a {@link org.apache.flink.api.common.functions.RichFlatMapFunction}
 * for each element of the DataSet. Each FlatMapFunction call can return any number of elements
 * including none.
 *
 * @param flatMapper The FlatMapFunction that is called for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 *
 * @see org.apache.flink.api.common.functions.RichFlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
  if (flatMapper == null) {
    throw new NullPointerException("FlatMap function must not be null.");
  }
  // The call location labels both the extracted type's origin and the resulting operator.
  final String caller = Utils.getCallLocationName();
  final TypeInformation<R> mappedType =
      TypeExtractor.getFlatMapReturnTypes(flatMapper, getType(), caller, true);
  return new FlatMapOperator<>(this, mappedType, clean(flatMapper), caller);
}
/**
 * Filters a union-tagged DataSet down to the elements belonging to one output and registers the
 * result as the translated DataSet for {@code collection}.
 *
 * @param taggedDataSet union-tagged values produced by a multi-output transform
 * @param context       translation context the pruned DataSet is registered with
 * @param integerTag    tag selecting which union outputs to keep
 * @param collection    output PCollection; supplies the type info and the operator name
 */
private <T> void pruneOutput(
    DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
    FlinkBatchTranslationContext context,
    int integerTag,
    PCollection<T> collection) {
  // Selector drops every element whose union tag differs from integerTag.
  FlinkMultiOutputPruningFunction<T> selector =
      new FlinkMultiOutputPruningFunction<>(integerTag);
  TypeInformation<WindowedValue<T>> prunedType = context.getTypeInfo(collection);
  FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> pruned =
      new FlatMapOperator<>(taggedDataSet, prunedType, selector, collection.getName());
  context.setOutputDataSet(collection, pruned);
}
}
/**
 * Keeps only the elements of a union-tagged DataSet that carry {@code integerTag} and records
 * the resulting DataSet as the translation of {@code collection}.
 *
 * @param taggedDataSet union-tagged values produced by a multi-output transform
 * @param context       translation context the pruned DataSet is registered with
 * @param integerTag    tag selecting which union outputs to keep
 * @param collection    output PCollection; supplies the type info and the operator name
 */
private <T> void pruneOutput(
    DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
    FlinkBatchTranslationContext context,
    int integerTag,
    PCollection<T> collection) {
  TypeInformation<WindowedValue<T>> resultType = context.getTypeInfo(collection);
  FlinkMultiOutputPruningFunction<T> tagFilter =
      new FlinkMultiOutputPruningFunction<>(integerTag);
  // The operator is named after the PCollection so it is recognizable in the Flink UI.
  FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> filtered =
      new FlatMapOperator<>(taggedDataSet, resultType, tagFilter, collection.getName());
  context.setOutputDataSet(collection, filtered);
}
}
/**
 * Prunes a union-tagged DataSet to the single output identified by {@code integerTag} and
 * registers it as the translated DataSet for {@code collection}.
 *
 * @param taggedDataSet union-tagged values produced by a multi-output transform
 * @param context       translation context the pruned DataSet is registered with
 * @param integerTag    tag selecting which union outputs to keep
 * @param collection    output PCollection; supplies the type info and the operator name
 */
private <T> void pruneOutput(
    DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
    FlinkBatchTranslationContext context,
    int integerTag,
    PCollection<T> collection) {
  // Flat-map function that forwards only elements matching this output's tag.
  FlinkMultiOutputPruningFunction<T> keepTagged =
      new FlinkMultiOutputPruningFunction<>(integerTag);
  TypeInformation<WindowedValue<T>> outputTypeInfo = context.getTypeInfo(collection);
  FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> outputDataSet =
      new FlatMapOperator<>(taggedDataSet, outputTypeInfo, keepTagged, collection.getName());
  context.setOutputDataSet(collection, outputDataSet);
}
}
@Override public void translateNode(Create.Values<OUT> transform, FlinkBatchTranslationContext context) { TypeInformation<OUT> typeInformation = context.getOutputTypeInfo(); Iterable<OUT> elements = transform.getElements(); // we need to serialize the elements to byte arrays, since they might contain // elements that are not serializable by Java serialization. We deserialize them // in the FlatMap function using the Coder. List<byte[]> serializedElements = Lists.newArrayList(); Coder<OUT> coder = context.getOutput(transform).getCoder(); for (OUT element: elements) { ByteArrayOutputStream bao = new ByteArrayOutputStream(); try { coder.encode(element, bao, Coder.Context.OUTER); serializedElements.add(bao.toByteArray()); } catch (IOException e) { throw new RuntimeException("Could not serialize Create elements using Coder: " + e); } } DataSet<Integer> initDataSet = context.getExecutionEnvironment().fromElements(1); FlinkCreateFunction<Integer, OUT> flatMapFunction = new FlinkCreateFunction<>(serializedElements, coder); FlatMapOperator<Integer, OUT> outputDataSet = new FlatMapOperator<>(initDataSet, typeInformation, flatMapFunction, transform.getName()); context.setOutputDataSet(context.getOutput(transform), outputDataSet); } }
// Fragment: the enclosing method's signature (declaring outputTag, outputDataSet,
// outputType, output, context) is outside this view.
// Builds a flat-map that keeps only union elements tagged with outputTag, names the
// operator after the output PCollection, and registers the pruned DataSet with the
// translation context for that output.
FlinkMultiOutputPruningFunction<Object> pruningFunction = new FlinkMultiOutputPruningFunction<>(outputTag); FlatMapOperator<RawUnionValue, Object> pruningOperator = new FlatMapOperator<>(outputDataSet, outputType, pruningFunction, output.getValue().getName()); context.setOutputDataSet(output.getValue(), pruningOperator);