        context);
viewStream.uid(pTransform.getUniqueName() + "-" + sideInput.getKey().getLocalName());
          transform.getUniqueName(), windowedInputCoder, null,

  outputStream =
      inputDataStream.transform(transform.getUniqueName(), outputTypeInformation, doFnOperator);
} else {
  outputStream =
      inputDataStream
          .connect(transformedSideInputs.unionedSideInputs.broadcast())
          .transform(transform.getUniqueName(), outputTypeInformation, doFnOperator);
        outputMap.get(collectionId),
        outputCoders.get(collectionId),
        transform.getTransform().getUniqueName(),
        collectionId);
  taggedDataset =
      new GroupReduceOperator<>(
          groupedInput, typeInformation, function, transform.getTransform().getUniqueName());
} else {
  taggedDataset =
      new MapPartitionOperator<>(
          inputDataSet, typeInformation, function, transform.getTransform().getUniqueName());
Pipeline impulse =
    Pipeline.newBuilder()
        .addRootTransformIds(impulse1Transform.getUniqueName())
        .addRootTransformIds(impulse2Transform.getUniqueName())
        .addRootTransformIds(flattenTransform.getUniqueName())
        .setComponents(
            Components.newBuilder()
                .putEnvironments("py", Environments.createDockerEnvironment("py"))
                .putPcollections(flattenOutput.getUniqueName(), flattenOutput)
                .putTransforms(flattenTransform.getUniqueName(), flattenTransform)
                .putPcollections(read1Output.getUniqueName(), read1Output)
                .putTransforms(read1Transform.getUniqueName(), read1Transform)
                .putPcollections(read2Output.getUniqueName(), read2Output)
                .putTransforms(read2Transform.getUniqueName(), read2Transform)
                .putPcollections(impulse1Output.getUniqueName(), impulse1Output)
                .putTransforms(impulse1Transform.getUniqueName(), impulse1Transform)
                .putPcollections(impulse2Output.getUniqueName(), impulse2Output)
                .putTransforms(impulse2Transform.getUniqueName(), impulse2Transform)
                .build())
        .build();

assertThat(
    fused.getFusedStages(),
    containsInAnyOrder(
        ExecutableStageMatcher.withInput(impulse1Output.getUniqueName())
            .withTransforms(flattenTransform.getUniqueName(), read1Transform.getUniqueName()),
        ExecutableStageMatcher.withInput(impulse2Output.getUniqueName())
            .withTransforms(flattenTransform.getUniqueName(), read2Transform.getUniqueName())));
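// For context, a minimal sketch (an assumption, not shown in the fragment) of
// how the `fused` value asserted on above is produced; GreedyPipelineFuser.fuse
// is the runners-core-construction entry point:
FusedPipeline fused = GreedyPipelineFuser.fuse(impulse);
// Each impulse output should root exactly one executable stage containing the
// flatten plus the read on that branch, which is what the matchers check.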
            partialReduceTypeInfo,
            partialReduceFunction,
            "GroupCombine: " + transform.getTransform().getUniqueName());

            partialReduceTypeInfo,
            reduceFunction,
            transform.getTransform().getUniqueName());
// Expansion of a splittable ParDo into its RawGBK -> FeedSDF -> RunSDF stages.
PTransform rawGbk =
    PTransform.newBuilder()
        .setUniqueName(String.format("%s/RawGBK", spk.getUniqueName()))
        .putAllInputs(spk.getInputsMap())
        .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GBKO_URN))

PTransform feedSDF =
    PTransform.newBuilder()
        .setUniqueName(String.format("%s/FeedSDF", spk.getUniqueName()))
        .putInputs("input", kwiCollectionId)
        .setSpec(

PTransform runSDF =
    PTransform.newBuilder()
        .setUniqueName(String.format("%s/RunSDF", spk.getUniqueName()))
        .putInputs("input", feedSDFCollectionId)
        .setSpec(
for (String transformId : components.getTransformsMap().keySet()) {
  PTransform transform = components.getTransformsOrThrow(transformId);
  String previousId = uniqueNamesById.put(transform.getUniqueName(), transformId);
  checkArgument(
      previousId == null,
      "Transforms %s and %s both have unique name %s",
      transformId,
      previousId,
      transform.getUniqueName());
  validateTransform(transformId, transform, components);
}
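// A minimal sketch (assumed input, not from the source) of what this loop
// rejects: two transform ids sharing one unique_name cause uniqueNamesById.put
// to return a non-null previousId for the second id.
Components duplicated =
    Components.newBuilder()
        .putTransforms("t1", PTransform.newBuilder().setUniqueName("Read/Impulse").build())
        .putTransforms("t2", PTransform.newBuilder().setUniqueName("Read/Impulse").build())
        .build();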
// The direct GroupByKey expansion: a group-by-key-only (GBKO) step followed by
// a group-also-by-window (GABW) step.
PTransform gbko =
    PTransform.newBuilder()
        .setUniqueName(String.format("%s/GBKO", gbk.getUniqueName()))
        .putAllInputs(gbk.getInputsMap())
        .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GBKO_URN))

PTransform gabw =
    PTransform.newBuilder()
        .setUniqueName(String.format("%s/GABW", gbk.getUniqueName()))
        .putInputs("input", kwiCollectionId)
        .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GABW_URN))
assertThat(updated.getComponents().getTransformsMap(), hasKey("third_sub"));
assertThat(
    updated.getComponents().getTransformsOrThrow("second_sub").getUniqueName(),
    equalTo("second_sub"));
assertThat(
    updated.getComponents().getTransformsOrThrow("third_sub").getUniqueName(),
    equalTo("third_sub"));
        windowingStrategy,
        windowedInputCoder,
        pTransform.getUniqueName(),
        context);
outputDataStream.uid(pTransform.getUniqueName());
inputDataSet
    .flatMap(assignWindowsFunction)
    .name(transform.getTransform().getUniqueName())
    .returns(resultTypeInfo);
private <T> void translateAssignWindows(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.Components components = pipeline.getComponents();
  RunnerApi.PTransform transform = components.getTransformsOrThrow(id);
  RunnerApi.WindowIntoPayload payload;
  try {
    payload = RunnerApi.WindowIntoPayload.parseFrom(transform.getSpec().getPayload());
  } catch (InvalidProtocolBufferException e) {
    throw new IllegalArgumentException(e);
  }
  // TODO: https://issues.apache.org/jira/browse/BEAM-4296
  // This only works for well-known window fns; we should defer this execution to
  // the SDK if the WindowFn can't be parsed, or just defer it all the time.
  WindowFn<T, ? extends BoundedWindow> windowFn =
      (WindowFn<T, ? extends BoundedWindow>)
          WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn());
  String inputCollectionId = Iterables.getOnlyElement(transform.getInputsMap().values());
  String outputCollectionId = Iterables.getOnlyElement(transform.getOutputsMap().values());
  Coder<WindowedValue<T>> outputCoder = instantiateCoder(outputCollectionId, components);
  TypeInformation<WindowedValue<T>> resultTypeInfo = new CoderTypeInformation<>(outputCoder);
  DataStream<WindowedValue<T>> inputDataStream = context.getDataStreamOrThrow(inputCollectionId);
  FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
      new FlinkAssignWindows<>(windowFn);
  DataStream<WindowedValue<T>> resultDataStream =
      inputDataStream
          .flatMap(assignWindowsFunction)
          .name(transform.getUniqueName())
          .returns(resultTypeInfo);
  context.addDataStream(outputCollectionId, resultDataStream);
}
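// A sketch of how this translator is typically wired up: the portable Flink
// translator keeps a urn-to-translator map and dispatches on the transform's
// spec urn. The field and interface shown here are illustrative assumptions,
// not copied from this file; the urn constant is from PTransformTranslation.
private final Map<String, PTransformTranslator<StreamingTranslationContext>> translatorMap =
    ImmutableMap.<String, PTransformTranslator<StreamingTranslationContext>>builder()
        .put(PTransformTranslation.ASSIGN_WINDOWS_TRANSFORM_URN, this::translateAssignWindows)
        .build();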
@Test
public void toAndFromProto() throws IOException {
  SdkComponents components =
      SdkComponents.create(spec.getTransform().getPipeline().getOptions());
  RunnerApi.PTransform converted = convert(spec, components);
  Components protoComponents = components.toComponents();

  // Sanity checks
  assertThat(converted.getInputsCount(), equalTo(spec.getTransform().getInputs().size()));
  assertThat(converted.getOutputsCount(), equalTo(spec.getTransform().getOutputs().size()));
  assertThat(converted.getSubtransformsCount(), equalTo(spec.getChildren().size()));
  assertThat(converted.getUniqueName(), equalTo(spec.getTransform().getFullName()));
  for (PValue inputValue : spec.getTransform().getInputs().values()) {
    PCollection<?> inputPc = (PCollection<?>) inputValue;
    protoComponents.getPcollectionsOrThrow(components.registerPCollection(inputPc));
  }
  for (PValue outputValue : spec.getTransform().getOutputs().values()) {
    PCollection<?> outputPc = (PCollection<?>) outputValue;
    protoComponents.getPcollectionsOrThrow(components.registerPCollection(outputPc));
  }
}
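// Note: convert(spec, components) is presumably the test's own hook around
// PTransformTranslation's proto conversion; the assertions above check that the
// round trip preserves the unique name, inputs, outputs, and subtransforms.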
private static void translateImpulse(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  TypeInformation<WindowedValue<byte[]>> typeInformation =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
          context.getExecutionEnvironment(),
          new ImpulseInputFormat(),
          typeInformation,
          transform.getTransform().getUniqueName());
  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), dataSource);
}
private static void translateImpulse(
    PTransformNode transform, RunnerApi.Pipeline pipeline, BatchTranslationContext context) {
  TypeInformation<WindowedValue<byte[]>> typeInformation =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));
  DataSource<WindowedValue<byte[]>> dataSource =
      new DataSource<>(
              context.getExecutionEnvironment(),
              new ImpulseInputFormat(),
              typeInformation,
              transform.getTransform().getUniqueName())
          .name("Impulse");
  context.addDataSet(
      Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), dataSource);
}
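// For reference, the single element the ImpulseInputFormat emits: an empty byte
// array in the global window (WindowedValue.valueInGlobalWindow is the Beam
// utility; treating it as the format's only output is an assumption here).
WindowedValue<byte[]> impulseElement = WindowedValue.valueInGlobalWindow(new byte[0]);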