Map<String, String> outputs = transform.getOutputsMap(); stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); transform.getUniqueName(), windowedInputCoder, null, inputDataStream.getTransformation(), sideInputStream.getTransformation(), transform.getUniqueName(), doFnOperator, outputTypeInformation, outputStream.uid(transform.getUniqueName());
.getTransforms() .stream() .flatMap(t -> t.getTransform().getOutputsMap().values().stream()) .collect(Collectors.toSet())); Set<String> danglingInputs = .getTransforms() .stream() .flatMap(t -> t.getTransform().getInputsMap().values().stream()) .filter(in -> !possibleInputs.contains(in)) .collect(Collectors.toSet()); Map<String, String> validInputs = transform .getInputsMap() .entrySet() .stream() .collect(Collectors.toMap(Entry::getKey, Entry::getValue)); if (!validInputs.equals(transform.getInputsMap())) { transform.toBuilder().clearInputs().putAllInputs(validInputs).build());
components .getTransformsOrThrow(sideInput.getKey().getTransformId()) .getInputsOrThrow(sideInput.getKey().getLocalName()); DataStream<Object> sideInputStream = context.getDataStreamOrThrow(collectionId); TypeInformation<Object> tpe = sideInputStream.getType(); RunnerApi.PTransform pTransform = components.getTransformsOrThrow(sideInput.getKey().getTransformId()); String collectionId = pTransform.getInputsOrThrow(sideInput.getKey().getLocalName()); DataStream<WindowedValue<?>> sideInputStream = context.getDataStreamOrThrow(collectionId); context); viewStream.uid(pTransform.getUniqueName() + "-" + sideInput.getKey().getLocalName());
PCollection impulse2Output = pc("impulse2.out"); PTransform flattenTransform = PTransform.newBuilder() .setUniqueName("Flatten") .putInputs(read1Output.getUniqueName(), read1Output.getUniqueName()) PTransform.newBuilder() .setUniqueName("read1") .putInputs(impulse1Output.getUniqueName(), impulse1Output.getUniqueName()) .build(); PTransform read2Transform = PTransform.newBuilder() .setUniqueName("read2") .putInputs(impulse2Output.getUniqueName(), impulse2Output.getUniqueName()) PTransform.newBuilder() .setUniqueName("impulse1") .putOutputs(impulse1Output.getUniqueName(), impulse1Output.getUniqueName()) .build(); PTransform impulse2Transform = PTransform.newBuilder() .setUniqueName("impulse2") .putOutputs(impulse2Output.getUniqueName(), impulse2Output.getUniqueName()) Pipeline impulse = Pipeline.newBuilder() .addRootTransformIds(impulse1Transform.getUniqueName())
Map<String, String> outputs = transform.getTransform().getOutputsMap(); stagePayload = RunnerApi.ExecutableStagePayload.parseFrom( transform.getTransform().getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); taggedDataset = new GroupReduceOperator<>( groupedInput, typeInformation, function, transform.getTransform().getUniqueName()); } else { taggedDataset = new MapPartitionOperator<>( inputDataSet, typeInformation, function, transform.getTransform().getUniqueName()); components .getTransformsOrThrow(sideInputId.getTransformId()) .getInputsOrThrow(sideInputId.getLocalName()); outputMap.get(collectionId), outputCoders.get(collectionId), transform.getTransform().getUniqueName(), collectionId);
Map<String, String> outputs = transform.getOutputsMap(); stagePayload = RunnerApi.ExecutableStagePayload.parseFrom(transform.getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); transform.getUniqueName(), windowedInputCoder, null, inputDataStream.transform(transform.getUniqueName(), outputTypeInformation, doFnOperator); } else { outputStream = inputDataStream .connect(transformedSideInputs.unionedSideInputs.broadcast()) .transform(transform.getUniqueName(), outputTypeInformation, doFnOperator);
Map<String, String> outputs = transform.getTransform().getOutputsMap(); stagePayload = RunnerApi.ExecutableStagePayload.parseFrom( transform.getTransform().getSpec().getPayload()); } catch (IOException e) { throw new RuntimeException(e); components .getTransformsOrThrow(sideInputId.getTransformId()) .getInputsOrThrow(sideInputId.getLocalName()); outputMap.get(collectionId), outputCoders.get(collectionId), transform.getTransform().getUniqueName(), collectionId);
RunnerApi.Components components = pipeline.getComponents(); String inputPCollectionId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); PCollectionNode inputCollection = PipelineNode.pCollection( partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getTransform().getUniqueName()); partialReduceTypeInfo, reduceFunction, transform.getTransform().getUniqueName()); Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), outputDataSet);
for (String transformId : components.getTransformsMap().keySet()) { PTransform transform = components.getTransformsOrThrow(transformId); String previousId = uniqueNamesById.put(transform.getUniqueName(), transformId); transformId, previousId, transform.getUniqueName()); validateTransform(transformId, transform, components);
PTransform spk = components.getTransformsOrThrow(spkId); checkArgument( PTransformTranslation.SPLITTABLE_PROCESS_KEYED_URN.equals(spk.getSpec().getUrn()), "URN must be %s, got %s", PTransformTranslation.SPLITTABLE_PROCESS_KEYED_URN, spk.getSpec().getUrn()); Builder newPTransform = spk.toBuilder(); String inputId = getOnlyElement(spk.getInputsMap().values()); PCollection input = components.getPcollectionsOrThrow(inputId); uniqueId(String.format("%s/RawGBK", spkId), components::containsTransforms); PTransform rawGbk = PTransform.newBuilder() .setUniqueName(String.format("%s/RawGBK", spk.getUniqueName())) .putAllInputs(spk.getInputsMap()) .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GBKO_URN)) .putOutputs("output", kwiCollectionId) uniqueId(String.format("%s/FeedSDF", spkId), components::containsTransforms); PTransform feedSDF = PTransform.newBuilder() .setUniqueName(String.format("%s/FeedSDF", spk.getUniqueName())) .putInputs("input", kwiCollectionId) .setSpec( uniqueId(String.format("%s/RunSDF", spkId), components::containsTransforms); PTransform runSDF =
RunnerApi.Components components = pipeline.getComponents(); String inputPCollectionId = Iterables.getOnlyElement(transform.getTransform().getInputsMap().values()); PCollectionNode inputCollection = PipelineNode.pCollection( partialReduceTypeInfo, partialReduceFunction, "GroupCombine: " + transform.getTransform().getUniqueName()); partialReduceTypeInfo, reduceFunction, transform.getTransform().getUniqueName()); Iterables.getOnlyElement(transform.getTransform().getOutputsMap().values()), outputDataSet);
(transformId, transform) -> { if (transform .getSpec() .getUrn() .equals(PTransformTranslation.CREATE_VIEW_TRANSFORM_URN)) { viewTransforms.add(transformId); viewOutputsToInputs.put( Iterables.getOnlyElement(transform.getOutputsMap().values()), Iterables.getOnlyElement(transform.getInputsMap().values())); .forEach( (transformId, transform) -> { RunnerApi.PTransform.Builder transformBuilder = transform.toBuilder(); transform .getInputsMap() .forEach( (key, value) -> { .getOutputsMap() .forEach( (key, value) -> { transformBuilder.addAllSubtransforms( transform .getSubtransformsList() .stream() .filter(id -> !viewTransforms.contains(id))
components .getTransformsOrThrow(sideInput.getKey().getTransformId()) .getInputsOrThrow(sideInput.getKey().getLocalName()); DataStream<Object> sideInputStream = context.getDataStreamOrThrow(collectionId); TypeInformation<Object> tpe = sideInputStream.getType(); components .getTransformsOrThrow(sideInput.getKey().getTransformId()) .getInputsOrThrow(sideInput.getKey().getLocalName()); DataStream<WindowedValue<?>> sideInputStream = context.getDataStreamOrThrow(collectionId);
q.getTransforms() .stream() .filter(node -> node.getTransform().getSpec().getUrn().equals(feedSdfUrn)) .collect(Collectors.toList()); Map<String, PTransformNode> stageToFeeder = Maps.newHashMap(); PCollectionNode output = Iterables.getOnlyElement(q.getOutputPCollections(node)); PTransformNode consumer = Iterables.getOnlyElement(q.getPerElementConsumers(output)); String consumerUrn = consumer.getTransform().getSpec().getUrn(); checkState( consumerUrn.equals(ExecutableStage.URN), node.getId(), node.getTransform() .toBuilder() .mergeSpec( Iterables.getOnlyElement(node.getTransform().getInputsMap().keySet()), rawGBKOutput.getId()) .build());
PTransform gbk = components.getTransformsOrThrow(gbkId); checkArgument( PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN.equals(gbk.getSpec().getUrn()), "URN must be %s, got %s", PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN, gbk.getSpec().getUrn()); PTransform.Builder newTransform = gbk.toBuilder(); Components.Builder newComponents = Components.newBuilder(); String inputId = getOnlyElement(gbk.getInputsMap().values()); String gbkoId = uniqueId(String.format("%s/GBKO", gbkId), components::containsTransforms); PTransform gbko = PTransform.newBuilder() .setUniqueName(String.format("%s/GBKO", gbk.getUniqueName())) .putAllInputs(gbk.getInputsMap()) .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GBKO_URN)) .putOutputs("output", kwiCollectionId) PTransform.newBuilder() .setUniqueName(String.format("%s/GABW", gbk.getUniqueName())) .putInputs("input", kwiCollectionId) .setSpec(FunctionSpec.newBuilder().setUrn(DirectGroupByKey.DIRECT_GABW_URN)) .putAllOutputs(gbk.getOutputsMap()) .build(); newTransform.addSubtransforms(gabwId);
.putTransforms( "first", PTransform.newBuilder() .setSpec(FunctionSpec.newBuilder().setUrn("beam:first")) .build()) .putTransforms( "second", PTransform.newBuilder() .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated")) .build()) .putTransforms( "third", PTransform.newBuilder() .setSpec(FunctionSpec.newBuilder().setUrn("beam:repeated")) .build()) return MessageWithComponents.newBuilder() .setPtransform( PTransform.newBuilder() .setSpec( FunctionSpec.newBuilder() .putTransforms( subtransform, PTransform.newBuilder().setUniqueName(subtransform).build())) .build(); }); assertThat(updatedSecond.getSubtransformsList(), contains("second_sub"));
PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead")) .getTransform() .getOutputsMap() .values()); PCollectionNode mainInput = getOnlyElement( parDoTransform .getInputsMap() .entrySet() .stream() .map(Map.Entry::getKey) .collect(Collectors.toSet())); String sideInputCollectionId = parDoTransform.getInputsOrThrow(sideInputLocalName); PCollectionNode sideInput = PipelineNode.pCollection(
private <T> void translateFlatten( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { Map<String, String> allInputs = pipeline.getComponents().getTransformsOrThrow(id).getInputsMap(); context.addDataStream( Iterables.getOnlyElement( pipeline.getComponents().getTransformsOrThrow(id).getOutputsMap().values()), result); } else { pipeline.getComponents().getTransformsOrThrow(id).getOutputsMap().values()), result);
private <T> void translateFlatten( String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) { RunnerApi.PTransform transform = pipeline.getComponents().getTransformsOrThrow(id); Map<String, String> allInputs = transform.getInputsMap(); WindowedValue.getFullCoder( (Coder<T>) VoidCoder.of(), GlobalWindow.Coder.INSTANCE))); context.addDataStream(Iterables.getOnlyElement(transform.getOutputsMap().values()), result); } else { DataStream<T> result = null; context.addDataStream(Iterables.getOnlyElement(transform.getOutputsMap().values()), result);
components .getTransformsOrThrow(sideInputId.getTransformId()) .getInputsOrThrow(sideInputId.getLocalName()); RunnerApi.WindowingStrategy windowingStrategyProto = components.getWindowingStrategiesOrThrow(