/**
 * Creates a stage from a full set of pipeline components, keeping only the stage's own transforms.
 *
 * <p>A builder copy of {@code components} has its transforms cleared and is then repopulated with
 * the entries of {@code transforms}, keyed by transform id. The resulting components are passed to
 * {@code of} together with the remaining arguments, which are forwarded untouched.
 *
 * @param components the components to start from; its transforms are replaced
 * @param transforms the transforms that make up the stage; ids must be distinct, since they are
 *     collected with {@link Collectors#toMap}
 */
public static ImmutableExecutableStage ofFullComponents(
    Components components,
    Environment environment,
    PCollectionNode input,
    Collection<SideInputReference> sideInputs,
    Collection<UserStateReference> userStates,
    Collection<TimerReference> timers,
    Collection<PTransformNode> transforms,
    Collection<PCollectionNode> outputs) {
  Components.Builder pruned = components.toBuilder();
  // Forget every transform of the full pipeline before re-adding only this stage's transforms.
  pruned.clearTransforms();
  pruned.putAllTransforms(
      transforms.stream()
          .collect(Collectors.toMap(node -> node.getId(), node -> node.getTransform())));
  return of(
      pruned.build(), environment, input, sideInputs, userStates, timers, transforms, outputs);
}
getComponents().toBuilder().putAllTransforms(executableStageTransforms).build(); List<String> rootTransformIds = StreamSupport.stream(
.collect( Collectors.toMap(PCollectionNode::getId, PCollectionNode::getPCollection))) .build(); return ImmutableExecutableStage.of( updatedStageComponents,
String sdkWireCoderId = WireCoders.addSdkWireCoder(fooPC, builder); String barSdkWireCoderId = WireCoders.addSdkWireCoder(barPC, builder); Components components = builder.build();
Components components = builder.build();
"window.out", PCollection.newBuilder().setUniqueName("window.out").build()) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build());
.build()) .build()) .build();
.putPcollections("output.out", pc("output.out")) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build();
"gbk.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putEnvironments("common", env) .build());
Components components = builder.build();
.build()) .putPcollections("enigma.out", pc("enigma.out")) .build(); FusedPipeline fused = GreedyPipelineFuser.fuse(Pipeline.newBuilder().setComponents(components).build());
private Coder<K> getKeyCoder(PTransformNode application) { PCollectionNode inputPCollection = getOnlyElement(graph.getPerElementInputs(application)); try { // We know the type restrictions on the input PCollection, and the restrictions on the // Wire coder Builder builder = GroupByKeyOnlyEvaluatorFactory.this.components.toBuilder(); String wireCoderId = WireCoders.addRunnerWireCoder(inputPCollection, builder); Coder<WindowedValue<KV<K, V>>> wireCoder = (Coder<WindowedValue<KV<K, V>>>) RehydratedComponents.forComponents(builder.build()).getCoder(wireCoderId); checkArgument( wireCoder instanceof WindowedValue.WindowedValueCoder, "Wire %s must be a %s", Coder.class.getSimpleName(), WindowedValueCoder.class.getSimpleName()); WindowedValueCoder<KV<K, V>> windowedValueCoder = (WindowedValueCoder<KV<K, V>>) wireCoder; checkArgument( windowedValueCoder.getValueCoder() instanceof KvCoder, "Input elements to %s must be encoded with a %s", DirectGroupByKey.DirectGroupByKeyOnly.class.getSimpleName(), KvCoder.class.getSimpleName()); KvCoder<K, V> kvCoder = (KvCoder<K, V>) windowedValueCoder.getValueCoder(); return kvCoder.getKeyCoder(); } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Calls {@code getPrimitiveTransformIds} on components that carry extra entries (a coder, a
 * windowing strategy, an environment and a PCollection) which are added on top of the translated
 * pipeline. The result is not asserted on; the test only requires that the call completes.
 */
@Test
public void retainOnlyPrimitivesIgnoresUnreachableNodes() {
  Pipeline pipeline = Pipeline.create();
  pipeline.apply(
      new org.apache.beam.sdk.transforms.PTransform<PBegin, PCollection<Long>>() {
        @Override
        public PCollection<Long> expand(PBegin input) {
          PCollection<Long> sequence = input.apply(GenerateSequence.from(2L));
          PCollection<Long> windowed =
              sequence.apply(Window.into(FixedWindows.of(Duration.standardMinutes(5L))));
          return windowed.apply(MapElements.into(TypeDescriptors.longs()).via(l -> l + 1));
        }
      });

  Components.Builder augmented = PipelineTranslation.toProto(pipeline).getComponents().toBuilder();
  augmented.putCoders("extra-coder", RunnerApi.Coder.getDefaultInstance());
  augmented.putWindowingStrategies(
      "extra-windowing-strategy", RunnerApi.WindowingStrategy.getDefaultInstance());
  augmented.putEnvironments("extra-env", RunnerApi.Environment.getDefaultInstance());
  augmented.putPcollections("extra-pc", RunnerApi.PCollection.getDefaultInstance());
  Components augmentedComponents = augmented.build();

  Collection<String> primitiveIds =
      QueryablePipeline.getPrimitiveTransformIds(augmentedComponents);
}
}
/**
 * Builds a pipeline view over only the "root" and "consumer" transforms and checks that "root" is
 * the sole root transform and that "consumer" is the sole per-element consumer of "output.out",
 * even though the components also hold an "ignored" transform reading the same PCollection.
 */
@Test
public void forTransformsWithSubgraph() {
  PTransform root = PTransform.newBuilder().putOutputs("output", "output.out").build();
  RunnerApi.PCollection rootOutput =
      RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build();
  PTransform consumer = PTransform.newBuilder().putInputs("input", "output.out").build();
  PTransform ignored = PTransform.newBuilder().putInputs("input", "output.out").build();

  Components components =
      Components.newBuilder()
          .putTransforms("root", root)
          .putPcollections("output.out", rootOutput)
          .putTransforms("consumer", consumer)
          .putTransforms("ignored", ignored)
          .build();

  QueryablePipeline pipeline =
      QueryablePipeline.forTransforms(ImmutableSet.of("root", "consumer"), components);

  PTransformNode expectedRoot =
      PipelineNode.pTransform("root", components.getTransformsOrThrow("root"));
  assertThat(pipeline.getRootTransforms(), contains(expectedRoot));

  Set<PTransformNode> consumers =
      pipeline.getPerElementConsumers(
          PipelineNode.pCollection(
              "output.out", components.getPcollectionsOrThrow("output.out")));
  PTransformNode expectedConsumer =
      PipelineNode.pTransform("consumer", components.getTransformsOrThrow("consumer"));
  assertThat(consumers, contains(expectedConsumer));
}
@Test public void noEnvironmentThrows() { // (impulse.out) -> runnerTransform -> gbk.out // runnerTransform can't be executed in an environment, so trying to construct it should fail PTransform gbkTransform = PTransform.newBuilder() .putInputs("input", "impulse.out") .setSpec( FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)) .putOutputs("output", "gbk.out") .build(); QueryablePipeline p = QueryablePipeline.forPrimitivesIn( partialComponents .toBuilder() .putTransforms("runnerTransform", gbkTransform) .putPcollections( "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build()) .build()); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("Environment must be populated"); GreedyStageFuser.forGrpcPortRead( p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("runnerTransform", gbkTransform))); }
/**
 * Populates {@code partialComponents} before each test with an impulse transform and its output
 * PCollection, the "go" and "py" docker environments, two empty coders, and a windowing strategy
 * "ws" that refers to "windowCoder".
 */
@Before
public void setup() {
  PTransform impulse =
      PTransform.newBuilder()
          .setUniqueName("Impulse")
          .putOutputs("output", "impulse.out")
          .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
          .build();

  Components.Builder fixture = Components.newBuilder();
  fixture.putTransforms("impulse", impulse);
  fixture.putPcollections("impulse.out", pc("impulse.out"));
  fixture.putEnvironments("go", Environments.createDockerEnvironment("go"));
  fixture.putEnvironments("py", Environments.createDockerEnvironment("py"));
  fixture.putCoders("coder", Coder.newBuilder().build());
  fixture.putCoders("windowCoder", Coder.newBuilder().build());
  fixture.putWindowingStrategies(
      "ws", WindowingStrategy.newBuilder().setWindowCoderId("windowCoder").build());

  partialComponents = fixture.build();
}
/**
 * Expects an {@link IllegalArgumentException} when the components contain a "root" impulse
 * transform that outputs "output.out" but no PCollection entry is registered at all.
 */
@Test
public void fromComponentsWithMalformedComponents() {
  FunctionSpec impulseSpec =
      FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN).build();
  PTransform root =
      PTransform.newBuilder().setSpec(impulseSpec).putOutputs("output", "output.out").build();
  Components components = Components.newBuilder().putTransforms("root", root).build();

  thrown.expect(IllegalArgumentException.class);
  QueryablePipeline.forPrimitivesIn(components).getComponents();
}
@Test public void forTransformsWithMalformedGraph() { Components components = Components.newBuilder() .putTransforms( "root", PTransform.newBuilder().putOutputs("output", "output.out").build()) .putPcollections( "output.out", RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build()) .putTransforms( "consumer", PTransform.newBuilder().putInputs("input", "output.out").build()) .build(); thrown.expect(IllegalArgumentException.class); // Consumer consumes a PCollection which isn't produced. QueryablePipeline.forTransforms(ImmutableSet.of("consumer"), components); }
/**
 * Populates {@code partialComponents} before each test with a single impulse transform whose
 * output "impulse.out" is mapped to the {@code impulseDotOut} PCollection.
 */
@Before
public void setup() {
  PTransform impulse =
      PTransform.newBuilder()
          .putOutputs("output", "impulse.out")
          .setSpec(FunctionSpec.newBuilder().setUrn(PTransformTranslation.IMPULSE_TRANSFORM_URN))
          .build();

  Components.Builder fixture = Components.newBuilder();
  fixture.putTransforms("impulse", impulse);
  fixture.putPcollections("impulse.out", impulseDotOut);
  partialComponents = fixture.build();
}
/**
 * Produces a {@link RunnerApi.Components} from this {@link SdkComponents} by building the
 * underlying components builder. The result includes all of the contained {@link Coder coders},
 * {@link WindowingStrategy windowing strategies}, {@link PCollection PCollections}, and {@link
 * PTransform PTransforms}.
 *
 * @return the components built from the current builder contents
 */
@Experimental
public RunnerApi.Components toComponents() {
  RunnerApi.Components built = componentsBuilder.build();
  return built;
}
}