/** * Returns a {@link Map} from the ID of a {@link PCollectionNode PCollection} to a {@link * PCollectionNode} that contains part of that {@link PCollectionNode PCollection}. */ private static Map<String, PCollectionNode> createPartialPCollections( Collection<PCollectionNode> duplicates, Predicate<String> existingPCollectionIds) { Map<String, PCollectionNode> unzippedOutputs = new LinkedHashMap<>(); Predicate<String> existingOrNewIds = existingPCollectionIds.or( id -> unzippedOutputs.values().stream().map(PCollectionNode::getId).anyMatch(id::equals)); for (PCollectionNode duplicateOutput : duplicates) { String id = SyntheticComponents.uniqueId(duplicateOutput.getId(), existingOrNewIds); PCollection partial = duplicateOutput.getPCollection().toBuilder().setUniqueName(id).build(); // Check to make sure there is only one duplicated output with the same id - which ensures we // only introduce one 'partial output' per producer of that output. PCollectionNode alreadyDeduplicated = unzippedOutputs.put(duplicateOutput.getId(), PipelineNode.pCollection(id, partial)); checkArgument(alreadyDeduplicated == null, "a duplicate should only appear once per stage"); } return unzippedOutputs; }
/**
 * Initializes the Mockito-annotated fields and the fixtures used by each test: the completion
 * latch and its callback, the bundle factory, and the parallel executor service. Finally stubs
 * {@code evaluationContext.getMetrics()} to return {@code metrics}.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  // The callback is constructed around the single-count latch.
  evaluatorCompleted = new CountDownLatch(1);
  completionCallback = new RegisteringCompletionCallback(evaluatorCompleted);

  bundleFactory = ImmutableListBundleFactory.create();
  transformEvaluationState =
      TransformExecutorServices.parallel(MoreExecutors.newDirectExecutorService());

  // NOTE(review): the node built here is not stored anywhere; confirm whether it was meant to
  // be assigned to a field.
  RunnerApi.PCollection createdProto =
      RunnerApi.PCollection.newBuilder().setUniqueName("created").build();
  PipelineNode.pCollection("created", createdProto);

  when(evaluationContext.getMetrics()).thenReturn(metrics);
}
/**
 * Initializes the Mockito-annotated fields, then creates the bundle factory, the "created" input
 * PCollection node, and the {@link WindowEvaluatorFactory} under test.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  bundleFactory = ImmutableListBundleFactory.create();

  RunnerApi.PCollection createdProto =
      RunnerApi.PCollection.newBuilder().setUniqueName("created").build();
  input = PipelineNode.pCollection("created", createdProto);

  // Constructed after initMocks so the factory receives the initialized evaluation context.
  factory = new WindowEvaluatorFactory(evaluationContext);
}
.build(); PCollection parDoOutput = PCollection.newBuilder().setUniqueName("parDo.out").build(); QueryablePipeline p = QueryablePipeline.forPrimitivesIn( .build()) .putPcollections( "window.out", PCollection.newBuilder().setUniqueName("window.out").build()) .putEnvironments("rare", Environments.createDockerEnvironment("rare")) .putEnvironments("common", env)
.build()) .putPcollections( "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build()) .putTransforms("parDo", parDoTransform) .putPcollections( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putTransforms("window", windowTransform) .putPcollections( "window.out", PCollection.newBuilder().setUniqueName("window.out").build()) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build(); p, PipelineNode.pCollection( "impulse.out", PCollection.newBuilder().setUniqueName("impulse.out").build()), ImmutableSet.of( PipelineNode.pTransform("parDo", parDoTransform),
input.toBuilder().setUniqueName(kwiCollectionId).setCoderId(kwiCoderId).build(); String gbkoId = uniqueId(String.format("%s/GBKO", gbkId), components::containsTransforms); PTransform gbko =
.putTransforms("parDo", parDoTransform) .putPcollections( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putTransforms("stateful", statefulTransform) .putPcollections( "stateful.out", PCollection.newBuilder().setUniqueName("stateful.out").build()) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build()); contains( PipelineNode.pCollection( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()))); assertThat(subgraph, hasSubtransforms("parDo"));
.build()) .putPcollections( "read.out", PCollection.newBuilder().setUniqueName("read.out").build()) .putTransforms( "goTransform", .toByteString())) .build()) .putPcollections("go.out", PCollection.newBuilder().setUniqueName("go.out").build()) .putTransforms( "pyTransform", .toByteString())) .build()) .putPcollections("py.out", PCollection.newBuilder().setUniqueName("py.out").build()) .putEnvironments("go", Environments.createDockerEnvironment("go")) .putEnvironments("py", Environments.createDockerEnvironment("py")) p.getPerElementConsumers( PipelineNode.pCollection( "read.out", PCollection.newBuilder().setUniqueName("read.out").build())); p, PipelineNode.pCollection( "read.out", PCollection.newBuilder().setUniqueName("read.out").build()), differentEnvironments);
.putTransforms("parDo", parDoTransform) .putPcollections( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putTransforms("window", windowTransform) .putPcollections( "window.out", PCollection.newBuilder().setUniqueName("window.out").build()) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build());
.putTransforms("parDo", parDoTransform) .putPcollections( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putTransforms("timer", timerTransform) .putPcollections( "timer.out", PCollection.newBuilder().setUniqueName("timer.out").build()) .putEnvironments("common", Environments.createDockerEnvironment("common")) .build()); contains( PipelineNode.pCollection( "parDo.out", PCollection.newBuilder().setUniqueName("parDo.out").build()))); assertThat(subgraph, hasSubtransforms("parDo"));
.putTransforms("read", readTransform) .putPcollections( "read.out", PCollection.newBuilder().setUniqueName("read.out").build()) .putTransforms( "gbk", .build()) .putPcollections( "gbk.out", PCollection.newBuilder().setUniqueName("parDo.out").build()) .putEnvironments("common", env) .build());
"bounded", RunnerApi.PCollection.newBuilder() .setUniqueName("bounded") .setIsBounded(Enum.BOUNDED) .build())) "unbounded", RunnerApi.PCollection.newBuilder() .setUniqueName("unbounded") .setIsBounded(Enum.UNBOUNDED) .build()))
/**
 * Builds a pipeline restricted to the "root" and "consumer" transforms and checks that the
 * "ignored" transform, which reads the same PCollection, is not reported as a consumer.
 */
@Test
public void forTransformsWithSubgraph() {
  PTransform rootProto = PTransform.newBuilder().putOutputs("output", "output.out").build();
  PTransform consumerProto = PTransform.newBuilder().putInputs("input", "output.out").build();
  PTransform ignoredProto = PTransform.newBuilder().putInputs("input", "output.out").build();
  RunnerApi.PCollection outputProto =
      RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build();

  Components components =
      Components.newBuilder()
          .putTransforms("root", rootProto)
          .putPcollections("output.out", outputProto)
          .putTransforms("consumer", consumerProto)
          .putTransforms("ignored", ignoredProto)
          .build();

  QueryablePipeline pipeline =
      QueryablePipeline.forTransforms(ImmutableSet.of("root", "consumer"), components);

  // "root" is the only root transform of the restricted pipeline.
  PTransformNode expectedRoot =
      PipelineNode.pTransform("root", components.getTransformsOrThrow("root"));
  assertThat(pipeline.getRootTransforms(), contains(expectedRoot));

  // Only "consumer" consumes output.out; "ignored" was left out of the transform set.
  Set<PTransformNode> consumers =
      pipeline.getPerElementConsumers(
          PipelineNode.pCollection(
              "output.out", components.getPcollectionsOrThrow("output.out")));
  PTransformNode expectedConsumer =
      PipelineNode.pTransform("consumer", components.getTransformsOrThrow("consumer"));
  assertThat(consumers, contains(expectedConsumer));
}
@Test public void noEnvironmentThrows() { // (impulse.out) -> runnerTransform -> gbk.out // runnerTransform can't be executed in an environment, so trying to construct it should fail PTransform gbkTransform = PTransform.newBuilder() .putInputs("input", "impulse.out") .setSpec( FunctionSpec.newBuilder().setUrn(PTransformTranslation.GROUP_BY_KEY_TRANSFORM_URN)) .putOutputs("output", "gbk.out") .build(); QueryablePipeline p = QueryablePipeline.forPrimitivesIn( partialComponents .toBuilder() .putTransforms("runnerTransform", gbkTransform) .putPcollections( "gbk.out", PCollection.newBuilder().setUniqueName("gbk.out").build()) .build()); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("Environment must be populated"); GreedyStageFuser.forGrpcPortRead( p, impulseOutputNode, ImmutableSet.of(PipelineNode.pTransform("runnerTransform", gbkTransform))); }
@Test public void forTransformsWithMalformedGraph() { Components components = Components.newBuilder() .putTransforms( "root", PTransform.newBuilder().putOutputs("output", "output.out").build()) .putPcollections( "output.out", RunnerApi.PCollection.newBuilder().setUniqueName("output.out").build()) .putTransforms( "consumer", PTransform.newBuilder().putInputs("input", "output.out").build()) .build(); thrown.expect(IllegalArgumentException.class); // Consumer consumes a PCollection which isn't produced. QueryablePipeline.forTransforms(ImmutableSet.of("consumer"), components); }
/**
 * Registers a PCollection under {@code name}, using {@code name} as its unique name as well.
 *
 * @param name the component ID and unique name of the PCollection to add
 * @return this builder, to allow call chaining
 */
private ExecutableGraphBuilder addPCollection(String name) {
  PCollection named = PCollection.newBuilder().setUniqueName(name).build();
  components.putPcollections(name, named);
  return this;
}
/**
 * Creates the two PCollection nodes used as fixtures: one with ID "created" and one with ID
 * "downstream".
 */
@Before
public void setup() {
  created =
      PipelineNode.pCollection(
          "created", RunnerApi.PCollection.newBuilder().setUniqueName("created").build());
  // This node used to be stored in `created` as well, which overwrote the node built above and
  // left the "created" fixture pointing at the "downstream" PCollection.
  // NOTE(review): assumes the test class declares a `downstream` field - confirm.
  downstream =
      PipelineNode.pCollection(
          "downstream", RunnerApi.PCollection.newBuilder().setUniqueName("downstream").build());
}
/**
 * Creates the fixtures for each test: the "pc" PCollection node, the "pt" transform node whose
 * "out" output is "pc", and the bundle factory.
 */
@Before
public void setup() {
  RunnerApi.PCollection pcProto = RunnerApi.PCollection.newBuilder().setUniqueName("pc").build();
  pc = PipelineNode.pCollection("pc", pcProto);

  PTransform producerProto = PTransform.newBuilder().putOutputs("out", "pc").build();
  transform = PipelineNode.pTransform("pt", producerProto);

  bundleFactory = ImmutableListBundleFactory.create();
}
public static RunnerApi.PCollection toProto(PCollection<?> pCollection, SdkComponents components) throws IOException { String coderId = components.registerCoder(pCollection.getCoder()); String windowingStrategyId = components.registerWindowingStrategy(pCollection.getWindowingStrategy()); // TODO: Display Data return RunnerApi.PCollection.newBuilder() .setUniqueName(pCollection.getName()) .setCoderId(coderId) .setIsBounded(toProto(pCollection.isBounded())) .setWindowingStrategyId(windowingStrategyId) .build(); }
/**
 * Creates a PCollection message with the given unique name, the coder ID "coder", and the
 * windowing strategy ID "ws".
 *
 * @param name the unique name to give the PCollection
 * @return the built PCollection message
 */
private static PCollection pc(String name) {
  PCollection.Builder proto = PCollection.newBuilder();
  proto.setUniqueName(name);
  proto.setCoderId("coder");
  proto.setWindowingStrategyId("ws");
  return proto.build();
}