/**
 * Returns one {@link TimerReference} for every local timer name that {@code
 * getLocalTimerNames} reports for the provided transform.
 *
 * <p>The transform proto attached to each reference is looked up in {@code components} by the
 * node's id rather than taken from the node itself.
 */
public Collection<TimerReference> getTimers(PTransformNode transform) {
  return getLocalTimerNames(transform.getTransform()).stream()
      .map(
          timerName -> {
            String id = transform.getId();
            PTransform proto = components.getTransformsOrThrow(id);
            PTransformNode owner = PipelineNode.pTransform(id, proto);
            return TimerReference.of(owner, timerName);
          })
      .collect(Collectors.toSet());
}
/**
 * Always throws an {@link IllegalArgumentException} naming the URN and id of the PTransform that
 * could not be translated.
 *
 * @param id the id of the PTransform, resolved against the pipeline's components
 * @param pipeline the pipeline whose components contain the PTransform
 * @param context not read by this method
 */
private void urnNotFound(
    String id,
    RunnerApi.Pipeline pipeline,
    FlinkStreamingPortablePipelineTranslator.TranslationContext context) {
  String urn = pipeline.getComponents().getTransformsOrThrow(id).getSpec().getUrn();
  String message = String.format("Unknown type of URN %s for PTransform with id %s.", urn, id);
  throw new IllegalArgumentException(message);
}
/**
 * Fails translation for a PTransform whose URN is not handled: looks up the transform's URN in
 * the pipeline components and throws an {@link IllegalArgumentException} that reports it.
 *
 * @param id the id of the offending PTransform
 * @param pipeline the pipeline proto that owns the PTransform
 * @param context unused here
 */
private void urnNotFound(
    String id,
    RunnerApi.Pipeline pipeline,
    FlinkStreamingPortablePipelineTranslator.TranslationContext context) {
  RunnerApi.PTransform unknownTransform = pipeline.getComponents().getTransformsOrThrow(id);
  String unknownUrn = unknownTransform.getSpec().getUrn();
  throw new IllegalArgumentException(
      String.format("Unknown type of URN %s for PTransform with id %s.", unknownUrn, id));
}
public Collection<UserStateReference> getUserStates(PTransformNode transform) { return getLocalUserStateNames(transform.getTransform()) .stream() .map( localName -> { String transformId = transform.getId(); PTransform transformProto = components.getTransformsOrThrow(transformId); // Get the main input PCollection id. String collectionId = transform .getTransform() .getInputsOrThrow( Iterables.getOnlyElement( Sets.difference( transform.getTransform().getInputsMap().keySet(), ImmutableSet.builder() .addAll(getLocalSideInputNames(transformProto)) .addAll(getLocalTimerNames(transformProto)) .build()))); PCollection collection = components.getPcollectionsOrThrow(collectionId); return UserStateReference.of( PipelineNode.pTransform(transformId, transformProto), localName, PipelineNode.pCollection(collectionId, collection)); }) .collect(Collectors.toSet()); }
/**
 * Asserts that the root transforms of {@code fusedProto} are listed in topological order.
 *
 * <p>Walking the roots in their listed order, every input of a root must already have been
 * produced by an earlier root, and no root may output a PCollection that an earlier root has
 * already consumed.
 */
private void assertRootsInTopologicalOrder(RunnerApi.Pipeline fusedProto) {
  Set<String> consumedSoFar = new HashSet<>();
  Set<String> producedSoFar = new HashSet<>();
  for (String rootId : fusedProto.getRootTransformIdsList()) {
    PTransform root = fusedProto.getComponents().getTransformsOrThrow(rootId);

    // Every input of this root must have been output by a root visited earlier.
    String[] rootInputs = root.getInputsMap().values().toArray(new String[0]);
    assertThat(
        String.format(
            "All %s consumed by %s must be produced before it",
            PCollection.class.getSimpleName(), rootId),
        producedSoFar,
        hasItems(rootInputs));

    // None of this root's outputs may have been consumed by a root visited earlier.
    for (String alreadyConsumed : consumedSoFar) {
      assertThat(
          String.format(
              "%s %s was consumed before all of its producers produced it",
              PCollection.class.getSimpleName(), alreadyConsumed),
          root.getOutputsMap().values(),
          not(hasItem(alreadyConsumed)));
    }

    consumedSoFar.addAll(root.getInputsMap().values());
    producedSoFar.addAll(root.getOutputsMap().values());
  }
}
}
/**
 * Translates a streaming impulse transform into a Flink source and registers the resulting
 * stream under the transform's single output PCollection id.
 *
 * <p>The transform payload is parsed as JSON; {@code interval_ms} and {@code message_count} are
 * read from it with fallbacks of 100 and 0 respectively. The source is added under the simple
 * class name of {@link StreamingImpulseSource}.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context providing the execution environment
 * @throws RuntimeException if the payload cannot be parsed
 */
private void translateStreamingImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> outputType =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  final int intervalMillis;
  final int messageCount;
  try {
    byte[] payload = impulseTransform.getSpec().getPayload().toByteArray();
    JsonNode settings = new ObjectMapper().readTree(payload);
    intervalMillis = settings.path("interval_ms").asInt(100);
    messageCount = settings.path("message_count").asInt(0);
  } catch (IOException e) {
    throw new RuntimeException("Failed to parse configuration for streaming impulse", e);
  }

  StreamingImpulseSource impulseSource = new StreamingImpulseSource(intervalMillis, messageCount);
  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context
          .getExecutionEnvironment()
          .addSource(impulseSource, StreamingImpulseSource.class.getSimpleName())
          .returns(outputType);

  String outputId = Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputId, impulseStream);
}
/**
 * Translates a streaming impulse transform into a Flink source and registers the resulting
 * stream under the transform's single output PCollection id.
 *
 * <p>The transform payload is parsed as JSON; {@code interval_ms} and {@code message_count} are
 * read from it with fallbacks of 100 and 0 respectively.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context providing the execution environment
 * @throws RuntimeException if the payload cannot be parsed
 */
private void translateStreamingImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> outputType =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  final int intervalMillis;
  final int messageCount;
  try {
    byte[] payload = impulseTransform.getSpec().getPayload().toByteArray();
    JsonNode settings = new ObjectMapper().readTree(payload);
    intervalMillis = settings.path("interval_ms").asInt(100);
    messageCount = settings.path("message_count").asInt(0);
  } catch (IOException e) {
    throw new RuntimeException("Failed to parse configuration for streaming impulse", e);
  }

  StreamingImpulseSource impulseSource = new StreamingImpulseSource(intervalMillis, messageCount);
  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context.getExecutionEnvironment().addSource(impulseSource).returns(outputType);

  String outputId = Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputId, impulseStream);
}
/** * Tests that {@link QueryablePipeline#getPerElementConsumers(PCollectionNode)} returns a * transform that consumes the node more than once. */ @Test public void perElementConsumersWithConsumingMultipleTimes() { Pipeline p = Pipeline.create(); PCollection<Long> longs = p.apply("BoundedRead", Read.from(CountingSource.upTo(100L))); PCollectionList.of(longs).and(longs).and(longs).apply("flatten", Flatten.pCollections()); Components components = PipelineTranslation.toProto(p).getComponents(); // This breaks if the way that IDs are assigned to PTransforms changes in PipelineTranslation String readOutput = getOnlyElement(components.getTransformsOrThrow("BoundedRead").getOutputsMap().values()); QueryablePipeline qp = QueryablePipeline.forPrimitivesIn(components); Set<PTransformNode> consumers = qp.getPerElementConsumers( PipelineNode.pCollection(readOutput, components.getPcollectionsOrThrow(readOutput))); assertThat(consumers.size(), equalTo(1)); assertThat( getOnlyElement(consumers).getTransform().getSpec().getUrn(), equalTo(PTransformTranslation.FLATTEN_TRANSFORM_URN)); }
/**
 * Returns a {@link SideInputReference} for each side input of the provided transform, as
 * reported by {@code getLocalSideInputNames}.
 *
 * <p>Each reference pairs the transform (looked up in {@code components} by id) with the side
 * input's local name and the PCollection bound to that name in the transform's inputs.
 */
public Collection<SideInputReference> getSideInputs(PTransformNode transform) {
  return getLocalSideInputNames(transform.getTransform()).stream()
      .map(
          sideInputName -> {
            String id = transform.getId();
            PTransform proto = components.getTransformsOrThrow(id);
            String sideInputCollectionId =
                transform.getTransform().getInputsOrThrow(sideInputName);
            PCollection sideInputCollection =
                components.getPcollectionsOrThrow(sideInputCollectionId);
            return SideInputReference.of(
                PipelineNode.pTransform(id, proto),
                sideInputName,
                PipelineNode.pCollection(sideInputCollectionId, sideInputCollection));
          })
      .collect(Collectors.toSet());
}
/**
 * Creates a user state reference from a UserStateId proto and components.
 *
 * <p>The referenced PCollection is the transform's main input, whose local name is obtained
 * from {@code ParDoTranslation.getMainInputName}.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while resolving the main
 *     input name
 */
public static UserStateReference fromUserStateId(
    UserStateId userStateId, RunnerApi.Components components) {
  PTransform owner = components.getTransformsOrThrow(userStateId.getTransformId());

  String mainInputId;
  try {
    String mainInputName = ParDoTranslation.getMainInputName(owner);
    mainInputId = owner.getInputsOrThrow(mainInputName);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }

  PTransformNode ownerNode = PipelineNode.pTransform(userStateId.getTransformId(), owner);
  return UserStateReference.of(
      ownerNode,
      userStateId.getLocalName(),
      PipelineNode.pCollection(mainInputId, components.getPcollectionsOrThrow(mainInputId)));
}
/**
 * Registers a child transform first and its parent second, then checks that the resulting
 * components list the child as the parent's only inspected subtransform and that the child has
 * no subtransforms of its own.
 */
@Test
public void registerTransformAfterChildren() throws IOException {
  Create.Values<Long> parentCreate = Create.of(1L, 2L, 3L);
  GenerateSequence childSequence = GenerateSequence.from(0);
  PCollection<Long> output = pipeline.apply(parentCreate);

  String parentName = "my_transform";
  String childName = "my_transform/my_nesting";
  AppliedPTransform<?, ?, ?> parent =
      AppliedPTransform.of(
          parentName, pipeline.begin().expand(), output.expand(), parentCreate, pipeline);
  AppliedPTransform<?, ?, ?> child =
      AppliedPTransform.of(
          childName, pipeline.begin().expand(), output.expand(), childSequence, pipeline);

  // Child goes in before the parent that references it.
  String childId = components.registerPTransform(child, Collections.emptyList());
  String parentId = components.registerPTransform(parent, Collections.singletonList(child));

  Components componentsProto = this.components.toComponents();
  assertThat(
      componentsProto.getTransformsOrThrow(parentId).getSubtransforms(0), equalTo(childId));
  assertThat(
      componentsProto.getTransformsOrThrow(childId).getSubtransformsCount(), equalTo(0));
}
public static Optional<Environment> getEnvironment(String ptransformId, Components components) { try { PTransform ptransform = components.getTransformsOrThrow(ptransformId); String envId = KNOWN_URN_SPEC_EXTRACTORS .getOrDefault(ptransform.getSpec().getUrn(), DEFAULT_SPEC_EXTRACTOR) .getEnvironmentId(ptransform); if (Strings.isNullOrEmpty(envId)) { // Some PTransform payloads may have an unspecified (empty) Environment ID, for example a // WindowIntoPayload with a known WindowFn. Others will never have an Environment ID, such // as a GroupByKeyPayload, and the Default extractor returns null in this case. return Optional.empty(); } else { return Optional.of(components.getEnvironmentsOrThrow(envId)); } } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Checks that {@code QueryablePipeline#getEnvironment} yields the Java SDK harness environment
 * for the read transform and no environment for the flatten transform.
 */
@Test
public void getEnvironmentWithEnvironment() {
  Pipeline pipeline = Pipeline.create();
  PCollection<Long> numbers =
      pipeline.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(numbers)
      .and(numbers)
      .and(numbers)
      .apply("flatten", Flatten.pCollections());

  Components components = PipelineTranslation.toProto(pipeline).getComponents();
  QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);

  PTransformNode readNode =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PTransformNode flattenNode =
      PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));

  assertThat(queryable.getEnvironment(readNode).isPresent(), is(true));
  assertThat(
      queryable.getEnvironment(readNode).get(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT));
  assertThat(queryable.getEnvironment(flattenNode).isPresent(), is(false));
}
/**
 * Translates an impulse transform into a Flink source named {@code "Impulse"} and registers the
 * resulting stream under the transform's single output PCollection id.
 *
 * <p>The source is told to stay alive unless the pipeline options request shutting sources down
 * on the final watermark.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context providing options and the execution environment
 */
private void translateImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> outputType =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  boolean shutdownOnFinalWatermark =
      context.getPipelineOptions().isShutdownSourcesOnFinalWatermark();
  ImpulseSourceFunction impulseFunction = new ImpulseSourceFunction(!shutdownOnFinalWatermark);

  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context.getExecutionEnvironment().addSource(impulseFunction, "Impulse").returns(outputType);

  String outputId = Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputId, impulseStream);
}
/**
 * Translates an impulse transform into a Flink source and registers the resulting stream under
 * the transform's single output PCollection id.
 *
 * <p>The source is told to stay alive unless the pipeline options request shutting sources down
 * on the final watermark.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context providing options and the execution environment
 */
private void translateImpulse(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform impulseTransform = pipeline.getComponents().getTransformsOrThrow(id);

  TypeInformation<WindowedValue<byte[]>> outputType =
      new CoderTypeInformation<>(
          WindowedValue.getFullCoder(ByteArrayCoder.of(), GlobalWindow.Coder.INSTANCE));

  boolean shutdownOnFinalWatermark =
      context.getPipelineOptions().isShutdownSourcesOnFinalWatermark();
  ImpulseSourceFunction impulseFunction = new ImpulseSourceFunction(!shutdownOnFinalWatermark);

  SingleOutputStreamOperator<WindowedValue<byte[]>> impulseStream =
      context.getExecutionEnvironment().addSource(impulseFunction).returns(outputType);

  String outputId = Iterables.getOnlyElement(impulseTransform.getOutputsMap().values());
  context.addDataStream(outputId, impulseStream);
}
/**
 * Translates a reshuffle transform by rebalancing the stream of its single input and
 * registering the rebalanced stream under its single output PCollection id.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context holding the already-translated data streams
 */
private <K, V> void translateReshuffle(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform reshuffle = pipeline.getComponents().getTransformsOrThrow(id);

  String inputId = Iterables.getOnlyElement(reshuffle.getInputsMap().values());
  DataStream<WindowedValue<KV<K, V>>> upstream = context.getDataStreamOrThrow(inputId);

  String outputId = Iterables.getOnlyElement(reshuffle.getOutputsMap().values());
  DataStream<WindowedValue<KV<K, V>>> rebalanced = upstream.rebalance();
  context.addDataStream(outputId, rebalanced);
}
/**
 * Creates a side input reference from a SideInputId proto and components.
 *
 * <p>The transform is resolved by the id carried in {@code sideInputId}; the PCollection is the
 * one bound to the side input's local name in that transform's inputs.
 */
public static SideInputReference fromSideInputId(
    SideInputId sideInputId, RunnerApi.Components components) {
  String ownerId = sideInputId.getTransformId();
  String sideInputName = sideInputId.getLocalName();

  PTransform owner = components.getTransformsOrThrow(ownerId);
  String sideInputCollectionId = owner.getInputsOrThrow(sideInputName);
  PCollection sideInputCollection = components.getPcollectionsOrThrow(sideInputCollectionId);

  return SideInputReference.of(
      PipelineNode.pTransform(ownerId, owner),
      sideInputName,
      PipelineNode.pCollection(sideInputCollectionId, sideInputCollection));
}
/**
 * Handles a reshuffle transform: fetches the data stream registered for the transform's only
 * input, applies {@code rebalance()} to it, and registers the result for the transform's only
 * output.
 *
 * @param id the id of the transform to translate
 * @param pipeline the pipeline proto that contains the transform
 * @param context the translation context holding the already-translated data streams
 */
private <K, V> void translateReshuffle(
    String id, RunnerApi.Pipeline pipeline, StreamingTranslationContext context) {
  RunnerApi.PTransform reshuffleProto = pipeline.getComponents().getTransformsOrThrow(id);

  String sourceCollectionId = Iterables.getOnlyElement(reshuffleProto.getInputsMap().values());
  DataStream<WindowedValue<KV<K, V>>> sourceStream =
      context.getDataStreamOrThrow(sourceCollectionId);

  String targetCollectionId = Iterables.getOnlyElement(reshuffleProto.getOutputsMap().values());
  context.addDataStream(targetCollectionId, sourceStream.rebalance());
}
/**
 * Creates a timer reference from a TimerId proto and components.
 *
 * <p>The transform named by the timer id is looked up in {@code components} and paired with the
 * timer's local name.
 */
public static TimerReference fromTimerId(
    RunnerApi.ExecutableStagePayload.TimerId timerId, RunnerApi.Components components) {
  String ownerId = timerId.getTransformId();
  String timerName = timerId.getLocalName();
  PTransformNode owner =
      PipelineNode.pTransform(ownerId, components.getTransformsOrThrow(ownerId));
  return of(owner, timerName);
}
/**
 * Returns a copy of {@code pipeline} in which every PTransform whose URN is in {@code knownUrns}
 * has had {@code removeDescendants} applied to it.
 *
 * <p>The transform ids are read from the original pipeline while the removals are applied to a
 * builder derived from it, so the iteration is not affected by the removals.
 *
 * @param pipeline the pipeline to trim; it is not modified
 * @param knownUrns the URNs whose transforms should have their descendants removed
 * @return the trimmed pipeline
 */
private RunnerApi.Pipeline makeKnownUrnsPrimitives(
    RunnerApi.Pipeline pipeline, Set<String> knownUrns) {
  RunnerApi.Pipeline.Builder trimmedPipeline = pipeline.toBuilder();
  for (String ptransformId : pipeline.getComponents().getTransformsMap().keySet()) {
    String urn = pipeline.getComponents().getTransformsOrThrow(ptransformId).getSpec().getUrn();
    if (knownUrns.contains(urn)) {
      // Pass the id as a logging argument so it fills the {} placeholder; concatenating it onto
      // the format string left a literal "{}" in the logged message.
      LOG.debug("Removing descendants of known PTransform {}", ptransformId);
      removeDescendants(trimmedPipeline, ptransformId);
    }
  }
  return trimmedPipeline.build();
}