/** Translates {@link Flatten.PCollections} into the Dataflow "Flatten" step. */
private <T> void flattenHelper(Flatten.PCollections<T> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "Flatten");

  // Build an output reference to every upstream PCollection feeding this flatten.
  List<OutputReference> inputRefs = new ArrayList<>();
  for (PValue in : context.getInputs(transform).values()) {
    inputRefs.add(context.asOutputReference(in, context.getProducer(in)));
  }

  step.addInput(PropertyNames.INPUTS, inputRefs);
  step.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
}
});
/**
 * Translates a multi-output {@link ParDo.MultiOutput} into a Dataflow "ParallelDo" step:
 * registers the step, records a coder per output tag, wires the main/side inputs and every
 * tagged output, then serializes the DoFn itself.
 */
private <InputT, OutputT> void translateMultiHelper(
    ParDo.MultiOutput<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "ParallelDo");

  // Coder for each tagged output so the harness can encode every output stream.
  Map<TupleTag<?>, Coder<?>> coderPerTag =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, entry -> ((PCollection) entry.getValue()).getCoder()));

  translateInputs(step, context.getInput(transform), transform.getSideInputs(), context);
  translateOutputs(context.getOutputs(transform), step);

  String transformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      step,
      transformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      coderPerTag);
}
});
// NOTE(review): chunk fragment — the method signature above and the remainder of the
// outputCoders initializer are outside this view; code tokens below are unchanged.
TranslationContext context) {
  // Register this transform as Dataflow's "SplittableProcessKeyed" step.
  StepTranslationContext stepContext = context.addStep(transform, "SplittableProcessKeyed");
  // Coder per output tag; the initializer expression continues past this chunk.
  Map<TupleTag<?>, Coder<?>> outputCoders = context
// NOTE(review): chunk fragment — the checkArgument(...) this message belongs to, and the
// body of the trailing if, are outside this view; code tokens below are unchanged.
"StreamingPubsubIORead is only for streaming pipelines.");
// Translate the overridden PubsubUnboundedSource as a native pubsub "ParallelRead" step.
PubsubUnboundedSource overriddenTransform = transform.getOverriddenTransform();
StepTranslationContext stepContext = context.addStep(transform, "ParallelRead");
stepContext.addInput(PropertyNames.FORMAT, "pubsub");
// Topic may be absent — presumably the source then reads from a subscription instead;
// confirm against PubsubUnboundedSource.
if (overriddenTransform.getTopicProvider() != null) {
// NOTE(review): chunk fragment — the surrounding checkArgument(...) and the if-body are
// outside this view; code tokens below are unchanged.
"StreamingPubsubIOWrite is only for streaming pipelines.");
// Translate the overridden PubsubUnboundedSink as a native pubsub "ParallelWrite" step.
PubsubUnboundedSink overriddenTransform = transform.getOverriddenTransform();
StepTranslationContext stepContext = context.addStep(transform, "ParallelWrite");
stepContext.addInput(PropertyNames.FORMAT, "pubsub");
// NOTE(review): unlike the read translator above, getTopicProvider() is dereferenced with
// no null check — presumably a sink always carries a topic provider; TODO confirm.
if (overriddenTransform.getTopicProvider().isAccessible()) {
// NOTE(review): chunk fragment — the statement this expression belongs to and the rest of
// the translateInputs(...) argument list are outside this view; code tokens are unchanged.
primitiveTransform.getOriginalCombine();
// Register the combine as Dataflow's "CombineValues" step.
StepTranslationContext stepContext = context.addStep(primitiveTransform, "CombineValues");
// Call continues past this chunk.
translateInputs( stepContext,
/**
 * Translates a single-output {@link ParDoSingle} into a Dataflow "ParallelDo" step,
 * registering the main output under its tag id and serializing the DoFn.
 */
private <InputT, OutputT> void translateSingleHelper(
    ParDoSingle<InputT, OutputT> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "ParallelDo");

  // Coder for each tagged output so the harness can encode every output stream.
  Map<TupleTag<?>, Coder<?>> coderPerTag =
      context.getOutputs(transform).entrySet().stream()
          .collect(
              Collectors.toMap(
                  Map.Entry::getKey, entry -> ((PCollection) entry.getValue()).getCoder()));

  translateInputs(step, context.getInput(transform), transform.getSideInputs(), context);
  // Only the main output exists for a single-output ParDo.
  step.addOutput(transform.getMainOutputTag().getId(), context.getOutput(transform));

  String transformId =
      context.getSdkComponents().getPTransformIdOrThrow(context.getCurrentTransform());
  translateFn(
      step,
      transformId,
      transform.getFn(),
      context.getInput(transform).getWindowingStrategy(),
      transform.getSideInputs(),
      context.getInput(transform).getCoder(),
      context,
      transform.getMainOutputTag(),
      coderPerTag);
}
});
private <K, V> void groupByKeyHelper( GroupByKey<K, V> transform, TranslationContext context) { StepTranslationContext stepContext = context.addStep(transform, "GroupByKey"); PCollection<KV<K, V>> input = context.getInput(transform); stepContext.addInput(PropertyNames.PARALLEL_INPUT, input); stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform)); WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy(); boolean isStreaming = context.getPipelineOptions().as(StreamingOptions.class).isStreaming(); boolean allowCombinerLifting = windowingStrategy.getWindowFn().isNonMerging() && windowingStrategy.getWindowFn().assignsToOneWindow(); if (isStreaming) { allowCombinerLifting &= transform.fewKeys(); // TODO: Allow combiner lifting on the non-default trigger, as appropriate. allowCombinerLifting &= (windowingStrategy.getTrigger() instanceof DefaultTrigger); } stepContext.addInput(PropertyNames.DISALLOW_COMBINER_LIFTING, !allowCombinerLifting); stepContext.addInput( PropertyNames.SERIALIZED_FN, byteArrayToJsonString(serializeWindowingStrategy(windowingStrategy))); stepContext.addInput( PropertyNames.IS_MERGING_WINDOW_FN, !windowingStrategy.getWindowFn().isNonMerging()); } });
/**
 * Translates {@link Impulse}. Streaming pipelines read from a synthetic pubsub starting
 * signal; batch pipelines inline a single encoded empty-byte-array element.
 */
@Override
public void translate(Impulse transform, TranslationContext context) {
  boolean streaming = context.getPipelineOptions().isStreaming();
  StepTranslationContext step = context.addStep(transform, "ParallelRead");
  if (streaming) {
    step.addInput(PropertyNames.FORMAT, "pubsub");
    step.addInput(PropertyNames.PUBSUB_SUBSCRIPTION, "_starting_signal/");
  } else {
    step.addInput(PropertyNames.FORMAT, "impulse");
    WindowedValue.FullWindowedValueCoder<byte[]> coder =
        WindowedValue.getFullCoder(
            context.getOutput(transform).getCoder(), GlobalWindow.Coder.INSTANCE);
    byte[] encodedImpulse;
    try {
      // The impulse element is an empty byte array in the global window.
      encodedImpulse = encodeToByteArray(coder, WindowedValue.valueInGlobalWindow(new byte[0]));
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    step.addInput(PropertyNames.IMPULSE_ELEMENT, byteArrayToJsonString(encodedImpulse));
  }
  step.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
}
}
@Test public void testTransformTranslator() throws IOException { // Test that we can provide a custom translation DataflowPipelineOptions options = buildPipelineOptions(); Pipeline p = Pipeline.create(options); TestTransform transform = new TestTransform(); p.apply(Create.of(Arrays.asList(1, 2, 3)).withCoder(BigEndianIntegerCoder.of())) .apply(transform); DataflowPipelineTranslator translator = DataflowRunner.fromOptions(options).getTranslator(); DataflowPipelineTranslator.registerTransformTranslator( TestTransform.class, (transform1, context) -> { transform1.translated = true; // Note: This is about the minimum needed to fake out a // translation. This obviously isn't a real translation. TransformTranslator.StepTranslationContext stepContext = context.addStep(transform1, "TestTranslate"); stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform1)); }); translator.translate(p, DataflowRunner.fromOptions(options), Collections.emptyList()); assertTrue(transform.translated); }
/**
 * Translates {@link View.CreatePCollectionView} into a "CollectionToSingleton" step that
 * materializes the input, carrying the serialized windowing strategy alongside it.
 */
private <ElemT, ViewT> void translateTyped(
    View.CreatePCollectionView<ElemT, ViewT> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "CollectionToSingleton");
  PCollection<ElemT> input = context.getInput(transform);
  step.addInput(PropertyNames.PARALLEL_INPUT, input);

  WindowingStrategy<?, ?> strategy = input.getWindowingStrategy();
  step.addInput(
      PropertyNames.WINDOWING_STRATEGY,
      byteArrayToJsonString(serializeWindowingStrategy(strategy)));
  step.addInput(PropertyNames.IS_MERGING_WINDOW_FN, !strategy.getWindowFn().isNonMerging());
  step.addCollectionToSingletonOutput(input, PropertyNames.OUTPUT, transform.getView());
}
});
/**
 * Shared helper that translates a custom {@link Source} read into a "ParallelRead" step
 * carrying the serialized cloud-source specification.
 *
 * @throws RuntimeException if serializing the source fails
 */
public static <T> void translateReadHelper(
    Source<T> source,
    PTransform<?, ? extends PCollection<?>> transform,
    TranslationContext context) {
  try {
    StepTranslationContext step = context.addStep(transform, "ParallelRead");
    step.addInput(PropertyNames.FORMAT, PropertyNames.CUSTOM_SOURCE_FORMAT);
    step.addInput(
        PropertyNames.SOURCE_STEP_INPUT,
        cloudSourceToDictionary(
            CustomSources.serializeToCloudSource(source, context.getPipelineOptions())));
    step.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));
  } catch (Exception e) {
    // Serialization failures are programming errors; surface them unchecked.
    throw new RuntimeException(e);
  }
}
/** Translates {@link CreateDataflowView} into a "CollectionToSingleton" materialization step. */
private <ElemT, ViewT> void translateTyped(
    CreateDataflowView<ElemT, ViewT> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "CollectionToSingleton");
  PCollection<ElemT> input = context.getInput(transform);
  step.addInput(PropertyNames.PARALLEL_INPUT, input);
  step.addCollectionToSingletonOutput(input, PropertyNames.OUTPUT, transform.getView());
}
});
private <K1, K2, V> void groupByKeyAndSortValuesHelper( GroupByKeyAndSortValuesOnly<K1, K2, V> transform, TranslationContext context) { StepTranslationContext stepContext = context.addStep(transform, "GroupByKey"); PCollection<KV<K1, KV<K2, V>>> input = context.getInput(transform); stepContext.addInput(PropertyNames.PARALLEL_INPUT, input); stepContext.addOutput(PropertyNames.OUTPUT, context.getOutput(transform)); stepContext.addInput(PropertyNames.SORT_VALUES, true); // TODO: Add support for combiner lifting once the need arises. stepContext.addInput(PropertyNames.DISALLOW_COMBINER_LIFTING, true); } });
/** Translates {@link Window.Assign} into a Dataflow "Bucket" step. */
private <T> void translateHelper(Window.Assign<T> transform, TranslationContext context) {
  StepTranslationContext step = context.addStep(transform, "Bucket");
  PCollection<T> input = context.getInput(transform);
  step.addInput(PropertyNames.PARALLEL_INPUT, input);
  step.addOutput(PropertyNames.OUTPUT, context.getOutput(transform));

  // The assigned windowing lives on the *output* collection; ship it serialized as JSON.
  WindowingStrategy<?, ?> strategy = context.getOutput(transform).getWindowingStrategy();
  step.addInput(
      PropertyNames.SERIALIZED_FN,
      byteArrayToJsonString(serializeWindowingStrategy(strategy)));
}
});