@Override public PCollection<T> expand(PCollection<T> input) { WindowingStrategy<?, ?> outputWindowingStrategy = getOutputWindowing(input.getWindowingStrategy()); return input // We first apply a (trivial) transform to the input PCollection to produce a new // PCollection. This ensures that we don't modify the windowing strategy of the input // which may be used elsewhere. .apply( "Identity", MapElements.via( new SimpleFunction<T, T>() { @Override public T apply(T element) { return element; } })) // Then we modify the windowing strategy. .setWindowingStrategyInternal(outputWindowingStrategy); }
@Override public PCollection<KV<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) { WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy(); return input // Group by just the key. // Combiner lifting will not happen regardless of the disallowCombinerLifting value. // There will be no combiners right after the GroupByKeyOnly because of the two ParDos // introduced in here. .apply(new GroupByKeyOnly<>()) // Sort each key's values by timestamp. GroupAlsoByWindow requires // its input to be sorted by timestamp. .apply(new SortValuesByTimestamp<>()) // Group each key's values by window, merging windows as needed. .apply(new GroupAlsoByWindow<>(windowingStrategy)) // And update the windowing strategy as appropriate. .setWindowingStrategyInternal(gbkTransform.updateWindowingStrategy(windowingStrategy)); }
@Override public PCollection<T> expand(PCollection<T> input) { applicableTo(input); WindowingStrategy<?, ?> outputStrategy = getOutputStrategyInternal(input.getWindowingStrategy()); if (getWindowFn() == null) { // A new PCollection must be created in case input is reused in a different location as the // two PCollections will, in general, have a different windowing strategy. return PCollectionList.of(input) .apply(Flatten.pCollections()) .setWindowingStrategyInternal(outputStrategy); } else { // This is the AssignWindows primitive return input.apply(new Assign<>(this, outputStrategy)); } }
@Override public PCollection<KV<K, Iterable<KV<Instant, WindowedValue<KV<K, V>>>>>> expand( PCollection<KV<K, V>> input) { WindowingStrategy<?, ?> inputWindowingStrategy = input.getWindowingStrategy(); // A KvCoder is required since this goes through GBK. Further, WindowedValueCoder // is not registered by default, so we explicitly set the relevant coders. checkState( input.getCoder() instanceof KvCoder, "Input to a %s using state requires a %s, but the coder was %s", ParDo.class.getSimpleName(), KvCoder.class.getSimpleName(), input.getCoder()); KvCoder<K, V> kvCoder = (KvCoder<K, V>) input.getCoder(); Coder<K> keyCoder = kvCoder.getKeyCoder(); Coder<? extends BoundedWindow> windowCoder = inputWindowingStrategy.getWindowFn().windowCoder(); return input // Stash the original timestamps, etc, for when it is fed to the user's DoFn .apply("ReifyWindows", ParDo.of(new ReifyWindowedValueFn<>())) .setCoder( KvCoder.of( keyCoder, KvCoder.of(InstantCoder.of(), WindowedValue.getFullCoder(kvCoder, windowCoder)))) // Group by key and sort by timestamp, dropping windows as they are reified .apply("PartitionKeys", new GroupByKeyAndSortValuesOnly<>()) // The GBKO sets the windowing strategy to the global default .setWindowingStrategyInternal(inputWindowingStrategy); } }
/**
 * Returns {@code maybeEmpty} flattened together with a collection that contributes
 * {@code fn.defaultValue()} when the iterable side-input view of {@code maybeEmpty} has no
 * elements.
 *
 * @param maybeEmpty the collection that may contain no elements
 * @return the flattening of {@code maybeEmpty} and the conditionally produced default
 */
private PCollection<OutputT> insertDefaultValueIfEmpty(PCollection<OutputT> maybeEmpty) {
  final PCollectionView<Iterable<OutputT>> contentsView = maybeEmpty.apply(View.asIterable());
  final OutputT fallback = fn.defaultValue();

  // A single null element serves purely as a trigger for the DoFn below.
  PCollection<Void> seed =
      maybeEmpty
          .getPipeline()
          .apply("CreateVoid", Create.of((Void) null).withCoder(VoidCoder.of()));

  // Emits the fallback only when the side input turns out to hold nothing.
  DoFn<Void, OutputT> emitFallbackWhenEmpty =
      new DoFn<Void, OutputT>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          boolean hasAnyElement = c.sideInput(contentsView).iterator().hasNext();
          if (!hasAnyElement) {
            c.output(fallback);
          }
        }
      };

  // Give the default-producing collection the same coder and windowing strategy as maybeEmpty
  // before the two are flattened together.
  PCollection<OutputT> defaultIfEmpty =
      seed.apply(
              "ProduceDefault", ParDo.of(emitFallbackWhenEmpty).withSideInputs(contentsView))
          .setCoder(maybeEmpty.getCoder())
          .setWindowingStrategyInternal(maybeEmpty.getWindowingStrategy());

  PCollectionList<OutputT> originalAndDefault =
      PCollectionList.of(maybeEmpty).and(defaultIfEmpty);
  return originalAndDefault.apply(Flatten.pCollections());
}
} // closes the enclosing scope opened before this excerpt
.setWindowingStrategyInternal(originalStrategy) .apply( "ExpandIterable",
@Override public PCollection<Iterable<ValueInSingleWindow<T>>> expand(PCollection<T> input) { WindowFn<?, ?> originalWindowFn = input.getWindowingStrategy().getWindowFn(); return input .apply(Reify.windows()) .apply( WithKeys.<Integer, ValueInSingleWindow<T>>of(0) .withKeyType(new TypeDescriptor<Integer>() {})) .apply( Window.into( new IdentityWindowFn<KV<Integer, ValueInSingleWindow<T>>>( originalWindowFn.windowCoder())) .triggering(Never.ever()) .withAllowedLateness(input.getWindowingStrategy().getAllowedLateness()) .discardingFiredPanes()) // all values have the same key so they all appear as a single output element .apply(GroupByKey.create()) .apply(Values.create()) .setWindowingStrategyInternal(input.getWindowingStrategy()); } }
KvCoder.of(inputCoder.getKeyCoder(), VarIntCoder.of()), inputCoder.getValueCoder())) .setWindowingStrategyInternal(preCombineStrategy) .apply("PreCombineHot", Combine.perKey(hotPreCombine, fnDisplayData)) .apply( .setCoder(KvCoder.of(inputCoder.getKeyCoder(), inputOrAccumCoder)) .apply(Window.remerge()) .setWindowingStrategyInternal(input.getWindowingStrategy()); PCollection<KV<K, InputOrAccum<InputT, AccumT>>> preprocessedCold = split
.setWindowingStrategyInternal(inputWindowingStrategy);
.setWindowingStrategyInternal(originalStrategy) .apply( "ExpandIterable",